Index: projects/fuse2/sys/fs/fuse/fuse_internal.c =================================================================== --- projects/fuse2/sys/fs/fuse/fuse_internal.c (revision 349395) +++ projects/fuse2/sys/fs/fuse/fuse_internal.c (revision 349396) @@ -1,1115 +1,1132 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2007-2009 Google Inc. and Amit Singh * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (C) 2005 Csaba Henk. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fuse.h" #include "fuse_file.h" #include "fuse_internal.h" #include "fuse_io.h" #include "fuse_ipc.h" #include "fuse_node.h" #include "fuse_file.h" SDT_PROVIDER_DECLARE(fusefs); /* * Fuse trace probe: * arg0: verbosity. Higher numbers give more verbose messages * arg1: Textual message */ SDT_PROBE_DEFINE2(fusefs, , internal, trace, "int", "char*"); #ifdef ZERO_PAD_INCOMPLETE_BUFS static int isbzero(void *buf, size_t len); #endif int fuse_internal_get_cached_vnode(struct mount* mp, ino_t ino, int flags, struct vnode **vpp) { struct bintime now; struct thread *td = curthread; uint64_t nodeid = ino; int error; *vpp = NULL; error = vfs_hash_get(mp, fuse_vnode_hash(nodeid), flags, td, vpp, fuse_vnode_cmp, &nodeid); if (error) return error; /* * Check the entry cache timeout. We have to do this within fusefs * instead of by using cache_enter_time/cache_lookup because those * routines are only intended to work with pathnames, not inodes */ if (*vpp != NULL) { getbinuptime(&now); if (bintime_cmp(&(VTOFUD(*vpp)->entry_cache_timeout), &now, >)){ atomic_add_acq_long(&fuse_lookup_cache_hits, 1); return 0; } else { /* Entry cache timeout */ atomic_add_acq_long(&fuse_lookup_cache_misses, 1); cache_purge(*vpp); vput(*vpp); *vpp = NULL; } } return 0; } /* Synchronously send a FUSE_ACCESS operation */ int fuse_internal_access(struct vnode *vp, accmode_t mode, struct thread *td, struct ucred *cred) { int err = 0; uint32_t mask = F_OK; int dataflags; int vtype; struct mount *mp; struct fuse_dispatcher fdi; struct fuse_access_in *fai; struct fuse_data *data; mp = vnode_mount(vp); vtype = vnode_vtype(vp); data = fuse_get_mpdata(mp); dataflags = data->dataflags; if (mode == 0) return 0; if (mode & VMODIFY_PERMS && vfs_isrdonly(mp)) { switch (vp->v_type) { case VDIR: /* FALLTHROUGH */ case VLNK: /* FALLTHROUGH */ case VREG: return EROFS; default: break; } } /* Unless explicitly permitted, deny everyone except the fs owner. */ if (!(dataflags & FSESS_DAEMON_CAN_SPY)) { if (fuse_match_cred(data->daemoncred, cred)) return EPERM; } if (dataflags & FSESS_DEFAULT_PERMISSIONS) { struct vattr va; fuse_internal_getattr(vp, &va, cred, td); return vaccess(vp->v_type, va.va_mode, va.va_uid, va.va_gid, mode, cred, NULL); } if (!fsess_isimpl(mp, FUSE_ACCESS)) return 0; if ((mode & (VWRITE | VAPPEND | VADMIN)) != 0) mask |= W_OK; if ((mode & VREAD) != 0) mask |= R_OK; if ((mode & VEXEC) != 0) mask |= X_OK; fdisp_init(&fdi, sizeof(*fai)); fdisp_make_vp(&fdi, FUSE_ACCESS, vp, td, cred); fai = fdi.indata; fai->mask = mask; err = fdisp_wait_answ(&fdi); fdisp_destroy(&fdi); if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_ACCESS); err = 0; } return err; } /* * Cache FUSE attributes from attr, in attribute cache associated with vnode * 'vp'. Optionally, if argument 'vap' is not NULL, store a copy of the * converted attributes there as well. * * If the nominal attribute cache TTL is zero, do not cache on the 'vp' (but do * return the result to the caller). 
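 *
 * For example (illustrative only): if the daemon replies with
 * attr_valid = 1 and attr_valid_nsec = 500000000, the converted
 * attributes are cached on the vnode for 1.5 seconds. If both fields
 * are zero, the attributes are copied only into the caller's 'vap'
 * (when one is supplied) and nothing is cached on the vnode.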
*/ void fuse_internal_cache_attrs(struct vnode *vp, struct fuse_attr *attr, uint64_t attr_valid, uint32_t attr_valid_nsec, struct vattr *vap) { struct mount *mp; struct fuse_vnode_data *fvdat; struct fuse_data *data; struct vattr *vp_cache_at; mp = vnode_mount(vp); fvdat = VTOFUD(vp); data = fuse_get_mpdata(mp); ASSERT_VOP_ELOCKED(vp, "fuse_internal_cache_attrs"); fuse_validity_2_bintime(attr_valid, attr_valid_nsec, &fvdat->attr_cache_timeout); /* Fix our buffers if the filesize changed without us knowing */ if (vnode_isreg(vp) && attr->size != fvdat->cached_attrs.va_size) { (void)fuse_vnode_setsize(vp, attr->size); fvdat->cached_attrs.va_size = attr->size; } if (attr_valid > 0 || attr_valid_nsec > 0) vp_cache_at = &(fvdat->cached_attrs); else if (vap != NULL) vp_cache_at = vap; else return; vattr_null(vp_cache_at); vp_cache_at->va_fsid = mp->mnt_stat.f_fsid.val[0]; vp_cache_at->va_fileid = attr->ino; vp_cache_at->va_mode = attr->mode & ~S_IFMT; vp_cache_at->va_nlink = attr->nlink; vp_cache_at->va_uid = attr->uid; vp_cache_at->va_gid = attr->gid; vp_cache_at->va_rdev = attr->rdev; vp_cache_at->va_size = attr->size; /* XXX on i386, seconds are truncated to 32 bits */ vp_cache_at->va_atime.tv_sec = attr->atime; vp_cache_at->va_atime.tv_nsec = attr->atimensec; vp_cache_at->va_mtime.tv_sec = attr->mtime; vp_cache_at->va_mtime.tv_nsec = attr->mtimensec; vp_cache_at->va_ctime.tv_sec = attr->ctime; vp_cache_at->va_ctime.tv_nsec = attr->ctimensec; if (fuse_libabi_geq(data, 7, 9) && attr->blksize > 0) vp_cache_at->va_blocksize = attr->blksize; else vp_cache_at->va_blocksize = PAGE_SIZE; vp_cache_at->va_type = IFTOVT(attr->mode); vp_cache_at->va_bytes = attr->blocks * S_BLKSIZE; vp_cache_at->va_flags = 0; if (vap != vp_cache_at && vap != NULL) memcpy(vap, vp_cache_at, sizeof(*vap)); } /* fsync */ int fuse_internal_fsync_callback(struct fuse_ticket *tick, struct uio *uio) { if (tick->tk_aw_ohead.error == ENOSYS) { fsess_set_notimpl(tick->tk_data->mp, fticket_opcode(tick)); } return 0; } int fuse_internal_fsync(struct vnode *vp, struct thread *td, int waitfor, bool datasync) { struct fuse_fsync_in *ffsi = NULL; struct fuse_dispatcher fdi; struct fuse_filehandle *fufh; struct fuse_vnode_data *fvdat = VTOFUD(vp); struct mount *mp = vnode_mount(vp); int op = FUSE_FSYNC; int err = 0; if (!fsess_isimpl(vnode_mount(vp), (vnode_vtype(vp) == VDIR ? FUSE_FSYNCDIR : FUSE_FSYNC))) { return 0; } if (vnode_isdir(vp)) op = FUSE_FSYNCDIR; if (!fsess_isimpl(mp, op)) return 0; fdisp_init(&fdi, sizeof(*ffsi)); /* * fsync every open file handle for this file, because we can't be sure * which file handle the caller is really referring to. 
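 *
 * One FUSE_FSYNC (or FUSE_FSYNCDIR) request is sent per open handle.
 * fsync_flags bit 0 is set for the fdatasync(2) case, where only the
 * data, not the metadata, needs to be flushed. With MNT_WAIT the reply
 * is awaited synchronously; otherwise the request completes through
 * fuse_internal_fsync_callback.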
*/ LIST_FOREACH(fufh, &fvdat->handles, next) { if (ffsi == NULL) fdisp_make_vp(&fdi, op, vp, td, NULL); else fdisp_refresh_vp(&fdi, op, vp, td, NULL); ffsi = fdi.indata; ffsi->fh = fufh->fh_id; ffsi->fsync_flags = 0; if (datasync) ffsi->fsync_flags = 1; if (waitfor == MNT_WAIT) { err = fdisp_wait_answ(&fdi); } else { fuse_insert_callback(fdi.tick, fuse_internal_fsync_callback); fuse_insert_message(fdi.tick, false); } if (err == ENOSYS) { /* ENOSYS means "success, and don't call again" */ fsess_set_notimpl(mp, op); err = 0; break; } } fdisp_destroy(&fdi); return err; } /* Asynchronous invalidation */ SDT_PROBE_DEFINE2(fusefs, , internal, invalidate_cache_hit, "struct vnode*", "struct vnode*"); int fuse_internal_invalidate_entry(struct mount *mp, struct uio *uio) { struct fuse_notify_inval_entry_out fnieo; struct componentname cn; struct vnode *dvp, *vp; char name[PATH_MAX]; int err; if ((err = uiomove(&fnieo, sizeof(fnieo), uio)) != 0) return (err); if ((err = uiomove(name, fnieo.namelen, uio)) != 0) return (err); name[fnieo.namelen] = '\0'; /* fusefs does not cache "." or ".." entries */ if (strncmp(name, ".", sizeof(".")) == 0 || strncmp(name, "..", sizeof("..")) == 0) return (0); if (fnieo.parent == FUSE_ROOT_ID) err = VFS_ROOT(mp, LK_SHARED, &dvp); else err = fuse_internal_get_cached_vnode( mp, fnieo.parent, LK_SHARED, &dvp); /* * If dvp is not in the cache, then it must've been reclaimed. And * since fuse_vnop_reclaim does a cache_purge, name's entry must've * been invalidated already. So we can safely return if dvp == NULL */ if (err != 0 || dvp == NULL) return (err); /* * XXX we can't check dvp's generation because the FUSE invalidate * entry message doesn't include it. Worst case is that we invalidate * an entry that didn't need to be invalidated. */ cn.cn_nameiop = LOOKUP; cn.cn_flags = 0; /* !MAKEENTRY means free cached entry */ cn.cn_thread = curthread; cn.cn_cred = curthread->td_ucred; cn.cn_lkflags = LK_SHARED; cn.cn_pnbuf = NULL; cn.cn_nameptr = name; cn.cn_namelen = fnieo.namelen; err = cache_lookup(dvp, &vp, &cn, NULL, NULL); MPASS(err == 0); fuse_vnode_clear_attr_cache(dvp); vput(dvp); return (0); } int fuse_internal_invalidate_inode(struct mount *mp, struct uio *uio) { struct fuse_notify_inval_inode_out fniio; struct vnode *vp; int err; if ((err = uiomove(&fniio, sizeof(fniio), uio)) != 0) return (err); if (fniio.ino == FUSE_ROOT_ID) err = VFS_ROOT(mp, LK_EXCLUSIVE, &vp); else err = fuse_internal_get_cached_vnode(mp, fniio.ino, LK_SHARED, &vp); if (err != 0 || vp == NULL) return (err); /* * XXX we can't check vp's generation because the FUSE invalidate * inode message doesn't include it. Worst case is that we invalidate * an inode that didn't need to be invalidated. */ /* * Flush and invalidate buffers if off >= 0. Technically we only need * to flush and invalidate the range of offsets [off, off + len), but * for simplicity's sake we do everything.
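 * (In the FUSE notification protocol a negative 'off' is understood to
 * mean "invalidate the attributes only", which is why the buffer flush
 * below is conditional while the attribute cache is always cleared.)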
*/ if (fniio.off >= 0) fuse_io_invalbuf(vp, curthread); fuse_vnode_clear_attr_cache(vp); vput(vp); return (0); } /* mknod */ int fuse_internal_mknod(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap) { struct fuse_data *data; struct fuse_mknod_in fmni; size_t insize; data = fuse_get_mpdata(dvp->v_mount); fmni.mode = MAKEIMODE(vap->va_type, vap->va_mode); fmni.rdev = vap->va_rdev; if (fuse_libabi_geq(data, 7, 12)) { insize = sizeof(fmni); fmni.umask = curthread->td_proc->p_fd->fd_cmask; } else { insize = FUSE_COMPAT_MKNOD_IN_SIZE; } return (fuse_internal_newentry(dvp, vpp, cnp, FUSE_MKNOD, &fmni, insize, vap->va_type)); } /* readdir */ int fuse_internal_readdir(struct vnode *vp, struct uio *uio, off_t startoff, struct fuse_filehandle *fufh, struct fuse_iov *cookediov, int *ncookies, u_long *cookies) { int err = 0; struct fuse_dispatcher fdi; struct fuse_read_in *fri = NULL; int fnd_start; if (uio_resid(uio) == 0) return 0; fdisp_init(&fdi, 0); /* * Note that we DO NOT have a UIO_SYSSPACE here (so no need for p2p * I/O). */ /* * fnd_start is set non-zero once the offset in the directory gets * to the startoff. This is done because directories must be read * from the beginning (offset == 0) when fuse_vnop_readdir() needs * to do an open of the directory. * If it is not set non-zero here, it will be set non-zero in * fuse_internal_readdir_processdata() when uio_offset == startoff. */ fnd_start = 0; if (uio->uio_offset == startoff) fnd_start = 1; while (uio_resid(uio) > 0) { fdi.iosize = sizeof(*fri); if (fri == NULL) fdisp_make_vp(&fdi, FUSE_READDIR, vp, NULL, NULL); else fdisp_refresh_vp(&fdi, FUSE_READDIR, vp, NULL, NULL); fri = fdi.indata; fri->fh = fufh->fh_id; fri->offset = uio_offset(uio); fri->size = MIN(uio->uio_resid, fuse_get_mpdata(vp->v_mount)->max_read); if ((err = fdisp_wait_answ(&fdi))) break; if ((err = fuse_internal_readdir_processdata(uio, startoff, &fnd_start, fri->size, fdi.answ, fdi.iosize, cookediov, ncookies, &cookies))) break; } fdisp_destroy(&fdi); return ((err == -1) ? 0 : err); } /* * Return -1 to indicate that this readdir is finished, 0 if it copied * all the directory data read in and it may be possible to read more * and greater than 0 for a failure. */ int fuse_internal_readdir_processdata(struct uio *uio, off_t startoff, int *fnd_start, size_t reqsize, void *buf, size_t bufsize, struct fuse_iov *cookediov, int *ncookies, u_long **cookiesp) { int err = 0; int bytesavail; size_t freclen; struct dirent *de; struct fuse_dirent *fudge; u_long *cookies; cookies = *cookiesp; if (bufsize < FUSE_NAME_OFFSET) return -1; for (;;) { if (bufsize < FUSE_NAME_OFFSET) { err = -1; break; } fudge = (struct fuse_dirent *)buf; freclen = FUSE_DIRENT_SIZE(fudge); if (bufsize < freclen) { /* * This indicates a partial directory entry at the * end of the directory data. */ err = -1; break; } #ifdef ZERO_PAD_INCOMPLETE_BUFS if (isbzero(buf, FUSE_NAME_OFFSET)) { err = -1; break; } #endif if (!fudge->namelen || fudge->namelen > MAXNAMLEN) { err = EINVAL; break; } bytesavail = GENERIC_DIRSIZ((struct pseudo_dirent *) &fudge->namelen); if (bytesavail > uio_resid(uio)) { /* Out of space for the dir so we are done. */ err = -1; break; } /* * Don't start to copy the directory entries out until * the requested offset in the directory is found. 
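 *
 * For example: if fuse_vnop_readdir had to reopen the directory, the
 * read restarts at offset 0 and every fuse_dirent is skipped until one
 * with fudge->off == startoff is seen; only the entries after that
 * point are copied out to the uio.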
*/ if (*fnd_start != 0) { fiov_adjust(cookediov, bytesavail); bzero(cookediov->base, bytesavail); de = (struct dirent *)cookediov->base; de->d_fileno = fudge->ino; de->d_reclen = bytesavail; de->d_type = fudge->type; de->d_namlen = fudge->namelen; memcpy((char *)cookediov->base + sizeof(struct dirent) - MAXNAMLEN - 1, (char *)buf + FUSE_NAME_OFFSET, fudge->namelen); dirent_terminate(de); err = uiomove(cookediov->base, cookediov->len, uio); if (err) break; if (cookies != NULL) { if (*ncookies == 0) { err = -1; break; } *cookies = fudge->off; cookies++; (*ncookies)--; } } else if (startoff == fudge->off) *fnd_start = 1; buf = (char *)buf + freclen; bufsize -= freclen; uio_setoffset(uio, fudge->off); } *cookiesp = cookies; return err; } /* remove */ int fuse_internal_remove(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, enum fuse_opcode op) { struct fuse_dispatcher fdi; int err = 0; fdisp_init(&fdi, cnp->cn_namelen + 1); fdisp_make_vp(&fdi, op, dvp, cnp->cn_thread, cnp->cn_cred); memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen); ((char *)fdi.indata)[cnp->cn_namelen] = '\0'; err = fdisp_wait_answ(&fdi); fdisp_destroy(&fdi); return err; } /* rename */ int fuse_internal_rename(struct vnode *fdvp, struct componentname *fcnp, struct vnode *tdvp, struct componentname *tcnp) { struct fuse_dispatcher fdi; struct fuse_rename_in *fri; int err = 0; fdisp_init(&fdi, sizeof(*fri) + fcnp->cn_namelen + tcnp->cn_namelen + 2); fdisp_make_vp(&fdi, FUSE_RENAME, fdvp, tcnp->cn_thread, tcnp->cn_cred); fri = fdi.indata; fri->newdir = VTOI(tdvp); memcpy((char *)fdi.indata + sizeof(*fri), fcnp->cn_nameptr, fcnp->cn_namelen); ((char *)fdi.indata)[sizeof(*fri) + fcnp->cn_namelen] = '\0'; memcpy((char *)fdi.indata + sizeof(*fri) + fcnp->cn_namelen + 1, tcnp->cn_nameptr, tcnp->cn_namelen); ((char *)fdi.indata)[sizeof(*fri) + fcnp->cn_namelen + tcnp->cn_namelen + 1] = '\0'; err = fdisp_wait_answ(&fdi); fdisp_destroy(&fdi); return err; } /* strategy */ /* entity creation */ void fuse_internal_newentry_makerequest(struct mount *mp, uint64_t dnid, struct componentname *cnp, enum fuse_opcode op, void *buf, size_t bufsize, struct fuse_dispatcher *fdip) { fdip->iosize = bufsize + cnp->cn_namelen + 1; fdisp_make(fdip, op, mp, dnid, cnp->cn_thread, cnp->cn_cred); memcpy(fdip->indata, buf, bufsize); memcpy((char *)fdip->indata + bufsize, cnp->cn_nameptr, cnp->cn_namelen); ((char *)fdip->indata)[bufsize + cnp->cn_namelen] = '\0'; } int fuse_internal_newentry_core(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, enum vtype vtyp, struct fuse_dispatcher *fdip) { int err = 0; struct fuse_entry_out *feo; struct mount *mp = vnode_mount(dvp); if ((err = fdisp_wait_answ(fdip))) { return err; } feo = fdip->answ; if ((err = fuse_internal_checkentry(feo, vtyp))) { return err; } err = fuse_vnode_get(mp, feo, feo->nodeid, dvp, vpp, cnp, vtyp); if (err) { fuse_internal_forget_send(mp, cnp->cn_thread, cnp->cn_cred, feo->nodeid, 1); return err; } /* * Purge the parent's attribute cache because the daemon should've * updated its mtime and ctime */ fuse_vnode_clear_attr_cache(dvp); fuse_internal_cache_attrs(*vpp, &feo->attr, feo->attr_valid, feo->attr_valid_nsec, NULL); return err; } int fuse_internal_newentry(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, enum fuse_opcode op, void *buf, size_t bufsize, enum vtype vtype) { int err; struct fuse_dispatcher fdi; struct mount *mp = vnode_mount(dvp); fdisp_init(&fdi, 0); fuse_internal_newentry_makerequest(mp, VTOI(dvp), cnp, op, buf, bufsize, 
&fdi); err = fuse_internal_newentry_core(dvp, vpp, cnp, vtype, &fdi); fdisp_destroy(&fdi); return err; } /* entity destruction */ int fuse_internal_forget_callback(struct fuse_ticket *ftick, struct uio *uio) { fuse_internal_forget_send(ftick->tk_data->mp, curthread, NULL, ((struct fuse_in_header *)ftick->tk_ms_fiov.base)->nodeid, 1); return 0; } void fuse_internal_forget_send(struct mount *mp, struct thread *td, struct ucred *cred, uint64_t nodeid, uint64_t nlookup) { struct fuse_dispatcher fdi; struct fuse_forget_in *ffi; /* * KASSERT(nlookup > 0, ("zero-times forget for vp #%llu", * (long long unsigned) nodeid)); */ fdisp_init(&fdi, sizeof(*ffi)); fdisp_make(&fdi, FUSE_FORGET, mp, nodeid, td, cred); ffi = fdi.indata; ffi->nlookup = nlookup; fuse_insert_message(fdi.tick, false); fdisp_destroy(&fdi); } /* Fetch the vnode's attributes from the daemon*/ int fuse_internal_do_getattr(struct vnode *vp, struct vattr *vap, struct ucred *cred, struct thread *td) { struct fuse_dispatcher fdi; struct fuse_vnode_data *fvdat = VTOFUD(vp); struct fuse_getattr_in *fgai; struct fuse_attr_out *fao; off_t old_filesize = fvdat->cached_attrs.va_size; + struct timespec old_ctime = fvdat->cached_attrs.va_ctime; + struct timespec old_mtime = fvdat->cached_attrs.va_mtime; enum vtype vtyp; int err; fdisp_init(&fdi, 0); fdisp_make_vp(&fdi, FUSE_GETATTR, vp, td, cred); fgai = fdi.indata; /* * We could look up a file handle and set it in fgai->fh, but that * involves extra runtime work and I'm unaware of any file systems that * care. */ fgai->getattr_flags = 0; if ((err = fdisp_simple_putget_vp(&fdi, FUSE_GETATTR, vp, td, cred))) { if (err == ENOENT) fuse_internal_vnode_disappear(vp); goto out; } fao = (struct fuse_attr_out *)fdi.answ; vtyp = IFTOVT(fao->attr.mode); if (fvdat->flag & FN_SIZECHANGE) fao->attr.size = old_filesize; + if (fvdat->flag & FN_CTIMECHANGE) { + fao->attr.ctime = old_ctime.tv_sec; + fao->attr.ctimensec = old_ctime.tv_nsec; + } + if (fvdat->flag & FN_MTIMECHANGE) { + fao->attr.mtime = old_mtime.tv_sec; + fao->attr.mtimensec = old_mtime.tv_nsec; + } fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid, fao->attr_valid_nsec, vap); if (vtyp != vnode_vtype(vp)) { fuse_internal_vnode_disappear(vp); err = ENOENT; } out: fdisp_destroy(&fdi); return err; } /* Read a vnode's attributes from cache or fetch them from the fuse daemon */ int fuse_internal_getattr(struct vnode *vp, struct vattr *vap, struct ucred *cred, struct thread *td) { struct vattr *attrs; if ((attrs = VTOVA(vp)) != NULL) { *vap = *attrs; /* struct copy */ return 0; } return fuse_internal_do_getattr(vp, vap, cred, td); } void fuse_internal_vnode_disappear(struct vnode *vp) { struct fuse_vnode_data *fvdat = VTOFUD(vp); ASSERT_VOP_ELOCKED(vp, "fuse_internal_vnode_disappear"); fvdat->flag |= FN_REVOKED; bintime_clear(&fvdat->attr_cache_timeout); bintime_clear(&fvdat->entry_cache_timeout); cache_purge(vp); } /* fuse start/stop */ int fuse_internal_init_callback(struct fuse_ticket *tick, struct uio *uio) { int err = 0; struct fuse_data *data = tick->tk_data; struct fuse_init_out *fiio; if ((err = tick->tk_aw_ohead.error)) { goto out; } if ((err = fticket_pull(tick, uio))) { goto out; } fiio = fticket_resp(tick)->base; data->fuse_libabi_major = fiio->major; data->fuse_libabi_minor = fiio->minor; if (!fuse_libabi_geq(data, 7, 4)) { /* * With a little work we could support servers as old as 7.1. * But there would be little payoff. 
*/ SDT_PROBE2(fusefs, , internal, trace, 1, "userspace version too low"); err = EPROTONOSUPPORT; goto out; } if (fuse_libabi_geq(data, 7, 5)) { if (fticket_resp(tick)->len == sizeof(struct fuse_init_out) || fticket_resp(tick)->len == FUSE_COMPAT_22_INIT_OUT_SIZE) { data->max_write = fiio->max_write; if (fiio->flags & FUSE_ASYNC_READ) data->dataflags |= FSESS_ASYNC_READ; if (fiio->flags & FUSE_POSIX_LOCKS) data->dataflags |= FSESS_POSIX_LOCKS; if (fiio->flags & FUSE_EXPORT_SUPPORT) data->dataflags |= FSESS_EXPORT_SUPPORT; /* * Don't bother to check FUSE_BIG_WRITES, because it's * redundant with max_write */ /* * max_background, congestion_threshold, and time_gran * are not implemented */ } else { err = EINVAL; } } else { /* Old fixed values */ data->max_write = 4096; } if (fuse_libabi_geq(data, 7, 6)) data->max_readahead_blocks = fiio->max_readahead / maxbcachebuf; if (!fuse_libabi_geq(data, 7, 7)) fsess_set_notimpl(data->mp, FUSE_INTERRUPT); if (!fuse_libabi_geq(data, 7, 8)) { fsess_set_notimpl(data->mp, FUSE_BMAP); fsess_set_notimpl(data->mp, FUSE_DESTROY); } out: if (err) { fdata_set_dead(data); } FUSE_LOCK(); data->dataflags |= FSESS_INITED; wakeup(&data->ticketer); FUSE_UNLOCK(); return 0; } void fuse_internal_send_init(struct fuse_data *data, struct thread *td) { struct fuse_init_in *fiii; struct fuse_dispatcher fdi; fdisp_init(&fdi, sizeof(*fiii)); fdisp_make(&fdi, FUSE_INIT, data->mp, 0, td, NULL); fiii = fdi.indata; fiii->major = FUSE_KERNEL_VERSION; fiii->minor = FUSE_KERNEL_MINOR_VERSION; /* * fusefs currently reads ahead no more than one cache block at a time. * See fuse_read_biobackend */ fiii->max_readahead = maxbcachebuf; /* * Unsupported features: * FUSE_FILE_OPS: No known FUSE server or client supports it * FUSE_ATOMIC_O_TRUNC: our VFS cannot support it * FUSE_DONT_MASK: unlike Linux, FreeBSD always applies the umask, even * when default ACLs are in use. */ fiii->flags = FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES; fuse_insert_callback(fdi.tick, fuse_internal_init_callback); fuse_insert_message(fdi.tick, false); fdisp_destroy(&fdi); } /* * Send a FUSE_SETATTR operation with no permissions checks. If cred is NULL, * send the request with root credentials */ int fuse_internal_setattr(struct vnode *vp, struct vattr *vap, struct thread *td, struct ucred *cred) { + struct fuse_vnode_data *fvdat; struct fuse_dispatcher fdi; struct fuse_setattr_in *fsai; struct mount *mp; pid_t pid = td->td_proc->p_pid; struct fuse_data *data; int dataflags; int err = 0; enum vtype vtyp; int sizechanged = -1; uint64_t newsize = 0; mp = vnode_mount(vp); + fvdat = VTOFUD(vp); data = fuse_get_mpdata(mp); dataflags = data->dataflags; fdisp_init(&fdi, sizeof(*fsai)); fdisp_make_vp(&fdi, FUSE_SETATTR, vp, td, cred); if (!cred) { fdi.finh->uid = 0; fdi.finh->gid = 0; } fsai = fdi.indata; fsai->valid = 0; if (vap->va_uid != (uid_t)VNOVAL) { fsai->uid = vap->va_uid; fsai->valid |= FATTR_UID; } if (vap->va_gid != (gid_t)VNOVAL) { fsai->gid = vap->va_gid; fsai->valid |= FATTR_GID; } if (vap->va_size != VNOVAL) { struct fuse_filehandle *fufh = NULL; /* Truncate to a new value.
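 * If a file handle opened for write is available, pass it along in
 * fsai->fh with FATTR_FH set so the server can apply ftruncate(2)-like
 * semantics to that handle instead of truncating by path.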
*/ fsai->size = vap->va_size; sizechanged = 1; newsize = vap->va_size; fsai->valid |= FATTR_SIZE; fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid); if (fufh) { fsai->fh = fufh->fh_id; fsai->valid |= FATTR_FH; } VTOFUD(vp)->flag &= ~FN_SIZECHANGE; } if (vap->va_atime.tv_sec != VNOVAL) { fsai->atime = vap->va_atime.tv_sec; fsai->atimensec = vap->va_atime.tv_nsec; fsai->valid |= FATTR_ATIME; if (vap->va_vaflags & VA_UTIMES_NULL) fsai->valid |= FATTR_ATIME_NOW; } if (vap->va_mtime.tv_sec != VNOVAL) { fsai->mtime = vap->va_mtime.tv_sec; fsai->mtimensec = vap->va_mtime.tv_nsec; fsai->valid |= FATTR_MTIME; if (vap->va_vaflags & VA_UTIMES_NULL) fsai->valid |= FATTR_MTIME_NOW; + } else if (fvdat->flag & FN_MTIMECHANGE) { + fsai->mtime = fvdat->cached_attrs.va_mtime.tv_sec; + fsai->mtimensec = fvdat->cached_attrs.va_mtime.tv_nsec; + fsai->valid |= FATTR_MTIME; } if (vap->va_mode != (mode_t)VNOVAL) { fsai->mode = vap->va_mode & ALLPERMS; fsai->valid |= FATTR_MODE; } if (!fsai->valid) { goto out; } if ((err = fdisp_wait_answ(&fdi))) goto out; vtyp = IFTOVT(((struct fuse_attr_out *)fdi.answ)->attr.mode); if (vnode_vtype(vp) != vtyp) { if (vnode_vtype(vp) == VNON && vtyp != VNON) { SDT_PROBE2(fusefs, , internal, trace, 1, "FUSE: Dang! " "vnode_vtype is VNON and vtype isn't."); } else { /* * STALE vnode, ditch * * The vnode has changed its type "behind our back". * There's nothing really we can do, so let us just * force an internal revocation and tell the caller to * try again, if interested. */ fuse_internal_vnode_disappear(vp); err = EAGAIN; } } if (err == 0) { struct fuse_attr_out *fao = (struct fuse_attr_out*)fdi.answ; + fuse_vnode_undirty_cached_timestamps(vp); fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid, fao->attr_valid_nsec, NULL); } out: fdisp_destroy(&fdi); return err; } #ifdef ZERO_PAD_INCOMPLETE_BUFS static int isbzero(void *buf, size_t len) { int i; for (i = 0; i < len; i++) { if (((char *)buf)[i]) return (0); } return (1); } #endif Index: projects/fuse2/sys/fs/fuse/fuse_io.c =================================================================== --- projects/fuse2/sys/fs/fuse/fuse_io.c (revision 349395) +++ projects/fuse2/sys/fs/fuse/fuse_io.c (revision 349396) @@ -1,1095 +1,1102 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2007-2009 Google Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (C) 2005 Csaba Henk. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fuse.h" #include "fuse_file.h" #include "fuse_node.h" #include "fuse_internal.h" #include "fuse_ipc.h" #include "fuse_io.h" /* * Set in a struct buf to indicate that the write came from the buffer cache * and the originating cred and pid are no longer known. */ #define B_FUSEFS_WRITE_CACHE B_FS_FLAG1 SDT_PROVIDER_DECLARE(fusefs); /* * Fuse trace probe: * arg0: verbosity. Higher numbers give more verbose messages * arg1: Textual message */ SDT_PROBE_DEFINE2(fusefs, , io, trace, "int", "char*"); static void fuse_io_clear_suid_on_write(struct vnode *vp, struct ucred *cred, struct thread *td); static int fuse_read_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh); static int fuse_read_biobackend(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred, struct fuse_filehandle *fufh, pid_t pid); static int fuse_write_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh, off_t filesize, int ioflag, bool pages); static int fuse_write_biobackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh, int ioflag, pid_t pid); /* * FreeBSD clears the SUID and SGID bits on any write by a non-root user. 
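 *
 * fusefs implements this by sending a FUSE_SETATTR of the current mode
 * with S_ISUID and S_ISGID masked off after a successful write, but
 * only when the kernel enforces permissions (FSESS_DEFAULT_PERMISSIONS)
 * and the writing credential lacks PRIV_VFS_RETAINSUGID.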
*/ static void fuse_io_clear_suid_on_write(struct vnode *vp, struct ucred *cred, struct thread *td) { struct fuse_data *data; struct mount *mp; struct vattr va; int dataflags; mp = vnode_mount(vp); data = fuse_get_mpdata(mp); dataflags = data->dataflags; if (dataflags & FSESS_DEFAULT_PERMISSIONS) { if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) { fuse_internal_getattr(vp, &va, cred, td); if (va.va_mode & (S_ISUID | S_ISGID)) { mode_t mode = va.va_mode & ~(S_ISUID | S_ISGID); /* Clear all vattr fields except mode */ vattr_null(&va); va.va_mode = mode; /* * Ignore fuse_internal_setattr's return value, * because at this point the write operation has * already succeeded and we don't want to return * failing status for that. */ (void)fuse_internal_setattr(vp, &va, td, NULL); } } } } SDT_PROBE_DEFINE5(fusefs, , io, io_dispatch, "struct vnode*", "struct uio*", "int", "struct ucred*", "struct fuse_filehandle*"); SDT_PROBE_DEFINE4(fusefs, , io, io_dispatch_filehandles_closed, "struct vnode*", "struct uio*", "int", "struct ucred*"); int fuse_io_dispatch(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred, pid_t pid) { struct fuse_filehandle *fufh; int err, directio; int fflag; bool closefufh = false; MPASS(vp->v_type == VREG || vp->v_type == VDIR); fflag = (uio->uio_rw == UIO_READ) ? FREAD : FWRITE; err = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid); if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) { /* * nfsd will do I/O without first doing VOP_OPEN. We * must implicitly open the file here */ err = fuse_filehandle_open(vp, fflag, &fufh, curthread, cred); closefufh = true; } else if (err) { SDT_PROBE4(fusefs, , io, io_dispatch_filehandles_closed, vp, uio, ioflag, cred); printf("FUSE: io dispatch: filehandles are closed\n"); return err; } if (err) goto out; SDT_PROBE5(fusefs, , io, io_dispatch, vp, uio, ioflag, cred, fufh); /* * Ideally, when the daemon asks for direct io at open time, the * standard file flag should be set according to this, so that would * just change the default mode, which later on could be changed via * fcntl(2). * But this doesn't work, the O_DIRECT flag gets cleared at some point * (don't know where). So to make any use of the Fuse direct_io option, * we hardwire it into the file's private data (similarly to Linux, * btw.). 
*/ directio = (ioflag & IO_DIRECT) || !fsess_opt_datacache(vnode_mount(vp)); switch (uio->uio_rw) { case UIO_READ: if (directio) { SDT_PROBE2(fusefs, , io, trace, 1, "direct read of vnode"); err = fuse_read_directbackend(vp, uio, cred, fufh); } else { SDT_PROBE2(fusefs, , io, trace, 1, "buffered read of vnode"); err = fuse_read_biobackend(vp, uio, ioflag, cred, fufh, pid); } break; case UIO_WRITE: + fuse_vnode_update(vp, FN_MTIMECHANGE | FN_CTIMECHANGE); if (directio) { const int iosize = fuse_iosize(vp); off_t start, end, filesize; SDT_PROBE2(fusefs, , io, trace, 1, "direct write of vnode"); err = fuse_vnode_size(vp, &filesize, cred, curthread); if (err) goto out; start = uio->uio_offset; end = start + uio->uio_resid; KASSERT((ioflag & (IO_VMIO | IO_DIRECT)) != (IO_VMIO | IO_DIRECT), ("IO_DIRECT used for a cache flush?")); /* Invalidate the write cache when writing directly */ v_inval_buf_range(vp, start, end, iosize); err = fuse_write_directbackend(vp, uio, cred, fufh, filesize, ioflag, false); } else { SDT_PROBE2(fusefs, , io, trace, 1, "buffered write of vnode"); if (fuse_data_cache_mode == FUSE_CACHE_WT) ioflag |= IO_SYNC; err = fuse_write_biobackend(vp, uio, cred, fufh, ioflag, pid); } fuse_io_clear_suid_on_write(vp, cred, uio->uio_td); break; default: panic("uninterpreted mode passed to fuse_io_dispatch"); } out: if (closefufh) fuse_filehandle_close(vp, fufh, curthread, cred); return (err); } SDT_PROBE_DEFINE4(fusefs, , io, read_bio_backend_start, "int", "int", "int", "int"); SDT_PROBE_DEFINE2(fusefs, , io, read_bio_backend_feed, "int", "struct buf*"); SDT_PROBE_DEFINE4(fusefs, , io, read_bio_backend_end, "int", "ssize_t", "int", "struct buf*"); static int fuse_read_biobackend(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred, struct fuse_filehandle *fufh, pid_t pid) { struct buf *bp; struct mount *mp; struct fuse_data *data; daddr_t lbn, nextlbn; int bcount, nextsize; int err, n = 0, on = 0, seqcount; off_t filesize; const int biosize = fuse_iosize(vp); mp = vnode_mount(vp); data = fuse_get_mpdata(mp); if (uio->uio_offset < 0) return (EINVAL); seqcount = ioflag >> IO_SEQSHIFT; err = fuse_vnode_size(vp, &filesize, cred, curthread); if (err) return err; for (err = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { if (fuse_isdeadfs(vp)) { err = ENXIO; break; } if (filesize - uio->uio_offset <= 0) break; lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize - 1); if ((off_t)lbn * biosize >= filesize) { bcount = 0; } else if ((off_t)(lbn + 1) * biosize > filesize) { bcount = filesize - (off_t)lbn *biosize; } else { bcount = biosize; } nextlbn = lbn + 1; nextsize = MIN(biosize, filesize - nextlbn * biosize); SDT_PROBE4(fusefs, , io, read_bio_backend_start, biosize, (int)lbn, on, bcount); if (bcount < biosize) { /* If near EOF, don't do readahead */ err = bread(vp, lbn, bcount, NOCRED, &bp); } else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { /* Try clustered read */ long totread = uio->uio_resid + on; seqcount = MIN(seqcount, data->max_readahead_blocks + 1); err = cluster_read(vp, filesize, lbn, bcount, NOCRED, totread, seqcount, 0, &bp); } else if (seqcount > 1 && data->max_readahead_blocks >= 1) { /* Try non-clustered readahead */ err = breadn(vp, lbn, bcount, &nextlbn, &nextsize, 1, NOCRED, &bp); } else { /* Just read what was requested */ err = bread(vp, lbn, bcount, NOCRED, &bp); } if (err) { brelse(bp); bp = NULL; break; } /* * on is the offset into the current bp. Figure out how many * bytes we can copy out of the bp. 
Note that bcount is * NOT DEV_BSIZE aligned. * * Then figure out how many bytes we can copy into the uio. */ n = 0; if (on < bcount - bp->b_resid) n = MIN((unsigned)(bcount - bp->b_resid - on), uio->uio_resid); if (n > 0) { SDT_PROBE2(fusefs, , io, read_bio_backend_feed, n, bp); err = uiomove(bp->b_data + on, n, uio); } vfs_bio_brelse(bp, ioflag); SDT_PROBE4(fusefs, , io, read_bio_backend_end, err, uio->uio_resid, n, bp); if (bp->b_resid > 0) { /* Short read indicates EOF */ break; } } return (err); } SDT_PROBE_DEFINE1(fusefs, , io, read_directbackend_start, "struct fuse_read_in*"); SDT_PROBE_DEFINE3(fusefs, , io, read_directbackend_complete, "struct fuse_dispatcher*", "struct fuse_read_in*", "struct uio*"); static int fuse_read_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh) { struct fuse_data *data; struct fuse_dispatcher fdi; struct fuse_read_in *fri; int err = 0; data = fuse_get_mpdata(vp->v_mount); if (uio->uio_resid == 0) return (0); fdisp_init(&fdi, 0); /* * XXX In "normal" case we use an intermediate kernel buffer for * transmitting data from daemon's context to ours. Eventually, we should * get rid of this. Anyway, if the target uio lives in sysspace (we are * called from pageops), and the input data doesn't need kernel-side * processing (we are not called from readdir) we can already invoke * an optimized, "peer-to-peer" I/O routine. */ while (uio->uio_resid > 0) { fdi.iosize = sizeof(*fri); fdisp_make_vp(&fdi, FUSE_READ, vp, uio->uio_td, cred); fri = fdi.indata; fri->fh = fufh->fh_id; fri->offset = uio->uio_offset; fri->size = MIN(uio->uio_resid, fuse_get_mpdata(vp->v_mount)->max_read); if (fuse_libabi_geq(data, 7, 9)) { /* See comment regarding FUSE_WRITE_LOCKOWNER */ fri->read_flags = 0; fri->flags = fufh_type_2_fflags(fufh->fufh_type); } SDT_PROBE1(fusefs, , io, read_directbackend_start, fri); if ((err = fdisp_wait_answ(&fdi))) goto out; SDT_PROBE3(fusefs, , io, read_directbackend_complete, &fdi, fri, uio); if ((err = uiomove(fdi.answ, MIN(fri->size, fdi.iosize), uio))) break; if (fdi.iosize < fri->size) { /* * Short read. Should only happen at EOF or with * direct io. */ break; } } out: fdisp_destroy(&fdi); return (err); } static int fuse_write_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh, off_t filesize, int ioflag, bool pages) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct fuse_data *data; struct fuse_write_in *fwi; struct fuse_write_out *fwo; struct fuse_dispatcher fdi; size_t chunksize; void *fwi_data; off_t as_written_offset; int diff; int err = 0; bool direct_io = fufh->fuse_open_flags & FOPEN_DIRECT_IO; + bool wrote_anything = false; uint32_t write_flags; data = fuse_get_mpdata(vp->v_mount); /* * Don't set FUSE_WRITE_LOCKOWNER in write_flags. It can't be set * accurately when using POSIX AIO, libfuse doesn't use it, and I'm not * aware of any file systems that do. It was an attempt to add * Linux-style mandatory locking to the FUSE protocol, but mandatory * locking is deprecated even on Linux. See Linux commit * f33321141b273d60cbb3a8f56a5489baad82ba5e . */ /* * Set FUSE_WRITE_CACHE whenever we don't know the uid, gid, and/or pid * that originated a write. For example when writing from the * writeback cache. I don't know of a single file system that cares, * but the protocol says we're supposed to do this. */ write_flags = !pages && ( (ioflag & IO_DIRECT) || !fsess_opt_datacache(vnode_mount(vp)) || fuse_data_cache_mode != FUSE_CACHE_WB) ? 
0 : FUSE_WRITE_CACHE; if (uio->uio_resid == 0) return (0); if (ioflag & IO_APPEND) uio_setoffset(uio, filesize); if (vn_rlimit_fsize(vp, uio, uio->uio_td)) return (EFBIG); fdisp_init(&fdi, 0); while (uio->uio_resid > 0) { chunksize = MIN(uio->uio_resid, data->max_write); fdi.iosize = sizeof(*fwi) + chunksize; fdisp_make_vp(&fdi, FUSE_WRITE, vp, uio->uio_td, cred); fwi = fdi.indata; fwi->fh = fufh->fh_id; fwi->offset = uio->uio_offset; fwi->size = chunksize; fwi->write_flags = write_flags; if (fuse_libabi_geq(data, 7, 9)) { fwi->flags = fufh_type_2_fflags(fufh->fufh_type); fwi_data = (char *)fdi.indata + sizeof(*fwi); } else { fwi_data = (char *)fdi.indata + FUSE_COMPAT_WRITE_IN_SIZE; } if ((err = uiomove(fwi_data, chunksize, uio))) break; retry: err = fdisp_wait_answ(&fdi); if (err == ERESTART || err == EINTR || err == EWOULDBLOCK) { /* * Rewind the uio so dofilewrite will know it's * incomplete */ uio->uio_resid += fwi->size; uio->uio_offset -= fwi->size; /* * Change ERESTART into EINTR because we can't rewind * uio->uio_iov. Basically, once uiomove(9) has been * called, it's impossible to restart a syscall. */ if (err == ERESTART) err = EINTR; break; } else if (err) { break; + } else { + wrote_anything = true; } fwo = ((struct fuse_write_out *)fdi.answ); /* Adjust the uio in the case of short writes */ diff = fwi->size - fwo->size; as_written_offset = uio->uio_offset - diff; if (as_written_offset - diff > filesize) fuse_vnode_setsize(vp, as_written_offset); if (as_written_offset - diff >= filesize) fvdat->flag &= ~FN_SIZECHANGE; if (diff < 0) { printf("WARNING: misbehaving FUSE filesystem " "wrote more data than we provided it\n"); err = EINVAL; break; } else if (diff > 0) { /* Short write */ if (!direct_io) { printf("WARNING: misbehaving FUSE filesystem: " "short writes are only allowed with " "direct_io\n"); } if (ioflag & IO_DIRECT) { /* Return early */ uio->uio_resid += diff; uio->uio_offset -= diff; break; } else { /* Resend the unwritten portion of data */ fdi.iosize = sizeof(*fwi) + diff; /* Refresh fdi without clearing data buffer */ fdisp_refresh_vp(&fdi, FUSE_WRITE, vp, uio->uio_td, cred); fwi = fdi.indata; MPASS2(fwi == fdi.indata, "FUSE dispatcher " "reallocated despite no increase in " "size?"); void *src = (char*)fwi_data + fwo->size; memmove(fwi_data, src, diff); fwi->fh = fufh->fh_id; fwi->offset = as_written_offset; fwi->size = diff; fwi->write_flags = write_flags; goto retry; } } } fdisp_destroy(&fdi); + + if (wrote_anything) + fuse_vnode_undirty_cached_timestamps(vp); return (err); } SDT_PROBE_DEFINE6(fusefs, , io, write_biobackend_start, "int64_t", "int", "int", "struct uio*", "int", "bool"); SDT_PROBE_DEFINE2(fusefs, , io, write_biobackend_append_race, "long", "int"); SDT_PROBE_DEFINE2(fusefs, , io, write_biobackend_issue, "int", "struct buf*"); static int fuse_write_biobackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh, int ioflag, pid_t pid) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct buf *bp; daddr_t lbn; off_t filesize; int bcount; int n, on, seqcount, err = 0; bool last_page; const int biosize = fuse_iosize(vp); seqcount = ioflag >> IO_SEQSHIFT; KASSERT(uio->uio_rw == UIO_WRITE, ("fuse_write_biobackend mode")); if (vp->v_type != VREG) return (EIO); if (uio->uio_offset < 0) return (EINVAL); if (uio->uio_resid == 0) return (0); err = fuse_vnode_size(vp, &filesize, cred, curthread); if (err) return err; if (ioflag & IO_APPEND) uio_setoffset(uio, filesize); if (vn_rlimit_fsize(vp, uio, uio->uio_td)) return (EFBIG); 
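	/*
	 * Main loop: map the uio onto logical blocks of 'biosize' bytes,
	 * get or create a buffer for each block, copy the user data in,
	 * and issue the buffer as a synchronous, asynchronous, clustered,
	 * or delayed write depending on ioflag and memory pressure.
	 */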
do { bool direct_append, extending; if (fuse_isdeadfs(vp)) { err = ENXIO; break; } lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize - 1); n = MIN((unsigned)(biosize - on), uio->uio_resid); again: /* Get or create a buffer for the write */ direct_append = uio->uio_offset == filesize && n; if (uio->uio_offset + n < filesize) { extending = false; if ((off_t)(lbn + 1) * biosize < filesize) { /* Not the file's last block */ bcount = biosize; } else { /* The file's last block */ bcount = filesize - (off_t)lbn * biosize; } } else { extending = true; bcount = on + n; } if (howmany(((off_t)lbn * biosize + on + n - 1), PAGE_SIZE) >= howmany(filesize, PAGE_SIZE)) last_page = true; else last_page = false; if (direct_append) { /* * Take care to preserve the buffer's B_CACHE state so * as not to cause an unnecessary read. */ bp = getblk(vp, lbn, on, PCATCH, 0, 0); if (bp != NULL) { uint32_t save = bp->b_flags & B_CACHE; allocbuf(bp, bcount); bp->b_flags |= save; } } else { bp = getblk(vp, lbn, bcount, PCATCH, 0, 0); } if (!bp) { err = EINTR; break; } if (extending) { /* * Extend file _after_ locking buffer so we won't race * with other readers */ err = fuse_vnode_setsize(vp, uio->uio_offset + n); filesize = uio->uio_offset + n; fvdat->flag |= FN_SIZECHANGE; if (err) { brelse(bp); break; } } SDT_PROBE6(fusefs, , io, write_biobackend_start, lbn, on, n, uio, bcount, direct_append); /* * Issue a READ if B_CACHE is not set. In special-append * mode, B_CACHE is based on the buffer prior to the write * op and is typically set, avoiding the read. If a read * is required in special append mode, the server will * probably send us a short-read since we extended the file * on our end, resulting in b_resid == 0 and, thusly, * B_CACHE getting set. * * We can also avoid issuing the read if the write covers * the entire buffer. We have to make sure the buffer state * is reasonable in this case since we will not be initiating * I/O. See the comments in kern/vfs_bio.c's getblk() for * more information. * * B_CACHE may also be set due to the buffer being cached * normally. */ if (on == 0 && n == bcount) { bp->b_flags |= B_CACHE; bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; } if ((bp->b_flags & B_CACHE) == 0) { bp->b_iocmd = BIO_READ; vfs_busy_pages(bp, 0); fuse_io_strategy(vp, bp); if ((err = bp->b_error)) { brelse(bp); break; } if (bp->b_resid > 0) { /* * Short read indicates EOF. Update file size * from the server and try again. */ SDT_PROBE2(fusefs, , io, trace, 1, "Short read during a RMW"); brelse(bp); err = fuse_vnode_size(vp, &filesize, cred, curthread); if (err) break; else goto again; } } if (bp->b_wcred == NOCRED) bp->b_wcred = crhold(cred); /* * If dirtyend exceeds file size, chop it down. This should * not normally occur but there is an append race where it * might occur XXX, so we log it. * * If the chopping creates a reverse-indexed or degenerate * situation with dirtyoff/end, we 0 both of them. */ if (bp->b_dirtyend > bcount) { SDT_PROBE2(fusefs, , io, write_biobackend_append_race, (long)bp->b_blkno * biosize, bp->b_dirtyend - bcount); bp->b_dirtyend = bcount; } if (bp->b_dirtyoff >= bp->b_dirtyend) bp->b_dirtyoff = bp->b_dirtyend = 0; /* * If the new write will leave a contiguous dirty * area, just update the b_dirtyoff and b_dirtyend, * otherwise force a write rpc of the old dirty area. 
* * While it is possible to merge discontiguous writes due to * our having a B_CACHE buffer ( and thus valid read data * for the hole), we don't because it could lead to * significant cache coherency problems with multiple clients, * especially if locking is implemented later on. * * as an optimization we could theoretically maintain * a linked list of discontinuous areas, but we would still * have to commit them separately so there isn't much * advantage to it except perhaps a bit of asynchronization. */ if (bp->b_dirtyend > 0 && (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { /* * Yes, we mean it. Write out everything to "storage" * immediately, without hesitation. (Apart from other * reasons: the only way to know if a write is valid * if its actually written out.) */ SDT_PROBE2(fusefs, , io, write_biobackend_issue, 0, bp); bwrite(bp); if (bp->b_error == EINTR) { err = EINTR; break; } goto again; } err = uiomove((char *)bp->b_data + on, n, uio); if (err) { bp->b_ioflags |= BIO_ERROR; bp->b_error = err; brelse(bp); break; /* TODO: vfs_bio_clrbuf like ffs_write does? */ } /* * Only update dirtyoff/dirtyend if not a degenerate * condition. */ if (n) { if (bp->b_dirtyend > 0) { bp->b_dirtyoff = MIN(on, bp->b_dirtyoff); bp->b_dirtyend = MAX((on + n), bp->b_dirtyend); } else { bp->b_dirtyoff = on; bp->b_dirtyend = on + n; } vfs_bio_set_valid(bp, on, n); } vfs_bio_set_flags(bp, ioflag); bp->b_flags |= B_FUSEFS_WRITE_CACHE; if (ioflag & IO_SYNC) { SDT_PROBE2(fusefs, , io, write_biobackend_issue, 2, bp); if (!(ioflag & IO_VMIO)) bp->b_flags &= ~B_FUSEFS_WRITE_CACHE; err = bwrite(bp); } else if (vm_page_count_severe() || buf_dirty_count_severe() || (ioflag & IO_ASYNC)) { bp->b_flags |= B_CLUSTEROK; SDT_PROBE2(fusefs, , io, write_biobackend_issue, 3, bp); bawrite(bp); } else if (on == 0 && n == bcount) { if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) { bp->b_flags |= B_CLUSTEROK; SDT_PROBE2(fusefs, , io, write_biobackend_issue, 4, bp); cluster_write(vp, bp, filesize, seqcount, 0); } else { SDT_PROBE2(fusefs, , io, write_biobackend_issue, 5, bp); bawrite(bp); } } else if (ioflag & IO_DIRECT) { bp->b_flags |= B_CLUSTEROK; SDT_PROBE2(fusefs, , io, write_biobackend_issue, 6, bp); bawrite(bp); } else { bp->b_flags &= ~B_CLUSTEROK; SDT_PROBE2(fusefs, , io, write_biobackend_issue, 7, bp); bdwrite(bp); } if (err) break; } while (uio->uio_resid > 0 && n > 0); return (err); } int fuse_io_strategy(struct vnode *vp, struct buf *bp) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct fuse_filehandle *fufh; struct ucred *cred; struct uio *uiop; struct uio uio; struct iovec io; off_t filesize; int error = 0; int fflag; /* We don't know the true pid when we're dealing with the cache */ pid_t pid = 0; const int biosize = fuse_iosize(vp); MPASS(vp->v_type == VREG || vp->v_type == VDIR); MPASS(bp->b_iocmd == BIO_READ || bp->b_iocmd == BIO_WRITE); fflag = bp->b_iocmd == BIO_READ ? FREAD : FWRITE; cred = bp->b_iocmd == BIO_READ ? bp->b_rcred : bp->b_wcred; error = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid); if (bp->b_iocmd == BIO_READ && error == EBADF) { /* * This may be a read-modify-write operation on a cached file * opened O_WRONLY. The FUSE protocol allows this. 
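 * Buffer-cache I/O does not know which file descriptor originally
 * triggered it, so fall back to any open handle with write permission
 * and read through that.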
*/ error = fuse_filehandle_get(vp, FWRITE, &fufh, cred, pid); } if (error) { printf("FUSE: strategy: filehandles are closed\n"); bp->b_ioflags |= BIO_ERROR; bp->b_error = error; bufdone(bp); return (error); } uiop = &uio; uiop->uio_iov = &io; uiop->uio_iovcnt = 1; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = curthread; /* * clear BIO_ERROR and B_INVAL state prior to initiating the I/O. We * do this here so we do not have to do it in all the code that * calls us. */ bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; KASSERT(!(bp->b_flags & B_DONE), ("fuse_io_strategy: bp %p already marked done", bp)); if (bp->b_iocmd == BIO_READ) { ssize_t left; io.iov_len = uiop->uio_resid = bp->b_bcount; io.iov_base = bp->b_data; uiop->uio_rw = UIO_READ; uiop->uio_offset = ((off_t)bp->b_lblkno) * biosize; error = fuse_read_directbackend(vp, uiop, cred, fufh); /* * Store the amount we failed to read in the buffer's private * field, so callers can truncate the file if necessary. */ if (!error && uiop->uio_resid) { int nread = bp->b_bcount - uiop->uio_resid; left = uiop->uio_resid; bzero((char *)bp->b_data + nread, left); if ((fvdat->flag & FN_SIZECHANGE) == 0) { /* * A short read with no error, when not using * direct io, and when no writes are cached, * indicates EOF caused by a server-side * truncation. Clear the attr cache so we'll * pick up the new file size and timestamps. * * We must still bzero the remaining buffer so * uninitialized data doesn't get exposed by a * future truncate that extends the file. * * To prevent lock order problems, we must * truncate the file upstack, not here. */ SDT_PROBE2(fusefs, , io, trace, 1, "Short read of a clean file"); fuse_vnode_clear_attr_cache(vp); } else { /* * If dirty writes _are_ cached beyond EOF, * that indicates a newly created hole that the * server doesn't know about. Those don't pose * any problem. * XXX: we don't currently track whether dirty * writes are cached beyond EOF, before EOF, or * both. */ SDT_PROBE2(fusefs, , io, trace, 1, "Short read of a dirty file"); uiop->uio_resid = 0; } } if (error) { bp->b_ioflags |= BIO_ERROR; bp->b_error = error; } } else { /* * Setup for actual write */ error = fuse_vnode_size(vp, &filesize, cred, curthread); if (error) { bp->b_ioflags |= BIO_ERROR; bp->b_error = error; bufdone(bp); return (error); } if ((off_t)bp->b_lblkno * biosize + bp->b_dirtyend > filesize) bp->b_dirtyend = filesize - (off_t)bp->b_lblkno * biosize; if (bp->b_dirtyend > bp->b_dirtyoff) { io.iov_len = uiop->uio_resid = bp->b_dirtyend - bp->b_dirtyoff; uiop->uio_offset = (off_t)bp->b_lblkno * biosize + bp->b_dirtyoff; io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; uiop->uio_rw = UIO_WRITE; bool pages = bp->b_flags & B_FUSEFS_WRITE_CACHE; error = fuse_write_directbackend(vp, uiop, cred, fufh, filesize, 0, pages); if (error == EINTR || error == ETIMEDOUT) { bp->b_flags &= ~(B_INVAL | B_NOCACHE); if ((bp->b_flags & B_PAGING) == 0) { bdirty(bp); bp->b_flags &= ~B_DONE; } if ((error == EINTR || error == ETIMEDOUT) && (bp->b_flags & B_ASYNC) == 0) bp->b_flags |= B_EINTR; } else { if (error) { bp->b_ioflags |= BIO_ERROR; bp->b_flags |= B_INVAL; bp->b_error = error; } bp->b_dirtyoff = bp->b_dirtyend = 0; } } else { bp->b_resid = 0; bufdone(bp); return (0); } } bp->b_resid = uiop->uio_resid; bufdone(bp); return (error); } int fuse_io_flushbuf(struct vnode *vp, int waitfor, struct thread *td) { return (vn_fsync_buf(vp, waitfor)); } /* * Flush and invalidate all dirty buffers.
If another process is already * doing the flush, just wait for completion. */ int fuse_io_invalbuf(struct vnode *vp, struct thread *td) { struct fuse_vnode_data *fvdat = VTOFUD(vp); int error = 0; if (vp->v_iflag & VI_DOOMED) return 0; ASSERT_VOP_ELOCKED(vp, "fuse_io_invalbuf"); while (fvdat->flag & FN_FLUSHINPROG) { struct proc *p = td->td_proc; if (vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF) return EIO; fvdat->flag |= FN_FLUSHWANT; tsleep(&fvdat->flag, PRIBIO + 2, "fusevinv", 2 * hz); error = 0; if (p != NULL) { PROC_LOCK(p); if (SIGNOTEMPTY(p->p_siglist) || SIGNOTEMPTY(td->td_siglist)) error = EINTR; PROC_UNLOCK(p); } if (error == EINTR) return EINTR; } fvdat->flag |= FN_FLUSHINPROG; if (vp->v_bufobj.bo_object != NULL) { VM_OBJECT_WLOCK(vp->v_bufobj.bo_object); vm_object_page_clean(vp->v_bufobj.bo_object, 0, 0, OBJPC_SYNC); VM_OBJECT_WUNLOCK(vp->v_bufobj.bo_object); } error = vinvalbuf(vp, V_SAVE, PCATCH, 0); while (error) { if (error == ERESTART || error == EINTR) { fvdat->flag &= ~FN_FLUSHINPROG; if (fvdat->flag & FN_FLUSHWANT) { fvdat->flag &= ~FN_FLUSHWANT; wakeup(&fvdat->flag); } return EINTR; } error = vinvalbuf(vp, V_SAVE, PCATCH, 0); } fvdat->flag &= ~FN_FLUSHINPROG; if (fvdat->flag & FN_FLUSHWANT) { fvdat->flag &= ~FN_FLUSHWANT; wakeup(&fvdat->flag); } return (error); } Index: projects/fuse2/sys/fs/fuse/fuse_node.c =================================================================== --- projects/fuse2/sys/fs/fuse/fuse_node.c (revision 349395) +++ projects/fuse2/sys/fs/fuse/fuse_node.c (revision 349396) @@ -1,443 +1,468 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2007-2009 Google Inc. and Amit Singh * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (C) 2005 Csaba Henk. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fuse.h" #include "fuse_node.h" #include "fuse_internal.h" #include "fuse_io.h" #include "fuse_ipc.h" SDT_PROVIDER_DECLARE(fusefs); /* * Fuse trace probe: * arg0: verbosity. Higher numbers give more verbose messages * arg1: Textual message */ SDT_PROBE_DEFINE2(fusefs, , node, trace, "int", "char*"); MALLOC_DEFINE(M_FUSEVN, "fuse_vnode", "fuse vnode private data"); static int sysctl_fuse_cache_mode(SYSCTL_HANDLER_ARGS); static int fuse_node_count = 0; SYSCTL_INT(_vfs_fusefs, OID_AUTO, node_count, CTLFLAG_RD, &fuse_node_count, 0, "Count of FUSE vnodes"); int fuse_data_cache_mode = FUSE_CACHE_WT; SYSCTL_PROC(_vfs_fusefs, OID_AUTO, data_cache_mode, CTLTYPE_INT|CTLFLAG_RW, &fuse_data_cache_mode, 0, sysctl_fuse_cache_mode, "I", "Zero: disable caching of FUSE file data; One: write-through caching " "(default); Two: write-back caching (generally unsafe)"); static int sysctl_fuse_cache_mode(SYSCTL_HANDLER_ARGS) { int val, error; val = *(int *)arg1; error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return (error); switch (val) { case FUSE_CACHE_UC: case FUSE_CACHE_WT: case FUSE_CACHE_WB: *(int *)arg1 = val; break; default: return (EDOM); } return (0); } static void fuse_vnode_init(struct vnode *vp, struct fuse_vnode_data *fvdat, uint64_t nodeid, enum vtype vtyp) { fvdat->nid = nodeid; LIST_INIT(&fvdat->handles); vattr_null(&fvdat->cached_attrs); if (nodeid == FUSE_ROOT_ID) { vp->v_vflag |= VV_ROOT; } vp->v_type = vtyp; vp->v_data = fvdat; atomic_add_acq_int(&fuse_node_count, 1); } void fuse_vnode_destroy(struct vnode *vp) { struct fuse_vnode_data *fvdat = vp->v_data; vp->v_data = NULL; KASSERT(LIST_EMPTY(&fvdat->handles), ("Destroying fuse vnode with open files!")); free(fvdat, M_FUSEVN); atomic_subtract_acq_int(&fuse_node_count, 1); } int fuse_vnode_cmp(struct vnode *vp, void *nidp) { return (VTOI(vp) != *((uint64_t *)nidp)); } SDT_PROBE_DEFINE3(fusefs, , node, stale_vnode, "struct vnode*", "enum vtype", "uint64_t"); static int fuse_vnode_alloc(struct mount *mp, struct thread *td, uint64_t nodeid, enum vtype vtyp, struct vnode **vpp) { struct fuse_data *data; struct fuse_vnode_data *fvdat; struct vnode *vp2; int err = 0; data = fuse_get_mpdata(mp); if (vtyp == VNON) { return EINVAL; } *vpp = NULL; err = vfs_hash_get(mp, fuse_vnode_hash(nodeid), LK_EXCLUSIVE, td, vpp, 
fuse_vnode_cmp, &nodeid); if (err) return (err); if (*vpp) { if ((*vpp)->v_type != vtyp) { /* * STALE vnode! This probably indicates a buggy * server, but it could also be the result of a race * between FUSE_LOOKUP and another client's * FUSE_UNLINK/FUSE_CREATE */ SDT_PROBE3(fusefs, , node, stale_vnode, *vpp, vtyp, nodeid); fuse_internal_vnode_disappear(*vpp); lockmgr((*vpp)->v_vnlock, LK_RELEASE, NULL); *vpp = NULL; return (EAGAIN); } MPASS((*vpp)->v_data != NULL); MPASS(VTOFUD(*vpp)->nid == nodeid); SDT_PROBE2(fusefs, , node, trace, 1, "vnode taken from hash"); return (0); } fvdat = malloc(sizeof(*fvdat), M_FUSEVN, M_WAITOK | M_ZERO); switch (vtyp) { case VFIFO: err = getnewvnode("fuse", mp, &fuse_fifoops, vpp); break; default: err = getnewvnode("fuse", mp, &fuse_vnops, vpp); break; } if (err) { free(fvdat, M_FUSEVN); return (err); } lockmgr((*vpp)->v_vnlock, LK_EXCLUSIVE, NULL); fuse_vnode_init(*vpp, fvdat, nodeid, vtyp); err = insmntque(*vpp, mp); ASSERT_VOP_ELOCKED(*vpp, "fuse_vnode_alloc"); if (err) { lockmgr((*vpp)->v_vnlock, LK_RELEASE, NULL); free(fvdat, M_FUSEVN); *vpp = NULL; return (err); } /* Disallow async reads for fifos because UFS does. I don't know why */ if (data->dataflags & FSESS_ASYNC_READ && vtyp != VFIFO) VN_LOCK_ASHARE(*vpp); err = vfs_hash_insert(*vpp, fuse_vnode_hash(nodeid), LK_EXCLUSIVE, td, &vp2, fuse_vnode_cmp, &nodeid); if (err) { lockmgr((*vpp)->v_vnlock, LK_RELEASE, NULL); free(fvdat, M_FUSEVN); *vpp = NULL; return (err); } if (vp2 != NULL) { *vpp = vp2; return (0); } ASSERT_VOP_ELOCKED(*vpp, "fuse_vnode_alloc"); return (0); } int fuse_vnode_get(struct mount *mp, struct fuse_entry_out *feo, uint64_t nodeid, struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, enum vtype vtyp) { struct thread *td = (cnp != NULL ? cnp->cn_thread : curthread); /* * feo should only be NULL for the root directory, which (when libfuse * is used) always has generation 0 */ uint64_t generation = feo ? feo->generation : 0; int err = 0; err = fuse_vnode_alloc(mp, td, nodeid, vtyp, vpp); if (err) { return err; } if (dvp != NULL) { MPASS(cnp && (cnp->cn_flags & ISDOTDOT) == 0); MPASS(cnp && !(cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.')); fuse_vnode_setparent(*vpp, dvp); } if (dvp != NULL && cnp != NULL && (cnp->cn_flags & MAKEENTRY) != 0 && feo != NULL && (feo->entry_valid != 0 || feo->entry_valid_nsec != 0)) { struct timespec timeout; ASSERT_VOP_LOCKED(*vpp, "fuse_vnode_get"); ASSERT_VOP_LOCKED(dvp, "fuse_vnode_get"); fuse_validity_2_timespec(feo, &timeout); cache_enter_time(dvp, *vpp, cnp, &timeout, NULL); } VTOFUD(*vpp)->generation = generation; /* * In userland, libfuse uses cached lookups for dot and dotdot entries, * thus it does not really bump the nlookup counter for forget. * Follow the same semantic and avoid the bump in order to keep * nlookup counters consistent. 
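 *
 * A hedged sketch of how that count is eventually repaid (the names follow
 * this file's conventions, but the real reclaim path lives elsewhere): on
 * reclaim the kernel returns the accumulated total to the daemon in a
 * FUSE_FORGET message, roughly:
 *
 *	struct fuse_forget_in *ffi = fdi.indata;
 *	ffi->nlookup = fvdat->nlookup;	// daemon may drop the inode at 0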
*/ if (cnp == NULL || ((cnp->cn_flags & ISDOTDOT) == 0 && (cnp->cn_namelen != 1 || cnp->cn_nameptr[0] != '.'))) VTOFUD(*vpp)->nlookup++; return 0; } /* * Called for every fusefs vnode open to initialize the vnode (not * fuse_filehandle) for use */ void fuse_vnode_open(struct vnode *vp, int32_t fuse_open_flags, struct thread *td) { if (vnode_vtype(vp) == VREG) vnode_create_vobject(vp, 0, td); } int fuse_vnode_savesize(struct vnode *vp, struct ucred *cred, pid_t pid) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct thread *td = curthread; struct fuse_filehandle *fufh = NULL; struct fuse_dispatcher fdi; struct fuse_setattr_in *fsai; int err = 0; ASSERT_VOP_ELOCKED(vp, "fuse_io_extend"); if (fuse_isdeadfs(vp)) { return EBADF; } if (vnode_vtype(vp) == VDIR) { return EISDIR; } if (vfs_isrdonly(vnode_mount(vp))) { return EROFS; } if (cred == NULL) { cred = td->td_ucred; } fdisp_init(&fdi, sizeof(*fsai)); fdisp_make_vp(&fdi, FUSE_SETATTR, vp, td, cred); fsai = fdi.indata; fsai->valid = 0; /* Truncate to a new value. */ MPASS((fvdat->flag & FN_SIZECHANGE) != 0); fsai->size = fvdat->cached_attrs.va_size; fsai->valid |= FATTR_SIZE; fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid); if (fufh) { fsai->fh = fufh->fh_id; fsai->valid |= FATTR_FH; } err = fdisp_wait_answ(&fdi); fdisp_destroy(&fdi); if (err == 0) fvdat->flag &= ~FN_SIZECHANGE; return err; } /* * Adjust the vnode's size to a new value, such as that provided by * FUSE_GETATTR. */ int fuse_vnode_setsize(struct vnode *vp, off_t newsize) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct vattr *attrs; off_t oldsize; size_t iosize; struct buf *bp = NULL; int err = 0; ASSERT_VOP_ELOCKED(vp, "fuse_vnode_setsize"); iosize = fuse_iosize(vp); oldsize = fvdat->cached_attrs.va_size; fvdat->cached_attrs.va_size = newsize; if ((attrs = VTOVA(vp)) != NULL) attrs->va_size = newsize; if (newsize < oldsize) { daddr_t lbn; err = vtruncbuf(vp, newsize, fuse_iosize(vp)); if (err) goto out; if (newsize % iosize == 0) goto out; /* * Zero the contents of the last partial block. * Sure seems like vtruncbuf should do this for us. 
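 *
 * Worked example, assuming iosize = 0x10000: truncating to
 * newsize = 0x28702 gives lbn = 0x28702 / 0x10000 = 2, and bytes
 * [0x8702, 0xffff] of that block are cleared so that stale data cannot
 * reappear if the file is later extended.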
*/ lbn = newsize / iosize; bp = getblk(vp, lbn, iosize, PCATCH, 0, 0); if (!bp) { err = EINTR; goto out; } if (!(bp->b_flags & B_CACHE)) goto out; /* Nothing to do */ MPASS(bp->b_flags & B_VMIO); vfs_bio_clrbuf(bp); bp->b_dirtyend = MIN(bp->b_dirtyend, newsize - lbn * iosize); } out: if (bp) brelse(bp); vnode_pager_setsize(vp, newsize); return err; } /* Get the current, possibly dirty, size of the file */ int fuse_vnode_size(struct vnode *vp, off_t *filesize, struct ucred *cred, struct thread *td) { struct fuse_vnode_data *fvdat = VTOFUD(vp); int error = 0; if (!(fvdat->flag & FN_SIZECHANGE) && (VTOVA(vp) == NULL || fvdat->cached_attrs.va_size == VNOVAL)) error = fuse_internal_do_getattr(vp, NULL, cred, td); if (!error) *filesize = fvdat->cached_attrs.va_size; return error; } + +void +fuse_vnode_undirty_cached_timestamps(struct vnode *vp) +{ + struct fuse_vnode_data *fvdat = VTOFUD(vp); + + fvdat->flag &= ~(FN_MTIMECHANGE | FN_CTIMECHANGE); +} + +/* Update a fuse file's cached timestamps */ +void +fuse_vnode_update(struct vnode *vp, int flags) +{ + struct fuse_vnode_data *fvdat = VTOFUD(vp); + struct timespec ts; + + vfs_timestamp(&ts); + + if (flags & FN_MTIMECHANGE) + fvdat->cached_attrs.va_mtime = ts; + if (flags & FN_CTIMECHANGE) + fvdat->cached_attrs.va_ctime = ts; + + fvdat->flag |= flags; +} Index: projects/fuse2/sys/fs/fuse/fuse_node.h =================================================================== --- projects/fuse2/sys/fs/fuse/fuse_node.h (revision 349395) +++ projects/fuse2/sys/fs/fuse/fuse_node.h (revision 349396) @@ -1,183 +1,194 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2007-2009 Google Inc. and Amit Singh * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (C) 2005 Csaba Henk. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _FUSE_NODE_H_ #define _FUSE_NODE_H_ #include #include #include #include "fuse_file.h" -#define FN_REVOKED 0x00000020 -#define FN_FLUSHINPROG 0x00000040 -#define FN_FLUSHWANT 0x00000080 +#define FN_REVOKED 0x00000020 +#define FN_FLUSHINPROG 0x00000040 +#define FN_FLUSHWANT 0x00000080 /* * Indicates that the file's size is dirty; the kernel has changed it but not * yet sent the change to the daemon. When this bit is set, the - * cache_attrs.va_size field does not time out + * cached_attrs.va_size field does not time out. */ -#define FN_SIZECHANGE 0x00000100 -#define FN_DIRECTIO 0x00000200 +#define FN_SIZECHANGE 0x00000100 +#define FN_DIRECTIO 0x00000200 /* Indicates that parent_nid is valid */ -#define FN_PARENT_NID 0x00000400 +#define FN_PARENT_NID 0x00000400 +/* + * Indicates that the file's cached timestamps are dirty. They will be flushed + * during the next SETATTR or WRITE. Until then, the cached fields will not + * time out. + */ +#define FN_MTIMECHANGE 0x00000800 +#define FN_CTIMECHANGE 0x00001000 + struct fuse_vnode_data { /** self **/ uint64_t nid; uint64_t generation; /** parent **/ uint64_t parent_nid; /** I/O **/ /* List of file handles for all of the vnode's open file descriptors */ LIST_HEAD(, fuse_filehandle) handles; /** flags **/ uint32_t flag; /** meta **/ /* The monotonic time after which the attr cache is invalid */ struct bintime attr_cache_timeout; /* * Monotonic time after which the entry is invalid. Used for lookups * by nodeid instead of pathname. */ struct bintime entry_cache_timeout; struct vattr cached_attrs; uint64_t nlookup; enum vtype vtype; }; /* * This overlays the fid structure (see mount.h). Mostly the same as the types * used by UFS and ext2. */ struct fuse_fid { uint16_t len; /* Length of structure. */ uint16_t pad; /* Force 32-bit alignment. */ uint32_t gen; /* Generation number. */ uint64_t nid; /* FUSE node id. */ }; #define VTOFUD(vp) \ ((struct fuse_vnode_data *)((vp)->v_data)) #define VTOI(vp) (VTOFUD(vp)->nid) static inline struct vattr* VTOVA(struct vnode *vp) { struct bintime now; getbinuptime(&now); if (bintime_cmp(&(VTOFUD(vp)->attr_cache_timeout), &now, >)) return &(VTOFUD(vp)->cached_attrs); else return NULL; } static inline void fuse_vnode_clear_attr_cache(struct vnode *vp) { bintime_clear(&VTOFUD(vp)->attr_cache_timeout); } static uint32_t inline fuse_vnode_hash(uint64_t id) { return (fnv_32_buf(&id, sizeof(id), FNV1_32_INIT)); } #define VTOILLU(vp) ((uint64_t)(VTOFUD(vp) ?
VTOI(vp) : 0)) #define FUSE_NULL_ID 0 extern struct vop_vector fuse_fifoops; extern struct vop_vector fuse_vnops; int fuse_vnode_cmp(struct vnode *vp, void *nidp); static inline void fuse_vnode_setparent(struct vnode *vp, struct vnode *dvp) { if (dvp != NULL && vp->v_type == VDIR) { MPASS(dvp->v_type == VDIR); VTOFUD(vp)->parent_nid = VTOI(dvp); VTOFUD(vp)->flag |= FN_PARENT_NID; } } int fuse_vnode_size(struct vnode *vp, off_t *filesize, struct ucred *cred, struct thread *td); void fuse_vnode_destroy(struct vnode *vp); int fuse_vnode_get(struct mount *mp, struct fuse_entry_out *feo, uint64_t nodeid, struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, enum vtype vtyp); void fuse_vnode_open(struct vnode *vp, int32_t fuse_open_flags, struct thread *td); int fuse_vnode_savesize(struct vnode *vp, struct ucred *cred, pid_t pid); int fuse_vnode_setsize(struct vnode *vp, off_t newsize); +void fuse_vnode_undirty_cached_timestamps(struct vnode *vp); + +void fuse_vnode_update(struct vnode *vp, int flags); #endif /* _FUSE_NODE_H_ */ Index: projects/fuse2/tests/sys/fs/fusefs/io.cc =================================================================== --- projects/fuse2/tests/sys/fs/fusefs/io.cc (revision 349395) +++ projects/fuse2/tests/sys/fs/fusefs/io.cc (revision 349396) @@ -1,502 +1,502 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2019 The FreeBSD Foundation * * This software was developed by BFF Storage Systems, LLC under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ extern "C" { #include #include #include #include #include #include } #include "mockfs.hh" #include "utils.hh" /* * For testing I/O like fsx does, but deterministically and without a real * underlying file system * * TODO: after fusefs gains the options to select cache mode for each mount * point, run each of these tests for all cache modes. 
*/ using namespace testing; const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const uint64_t ino = 42; static void compare(const void *tbuf, const void *controlbuf, off_t baseofs, ssize_t size) { int i; for (i = 0; i < size; i++) { if (((const char*)tbuf)[i] != ((const char*)controlbuf)[i]) { off_t ofs = baseofs + i; FAIL() << "miscompare at offset " << std::hex << std::showbase << ofs << ". expected = " << std::setw(2) << (unsigned)((const uint8_t*)controlbuf)[i] << " got = " << (unsigned)((const uint8_t*)tbuf)[i]; } } } class Io: public FuseTest, public WithParamInterface> { public: int m_backing_fd, m_control_fd, m_test_fd; off_t m_filesize; Io(): m_backing_fd(-1), m_control_fd(-1) {}; void SetUp() { m_filesize = 0; m_backing_fd = open("backing_file", O_RDWR | O_CREAT | O_TRUNC, 0644); if (m_backing_fd < 0) FAIL() << strerror(errno); m_control_fd = open("control", O_RDWR | O_CREAT | O_TRUNC, 0644); if (m_control_fd < 0) FAIL() << strerror(errno); srandom(22'9'1982); // Seed with my birthday m_init_flags = get<0>(GetParam()); m_maxwrite = get<1>(GetParam()); m_async = get<2>(GetParam()); FuseTest::SetUp(); if (IsSkipped()) return; expect_lookup(RELPATH, ino, S_IFREG | 0644, 0, 1); expect_open(ino, 0, 1); EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_WRITE && in.header.nodeid == ino); }, Eq(true)), _) ).WillRepeatedly(Invoke(ReturnImmediate([=](auto in, auto& out) { const char *buf = (const char*)in.body.bytes + sizeof(struct fuse_write_in); ssize_t isize = in.body.write.size; off_t iofs = in.body.write.offset; ASSERT_EQ(isize, pwrite(m_backing_fd, buf, isize, iofs)) << strerror(errno); SET_OUT_HEADER_LEN(out, write); out.body.write.size = isize; }))); EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_READ && in.header.nodeid == ino); }, Eq(true)), _) ).WillRepeatedly(Invoke(ReturnImmediate([=](auto in, auto& out) { ssize_t isize = in.body.write.size; off_t iofs = in.body.write.offset; void *buf = out.body.bytes; ssize_t osize; osize = pread(m_backing_fd, buf, isize, iofs); ASSERT_LE(0, osize) << strerror(errno); out.header.len = sizeof(struct fuse_out_header) + osize; }))); EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { - uint32_t valid = FATTR_SIZE | FATTR_FH; return (in.header.opcode == FUSE_SETATTR && in.header.nodeid == ino && - in.body.setattr.valid == valid); + (in.body.setattr.valid & FATTR_SIZE)); + }, Eq(true)), _) ).WillRepeatedly(Invoke(ReturnImmediate([=](auto in, auto& out) { ASSERT_EQ(0, ftruncate(m_backing_fd, in.body.setattr.size)) << strerror(errno); SET_OUT_HEADER_LEN(out, attr); out.body.attr.attr.ino = ino; out.body.attr.attr.mode = S_IFREG | 0755; out.body.attr.attr.size = in.body.setattr.size; out.body.attr.attr_valid = UINT64_MAX; }))); /* Any test that close()s will send FUSE_FLUSH and FUSE_RELEASE */ EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_FLUSH && in.header.nodeid == ino); }, Eq(true)), _) ).WillRepeatedly(Invoke(ReturnErrno(0))); EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_RELEASE && in.header.nodeid == ino); }, Eq(true)), _) ).WillRepeatedly(Invoke(ReturnErrno(0))); m_test_fd = open(FULLPATH, O_RDWR); EXPECT_LE(0, m_test_fd) << strerror(errno); } void TearDown() { if (m_test_fd >= 0) close(m_test_fd); if (m_backing_fd >= 0) close(m_backing_fd); if (m_control_fd >= 0) close(m_control_fd); FuseTest::TearDown(); }
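	/*
	 * Each do_* helper below applies the same operation to m_test_fd
	 * (through fusefs, backed by m_backing_fd) and to m_control_fd (an
	 * ordinary file), so any divergence between the two files indicates
	 * a fusefs bug.
	 */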
void do_closeopen() { ASSERT_EQ(0, close(m_test_fd)) << strerror(errno); m_test_fd = open("backing_file", O_RDWR); ASSERT_LE(0, m_test_fd) << strerror(errno); ASSERT_EQ(0, close(m_control_fd)) << strerror(errno); m_control_fd = open("control", O_RDWR); ASSERT_LE(0, m_control_fd) << strerror(errno); } void do_ftruncate(off_t offs) { ASSERT_EQ(0, ftruncate(m_test_fd, offs)) << strerror(errno); ASSERT_EQ(0, ftruncate(m_control_fd, offs)) << strerror(errno); m_filesize = offs; } void do_mapread(ssize_t size, off_t offs) { void *control_buf, *p; off_t pg_offset, page_mask; size_t map_size; page_mask = getpagesize() - 1; pg_offset = offs & page_mask; map_size = pg_offset + size; p = mmap(NULL, map_size, PROT_READ, MAP_FILE | MAP_SHARED, m_test_fd, offs - pg_offset); ASSERT_NE(p, MAP_FAILED) << strerror(errno); control_buf = malloc(size); ASSERT_NE(NULL, control_buf) << strerror(errno); ASSERT_EQ(size, pread(m_control_fd, control_buf, size, offs)) << strerror(errno); compare((void*)((char*)p + pg_offset), control_buf, offs, size); ASSERT_EQ(0, munmap(p, map_size)) << strerror(errno); free(control_buf); } void do_read(ssize_t size, off_t offs) { void *test_buf, *control_buf; ssize_t r; test_buf = malloc(size); ASSERT_NE(NULL, test_buf) << strerror(errno); control_buf = malloc(size); ASSERT_NE(NULL, control_buf) << strerror(errno); errno = 0; r = pread(m_test_fd, test_buf, size, offs); ASSERT_NE(-1, r) << strerror(errno); ASSERT_EQ(size, r) << "unexpected short read"; r = pread(m_control_fd, control_buf, size, offs); ASSERT_NE(-1, r) << strerror(errno); ASSERT_EQ(size, r) << "unexpected short read"; compare(test_buf, control_buf, offs, size); free(control_buf); free(test_buf); } void do_mapwrite(ssize_t size, off_t offs) { char *buf; void *p; off_t pg_offset, page_mask; size_t map_size; long i; page_mask = getpagesize() - 1; pg_offset = offs & page_mask; map_size = pg_offset + size; buf = (char*)malloc(size); ASSERT_NE(NULL, buf) << strerror(errno); for (i=0; i < size; i++) buf[i] = random(); if (offs + size > m_filesize) { /* * Must manually extend. vm_mmap_vnode will not implicitly * extend a vnode */ do_ftruncate(offs + size); } p = mmap(NULL, map_size, PROT_READ | PROT_WRITE, MAP_FILE | MAP_SHARED, m_test_fd, offs - pg_offset); ASSERT_NE(p, MAP_FAILED) << strerror(errno); bcopy(buf, (char*)p + pg_offset, size); ASSERT_EQ(size, pwrite(m_control_fd, buf, size, offs)) << strerror(errno); free(buf); ASSERT_EQ(0, munmap(p, map_size)) << strerror(errno); } void do_write(ssize_t size, off_t offs) { char *buf; long i; buf = (char*)malloc(size); ASSERT_NE(NULL, buf) << strerror(errno); for (i=0; i < size; i++) buf[i] = random(); ASSERT_EQ(size, pwrite(m_test_fd, buf, size, offs )) << strerror(errno); ASSERT_EQ(size, pwrite(m_control_fd, buf, size, offs)) << strerror(errno); m_filesize = std::max(m_filesize, offs + size); free(buf); } }; class IoCacheable: public Io { public: virtual void SetUp() { const char *node = "vfs.fusefs.data_cache_mode"; int val = 0; size_t size = sizeof(val); ASSERT_EQ(0, sysctlbyname(node, &val, &size, NULL, 0)) << strerror(errno); if (val == 0) GTEST_SKIP() << "fusefs data caching must be enabled for this test"; Io::SetUp(); } }; /* * Extend a file with dirty data in the last page of the last block. 
* * fsx -WR -P /tmp -S8 -N3 fsx.bin */ TEST_P(Io, extend_from_dirty_page) { off_t wofs = 0x21a0; ssize_t wsize = 0xf0a8; off_t rofs = 0xb284; ssize_t rsize = 0x9b22; off_t truncsize = 0x28702; do_write(wsize, wofs); do_ftruncate(truncsize); do_read(rsize, rofs); } /* * mapwrite into a newly extended part of a file. * * fsx -c 100 -i 100 -l 524288 -o 131072 -N5 -P /tmp -S19 fsx.bin */ TEST_P(IoCacheable, extend_by_mapwrite) { do_mapwrite(0x849e, 0x29a3a); /* [0x29a3a, 0x31ed7] */ do_mapwrite(0x3994, 0x3c7d8); /* [0x3c7d8, 0x4016b] */ do_read(0xf556, 0x30c16); /* [0x30c16, 0x4016b] */ } /* * When writing the last page of a file, it must be written synchronously. * Otherwise the cached page can be invalidated by a subsequent extend * operation. * * fsx -WR -P /tmp -S642 -N3 fsx.bin */ TEST_P(Io, last_page) { do_write(0xcc77, 0x1134f); /* [0x1134f, 0x1dfc5] */ do_write(0xdfa7, 0x2096a); /* [0x2096a, 0x2e910] */ do_read(0xb5b7, 0x1a3aa); /* [0x1a3aa, 0x25960] */ } /* * Read a hole using mmap * * fsx -c 100 -i 100 -l 524288 -o 131072 -N11 -P /tmp -S14 fsx.bin */ TEST_P(IoCacheable, mapread_hole) { do_write(0x123b7, 0xf205); /* [0xf205, 0x215bb] */ do_mapread(0xeeea, 0x2f4c); /* [0x2f4c, 0x11e35] */ } /* * Read a hole from a block that contains some cached data. * * fsx -WR -P /tmp -S55 fsx.bin */ TEST_P(Io, read_hole_from_cached_block) { off_t wofs = 0x160c5; ssize_t wsize = 0xa996; off_t rofs = 0x472e; ssize_t rsize = 0xd8d5; do_write(wsize, wofs); do_read(rsize, rofs); } /* * Truncating a file into a dirty buffer should not cause anything untoward * to happen when that buffer is eventually flushed. * * fsx -WR -P /tmp -S839 -d -N6 fsx.bin */ TEST_P(Io, truncate_into_dirty_buffer) { off_t wofs0 = 0x3bad7; ssize_t wsize0 = 0x4529; off_t wofs1 = 0xc30d; ssize_t wsize1 = 0x5f77; off_t truncsize0 = 0x10916; off_t rofs = 0xdf17; ssize_t rsize = 0x29ff; off_t truncsize1 = 0x152b4; do_write(wsize0, wofs0); do_write(wsize1, wofs1); do_ftruncate(truncsize0); do_read(rsize, rofs); do_ftruncate(truncsize1); close(m_test_fd); } /* * Truncating a file into a dirty buffer should not cause anything untoward * to happen when that buffer is eventually flushed, even when the buffer's * dirty_off is > 0. * * Based on this command with a few steps removed: * fsx -WR -P /tmp -S677 -d -N8 fsx.bin */ TEST_P(Io, truncate_into_dirty_buffer2) { off_t truncsize0 = 0x344f3; off_t wofs = 0x2790c; ssize_t wsize = 0xd86a; off_t truncsize1 = 0x2de38; off_t rofs2 = 0x1fd7a; ssize_t rsize2 = 0xc594; off_t truncsize2 = 0x31e71; /* Sets the file size to something larger than the next write */ do_ftruncate(truncsize0); /* * Creates a dirty buffer. The part in lbn 2 doesn't flush * synchronously. */ do_write(wsize, wofs); /* Truncates part of the dirty buffer created in step 2 */ do_ftruncate(truncsize1); /* XXX: I don't know why this is necessary */ do_read(rsize2, rofs2); /* Truncates the dirty buffer */ do_ftruncate(truncsize2); close(m_test_fd); } /* * Regression test for a bug introduced in r348931 * * Sequence of operations: * 1) The first write reads lbn so it can modify it * 2) The first write flushes lbn 3 immediately because it's the end of file * 3) The first write then flushes lbn 4 because it's the end of the file * 4) The second write modifies the cached versions of lbn 3 and 4 * 5) The third write's getblkx invalidates lbn 4's B_CACHE because it's * extending the buffer. Then it flushes lbn 4 because B_DELWRI was set but * B_CACHE was clear.
* 6) fuse_write_biobackend erroneously called vfs_bio_clrbuf, putting the * buffer into a weird write-only state. All read operations would return * 0. Writes were apparently still processed, because the buffer's contents * were correct when examined in a core dump. * 7) The third write reads lbn 4 because B_CACHE is clear * 8) uiomove dutifully copies new data into the buffer * 9) The buffer's dirty data is flushed to lbn 4 * 10) The read returns all zeros because of step 6. * * Based on: * fsx -WR -l 524388 -o 131072 -P /tmp -S6456 -q fsx.bin */ TEST_P(Io, resize_a_valid_buffer_while_extending) { do_write(0x14530, 0x36ee6); /* [0x36ee6, 0x4b415] */ do_write(0x1507c, 0x33256); /* [0x33256, 0x482d1] */ do_write(0x175c, 0x4c03d); /* [0x4c03d, 0x4d798] */ do_read(0xe277, 0x3599c); /* [0x3599c, 0x43c12] */ close(m_test_fd); } INSTANTIATE_TEST_CASE_P(Io, Io, Combine(Values(0, FUSE_ASYNC_READ), /* m_init_flags */ Values(0x1000, 0x10000, 0x20000), /* m_maxwrite */ Bool())); /* m_async */ INSTANTIATE_TEST_CASE_P(Io, IoCacheable, Combine(Values(0, FUSE_ASYNC_READ), /* m_init_flags */ Values(0x1000, 0x10000, 0x20000), /* m_maxwrite */ Bool())); /* m_async */ Index: projects/fuse2/tests/sys/fs/fusefs/write.cc =================================================================== --- projects/fuse2/tests/sys/fs/fusefs/write.cc (revision 349395) +++ projects/fuse2/tests/sys/fs/fusefs/write.cc (revision 349396) @@ -1,1070 +1,1192 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2019 The FreeBSD Foundation * * This software was developed by BFF Storage Systems, LLC under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE.
*/ extern "C" { #include #include #include #include #include #include #include #include #include #include #include } #include "mockfs.hh" #include "utils.hh" using namespace testing; class Write: public FuseTest { public: static sig_atomic_t s_sigxfsz; void SetUp() { s_sigxfsz = 0; FuseTest::SetUp(); } void TearDown() { struct sigaction sa; bzero(&sa, sizeof(sa)); sa.sa_handler = SIG_DFL; sigaction(SIGXFSZ, &sa, NULL); FuseTest::TearDown(); } void expect_lookup(const char *relpath, uint64_t ino, uint64_t size) { FuseTest::expect_lookup(relpath, ino, S_IFREG | 0644, size, 1); } void expect_release(uint64_t ino, ProcessMockerT r) { EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_RELEASE && in.header.nodeid == ino); }, Eq(true)), _) ).WillRepeatedly(Invoke(r)); } void expect_write(uint64_t ino, uint64_t offset, uint64_t isize, uint64_t osize, const void *contents) { FuseTest::expect_write(ino, offset, isize, osize, 0, 0, contents); } /* Expect a write that may or may not come, depending on the cache mode */ void maybe_expect_write(uint64_t ino, uint64_t offset, uint64_t size, const void *contents) { EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { const char *buf = (const char*)in.body.bytes + sizeof(struct fuse_write_in); return (in.header.opcode == FUSE_WRITE && in.header.nodeid == ino && in.body.write.offset == offset && in.body.write.size == size && 0 == bcmp(buf, contents, size)); }, Eq(true)), _) ).Times(AtMost(1)) .WillRepeatedly(Invoke( ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, write); out.body.write.size = size; }) )); } }; class WriteCacheable: public Write { public: virtual void SetUp() { const char *node = "vfs.fusefs.data_cache_mode"; int val = 0; size_t size = sizeof(val); FuseTest::SetUp(); ASSERT_EQ(0, sysctlbyname(node, &val, &size, NULL, 0)) << strerror(errno); if (val == 0) GTEST_SKIP() << "fusefs data caching must be enabled for this test"; } }; sig_atomic_t Write::s_sigxfsz = 0; class Write_7_8: public FuseTest { public: virtual void SetUp() { m_kernel_minor_version = 8; FuseTest::SetUp(); } void expect_lookup(const char *relpath, uint64_t ino, uint64_t size) { FuseTest::expect_lookup_7_8(relpath, ino, S_IFREG | 0644, size, 1); } }; class AioWrite: public Write { virtual void SetUp() { const char *node = "vfs.aio.enable_unsafe"; int val = 0; size_t size = sizeof(val); FuseTest::SetUp(); ASSERT_EQ(0, sysctlbyname(node, &val, &size, NULL, 0)) << strerror(errno); if (!val) GTEST_SKIP() << "vfs.aio.enable_unsafe must be set for this test"; } }; /* Tests for the write-through cache mode */ class WriteThrough: public Write { public: virtual void SetUp() { const char *cache_mode_node = "vfs.fusefs.data_cache_mode"; int val = 0; size_t size = sizeof(val); FuseTest::SetUp(); if (IsSkipped()) return; ASSERT_EQ(0, sysctlbyname(cache_mode_node, &val, &size, NULL, 0)) << strerror(errno); if (val != 1) GTEST_SKIP() << "vfs.fusefs.data_cache_mode must be set to 1 " "(writethrough) for this test"; } void expect_write(uint64_t ino, uint64_t offset, uint64_t isize, uint64_t osize, const void *contents) { FuseTest::expect_write(ino, offset, isize, osize, 0, FUSE_WRITE_CACHE, contents); } }; /* Tests for the writeback cache mode */ class WriteBack: public Write { public: virtual void SetUp() { const char *node = "vfs.fusefs.data_cache_mode"; int val = 0; size_t size = sizeof(val); FuseTest::SetUp(); if (IsSkipped()) return; ASSERT_EQ(0, sysctlbyname(node, &val, &size, NULL, 0)) << strerror(errno); if (val != 2) 
GTEST_SKIP() << "vfs.fusefs.data_cache_mode must be set to 2 " "(writeback) for this test"; } void expect_write(uint64_t ino, uint64_t offset, uint64_t isize, uint64_t osize, const void *contents) { FuseTest::expect_write(ino, offset, isize, osize, FUSE_WRITE_CACHE, 0, contents); } }; class WriteBackAsync: public WriteBack { public: virtual void SetUp() { m_async = true; WriteBack::SetUp(); } }; /* Tests for clustered writes with WriteBack cacheing */ class WriteCluster: public WriteBack { public: virtual void SetUp() { if (m_maxphys < 2 * DFLTPHYS) GTEST_SKIP() << "MAXPHYS must be at least twice DFLTPHYS" << " for this test"; m_async = true; m_maxwrite = m_maxphys; WriteBack::SetUp(); if (m_maxphys < 2 * m_maxbcachebuf) GTEST_SKIP() << "MAXPHYS must be at least twice maxbcachebuf" << " for this test"; } }; void sigxfsz_handler(int __unused sig) { Write::s_sigxfsz = 1; } /* AIO writes need to set the header's pid field correctly */ /* https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=236379 */ TEST_F(AioWrite, DISABLED_aio_write) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; uint64_t ino = 42; uint64_t offset = 4096; int fd; ssize_t bufsize = strlen(CONTENTS); struct aiocb iocb, *piocb; expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); expect_write(ino, offset, bufsize, bufsize, CONTENTS); fd = open(FULLPATH, O_WRONLY); EXPECT_LE(0, fd) << strerror(errno); iocb.aio_nbytes = bufsize; iocb.aio_fildes = fd; iocb.aio_buf = (void *)CONTENTS; iocb.aio_offset = offset; iocb.aio_sigevent.sigev_notify = SIGEV_NONE; ASSERT_EQ(0, aio_write(&iocb)) << strerror(errno); ASSERT_EQ(bufsize, aio_waitcomplete(&piocb, NULL)) << strerror(errno); /* Deliberately leak fd. close(2) will be tested in release.cc */ } /* * When a file is opened with O_APPEND, we should forward that flag to * FUSE_OPEN (tested by Open.o_append) but still attempt to calculate the * offset internally. That way we'll work both with filesystems that * understand O_APPEND (and ignore the offset) and filesystems that don't (and * simply use the offset). * * Note that verifying the O_APPEND flag in FUSE_OPEN is done in the * Open.o_append test. */ TEST_F(Write, append) { const ssize_t BUFSIZE = 9; const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char CONTENTS[BUFSIZE] = "abcdefgh"; uint64_t ino = 42; /* * Set offset to a maxbcachebuf boundary so we don't need to RMW when * using writeback caching */ uint64_t initial_offset = m_maxbcachebuf; int fd; expect_lookup(RELPATH, ino, initial_offset); expect_open(ino, 0, 1); expect_write(ino, initial_offset, BUFSIZE, BUFSIZE, CONTENTS); /* Must open O_RDWR or fuse(4) implicitly sets direct_io */ fd = open(FULLPATH, O_RDWR | O_APPEND); EXPECT_LE(0, fd) << strerror(errno); ASSERT_EQ(BUFSIZE, write(fd, CONTENTS, BUFSIZE)) << strerror(errno); /* Deliberately leak fd. 
close(2) will be tested in release.cc */ } /* If a file is cached, then appending to the end should not cause a read */ TEST_F(Write, append_to_cached) { const ssize_t BUFSIZE = 9; const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; char *oldcontents, *oldbuf; const char CONTENTS[BUFSIZE] = "abcdefgh"; uint64_t ino = 42; /* * Set the offset to fall between maxbcachebuf boundaries to test * buffer handling */ uint64_t oldsize = m_maxbcachebuf / 2; int fd; oldcontents = (char*)calloc(1, oldsize); ASSERT_NE(NULL, oldcontents) << strerror(errno); oldbuf = (char*)malloc(oldsize); ASSERT_NE(NULL, oldbuf) << strerror(errno); expect_lookup(RELPATH, ino, oldsize); expect_open(ino, 0, 1); expect_read(ino, 0, oldsize, oldsize, oldcontents); maybe_expect_write(ino, oldsize, BUFSIZE, CONTENTS); /* Must open O_RDWR or fuse(4) implicitly sets direct_io */ fd = open(FULLPATH, O_RDWR | O_APPEND); EXPECT_LE(0, fd) << strerror(errno); /* Read the old data into the cache */ ASSERT_EQ((ssize_t)oldsize, read(fd, oldbuf, oldsize)) << strerror(errno); /* Write the new data. There should be no more read operations */ ASSERT_EQ(BUFSIZE, write(fd, CONTENTS, BUFSIZE)) << strerror(errno); /* Deliberately leak fd. close(2) will be tested in release.cc */ } TEST_F(Write, append_direct_io) { const ssize_t BUFSIZE = 9; const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char CONTENTS[BUFSIZE] = "abcdefgh"; uint64_t ino = 42; uint64_t initial_offset = 4096; int fd; expect_lookup(RELPATH, ino, initial_offset); expect_open(ino, FOPEN_DIRECT_IO, 1); expect_write(ino, initial_offset, BUFSIZE, BUFSIZE, CONTENTS); fd = open(FULLPATH, O_WRONLY | O_APPEND); EXPECT_LE(0, fd) << strerror(errno); ASSERT_EQ(BUFSIZE, write(fd, CONTENTS, BUFSIZE)) << strerror(errno); /* Deliberately leak fd. close(2) will be tested in release.cc */ } /* A direct write should evict any overlapping cached data */ TEST_F(Write, direct_io_evicts_cache) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char CONTENTS0[] = "abcdefgh"; const char CONTENTS1[] = "ijklmnop"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS0) + 1; char readbuf[bufsize]; expect_lookup(RELPATH, ino, bufsize); expect_open(ino, 0, 1); expect_read(ino, 0, bufsize, bufsize, CONTENTS0); expect_write(ino, 0, bufsize, bufsize, CONTENTS1); fd = open(FULLPATH, O_RDWR); EXPECT_LE(0, fd) << strerror(errno); // Prime cache ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno); // Write directly, evicting cache ASSERT_EQ(0, fcntl(fd, F_SETFL, O_DIRECT)) << strerror(errno); ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS1, bufsize)) << strerror(errno); // Read again. Cache should be bypassed expect_read(ino, 0, bufsize, bufsize, CONTENTS1); ASSERT_EQ(0, fcntl(fd, F_SETFL, 0)) << strerror(errno); ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno); ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno); ASSERT_STREQ(readbuf, CONTENTS1); /* Deliberately leak fd. close(2) will be tested in release.cc */ } /* * If the server doesn't return FOPEN_DIRECT_IO during FUSE_OPEN, then it's not * allowed to return a short write for that file handle. However, if it does * then we should still do our darndest to handle it by resending the unwritten * portion.
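 *
 * A minimal sketch of that recovery (hypothetical variable names; the
 * in-kernel loop must also keep the uio bookkeeping consistent):
 *
 *	wrote = fwo->size;		// from struct fuse_write_out
 *	while (!error && wrote < requested) {
 *		advance the input iov by wrote bytes;
 *		resend FUSE_WRITE for the remaining requested - wrote;
 *	}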
*/ TEST_F(Write, indirect_io_short_write) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefghijklmnop"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS); ssize_t bufsize0 = 11; ssize_t bufsize1 = strlen(CONTENTS) - bufsize0; const char *contents1 = CONTENTS + bufsize0; expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); expect_write(ino, 0, bufsize, bufsize0, CONTENTS); expect_write(ino, bufsize0, bufsize1, bufsize1, contents1); fd = open(FULLPATH, O_WRONLY); EXPECT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); /* Deliberately leak fd. close(2) will be tested in release.cc */ } /* * When the direct_io option is used, filesystems are allowed to write less * data than requested. We should return the short write to userland. */ TEST_F(Write, direct_io_short_write) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefghijklmnop"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS); ssize_t halfbufsize = bufsize / 2; expect_lookup(RELPATH, ino, 0); expect_open(ino, FOPEN_DIRECT_IO, 1); expect_write(ino, 0, bufsize, halfbufsize, CONTENTS); fd = open(FULLPATH, O_WRONLY); EXPECT_LE(0, fd) << strerror(errno); ASSERT_EQ(halfbufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); /* Deliberately leak fd. close(2) will be tested in release.cc */ } /* * An insidious edge case: the filesystem returns a short write, and the * difference between what we requested and what it actually wrote crosses an * iov element boundary */ TEST_F(Write, direct_io_short_write_iov) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS0 = "abcdefgh"; const char *CONTENTS1 = "ijklmnop"; const char *EXPECTED0 = "abcdefghijklmnop"; uint64_t ino = 42; int fd; ssize_t size0 = strlen(CONTENTS0) - 1; ssize_t size1 = strlen(CONTENTS1) + 1; ssize_t totalsize = size0 + size1; struct iovec iov[2]; expect_lookup(RELPATH, ino, 0); expect_open(ino, FOPEN_DIRECT_IO, 1); expect_write(ino, 0, totalsize, size0, EXPECTED0); fd = open(FULLPATH, O_WRONLY); EXPECT_LE(0, fd) << strerror(errno); iov[0].iov_base = (void*)CONTENTS0; iov[0].iov_len = strlen(CONTENTS0); iov[1].iov_base = (void*)CONTENTS1; iov[1].iov_len = strlen(CONTENTS1); ASSERT_EQ(size0, writev(fd, iov, 2)) << strerror(errno); /* Deliberately leak fd. close(2) will be tested in release.cc */ } /* fusefs should respect RLIMIT_FSIZE */ TEST_F(Write, rlimit_fsize) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; struct rlimit rl; ssize_t bufsize = strlen(CONTENTS); off_t offset = 1'000'000'000; uint64_t ino = 42; int fd; expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); rl.rlim_cur = offset; rl.rlim_max = 10 * offset; ASSERT_EQ(0, setrlimit(RLIMIT_FSIZE, &rl)) << strerror(errno); ASSERT_NE(SIG_ERR, signal(SIGXFSZ, sigxfsz_handler)) << strerror(errno); fd = open(FULLPATH, O_WRONLY); EXPECT_LE(0, fd) << strerror(errno); ASSERT_EQ(-1, pwrite(fd, CONTENTS, bufsize, offset)); EXPECT_EQ(EFBIG, errno); EXPECT_EQ(1, s_sigxfsz); /* Deliberately leak fd. close(2) will be tested in release.cc */ } /* * A short read indicates EOF. Test that nothing bad happens if we get EOF * during the R of a RMW operation. 
*/ TEST_F(WriteCacheable, eof_during_rmw) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; const char *INITIAL = "XXXXXXXXXX"; uint64_t ino = 42; uint64_t offset = 1; ssize_t bufsize = strlen(CONTENTS); off_t orig_fsize = 10; off_t truncated_fsize = 5; off_t final_fsize = bufsize; int fd; FuseTest::expect_lookup(RELPATH, ino, S_IFREG | 0644, orig_fsize, 1); expect_open(ino, 0, 1); expect_read(ino, 0, orig_fsize, truncated_fsize, INITIAL, O_RDWR); expect_getattr(ino, truncated_fsize); expect_read(ino, 0, final_fsize, final_fsize, INITIAL, O_RDWR); maybe_expect_write(ino, offset, bufsize, CONTENTS); fd = open(FULLPATH, O_RDWR); EXPECT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, pwrite(fd, CONTENTS, bufsize, offset)) << strerror(errno); /* Deliberately leak fd. close(2) will be tested in release.cc */ } /* * If the kernel cannot be sure which uid, gid, or pid was responsible for a * write, then it must set the FUSE_WRITE_CACHE bit */ /* https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=236378 */ TEST_F(WriteCacheable, mmap) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS); void *p; uint64_t offset = 10; size_t len; void *zeros, *expected; len = getpagesize(); zeros = calloc(1, len); ASSERT_NE(NULL, zeros); expected = calloc(1, len); ASSERT_NE(NULL, expected); memmove((uint8_t*)expected + offset, CONTENTS, bufsize); expect_lookup(RELPATH, ino, len); expect_open(ino, 0, 1); expect_read(ino, 0, len, len, zeros); /* * Writes from the pager may or may not be associated with the correct * pid, so they must set FUSE_WRITE_CACHE. */ FuseTest::expect_write(ino, 0, len, len, FUSE_WRITE_CACHE, 0, expected); expect_flush(ino, 1, ReturnErrno(0)); expect_release(ino, ReturnErrno(0)); fd = open(FULLPATH, O_RDWR); EXPECT_LE(0, fd) << strerror(errno); p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); ASSERT_NE(MAP_FAILED, p) << strerror(errno); memmove((uint8_t*)p + offset, CONTENTS, bufsize); ASSERT_EQ(0, munmap(p, len)) << strerror(errno); close(fd); // Write mmap'd data on close free(expected); free(zeros); } TEST_F(WriteThrough, pwrite) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; uint64_t ino = 42; uint64_t offset = m_maxbcachebuf; int fd; ssize_t bufsize = strlen(CONTENTS); expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); expect_write(ino, offset, bufsize, bufsize, CONTENTS); fd = open(FULLPATH, O_WRONLY); EXPECT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, pwrite(fd, CONTENTS, bufsize, offset)) << strerror(errno); /* Deliberately leak fd. 
close(2) will be tested in release.cc */ } +/* Writing a file should update its cached mtime and ctime */ +TEST_F(Write, timestamps) +{ + const char FULLPATH[] = "mountpoint/some_file.txt"; + const char RELPATH[] = "some_file.txt"; + const char *CONTENTS = "abcdefgh"; + ssize_t bufsize = strlen(CONTENTS); + uint64_t ino = 42; + struct stat sb0, sb1; + int fd; + + expect_lookup(RELPATH, ino, 0); + expect_open(ino, 0, 1); + maybe_expect_write(ino, 0, bufsize, CONTENTS); + + fd = open(FULLPATH, O_RDWR); + EXPECT_LE(0, fd) << strerror(errno); + ASSERT_EQ(0, fstat(fd, &sb0)) << strerror(errno); + ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); + + nap(); + + ASSERT_EQ(0, fstat(fd, &sb1)) << strerror(errno); + + EXPECT_EQ(sb0.st_atime, sb1.st_atime); + EXPECT_NE(sb0.st_mtime, sb1.st_mtime); + EXPECT_NE(sb0.st_ctime, sb1.st_ctime); +} + TEST_F(Write, write) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS); expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); expect_write(ino, 0, bufsize, bufsize, CONTENTS); fd = open(FULLPATH, O_WRONLY); EXPECT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); /* Deliberately leak fd. close(2) will be tested in release.cc */ } /* fuse(4) should not issue writes of greater size than the daemon requests */ TEST_F(Write, write_large) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; int *contents; uint64_t ino = 42; int fd; ssize_t halfbufsize, bufsize; halfbufsize = m_mock->m_maxwrite; bufsize = halfbufsize * 2; contents = (int*)malloc(bufsize); ASSERT_NE(NULL, contents); for (int i = 0; i < (int)bufsize / (int)sizeof(i); i++) { contents[i] = i; } expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); maybe_expect_write(ino, 0, halfbufsize, contents); maybe_expect_write(ino, halfbufsize, halfbufsize, &contents[halfbufsize / sizeof(int)]); fd = open(FULLPATH, O_WRONLY); EXPECT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, contents, bufsize)) << strerror(errno); /* Deliberately leak fd. close(2) will be tested in release.cc */ free(contents); } TEST_F(Write, write_nothing) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = ""; uint64_t ino = 42; int fd; ssize_t bufsize = 0; expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); fd = open(FULLPATH, O_WRONLY); EXPECT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); /* Deliberately leak fd. close(2) will be tested in release.cc */ } TEST_F(Write_7_8, write) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS); expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); expect_write_7_8(ino, 0, bufsize, bufsize, CONTENTS); fd = open(FULLPATH, O_WRONLY); EXPECT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); /* Deliberately leak fd. 
close(2) will be tested in release.cc */ } /* In writeback mode, dirty data should be written on close */ TEST_F(WriteBackAsync, close) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS); expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); expect_write(ino, 0, bufsize, bufsize, CONTENTS); EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_SETATTR); }, Eq(true)), _) ).WillRepeatedly(Invoke(ReturnImmediate([=](auto i __unused, auto& out) { SET_OUT_HEADER_LEN(out, attr); out.body.attr.attr.ino = ino; // Must match nodeid }))); expect_flush(ino, 1, ReturnErrno(0)); expect_release(ino, ReturnErrno(0)); fd = open(FULLPATH, O_RDWR); ASSERT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); close(fd); } /* In writeback mode, adjacent writes will be clustered together */ TEST_F(WriteCluster, clustering) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; int i, fd; void *wbuf, *wbuf2x; ssize_t bufsize = m_maxbcachebuf; off_t filesize = 5 * bufsize; wbuf = malloc(bufsize); ASSERT_NE(NULL, wbuf) << strerror(errno); memset(wbuf, 'X', bufsize); wbuf2x = malloc(2 * bufsize); ASSERT_NE(NULL, wbuf2x) << strerror(errno); memset(wbuf2x, 'X', 2 * bufsize); expect_lookup(RELPATH, ino, filesize); expect_open(ino, 0, 1); /* * Writes of bufsize-bytes each should be clustered into greater sizes. * The amount of clustering is adaptive, so the first write actually * issued will be 2x bufsize and subsequent writes may be larger */ expect_write(ino, 0, 2 * bufsize, 2 * bufsize, wbuf2x); expect_write(ino, 2 * bufsize, 2 * bufsize, 2 * bufsize, wbuf2x); expect_flush(ino, 1, ReturnErrno(0)); expect_release(ino, ReturnErrno(0)); fd = open(FULLPATH, O_RDWR); ASSERT_LE(0, fd) << strerror(errno); for (i = 0; i < 4; i++) { ASSERT_EQ(bufsize, write(fd, wbuf, bufsize)) << strerror(errno); } close(fd); } /* * When clustering writes, an I/O error to any of the cluster's children should * not panic the system on unmount */ /* * Disabled because it panics. * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=238565 */ TEST_F(WriteCluster, DISABLED_cluster_write_err) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; int i, fd; void *wbuf; ssize_t bufsize = m_maxbcachebuf; off_t filesize = 4 * bufsize; wbuf = malloc(bufsize); ASSERT_NE(NULL, wbuf) << strerror(errno); memset(wbuf, 'X', bufsize); expect_lookup(RELPATH, ino, filesize); expect_open(ino, 0, 1); EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_WRITE); }, Eq(true)), _) ).WillRepeatedly(Invoke(ReturnErrno(EIO))); expect_flush(ino, 1, ReturnErrno(0)); expect_release(ino, ReturnErrno(0)); fd = open(FULLPATH, O_RDWR); ASSERT_LE(0, fd) << strerror(errno); for (i = 0; i < 3; i++) { ASSERT_EQ(bufsize, write(fd, wbuf, bufsize)) << strerror(errno); } close(fd); } /* * In writeback mode, writes to an O_WRONLY file could trigger reads from the * server. The FUSE protocol explicitly allows that. 
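 *
 * Hedged pseudocode of the read-modify-write sequence for a write that
 * covers only part of a cache block:
 *
 *	bp = getblk(vp, lbn, biosize, ...);
 *	if (!(bp->b_flags & B_CACHE))
 *		issue FUSE_READ for the whole block;	// even on O_WRONLY
 *	copy the new bytes into bp and mark it dirty;
 *	flush later with FUSE_WRITE;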
*/ TEST_F(WriteBack, rmw) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; const char *INITIAL = "XXXXXXXXXX"; uint64_t ino = 42; uint64_t offset = 1; off_t fsize = 10; int fd; ssize_t bufsize = strlen(CONTENTS); FuseTest::expect_lookup(RELPATH, ino, S_IFREG | 0644, fsize, 1); expect_open(ino, 0, 1); expect_read(ino, 0, fsize, fsize, INITIAL, O_WRONLY); maybe_expect_write(ino, offset, bufsize, CONTENTS); fd = open(FULLPATH, O_WRONLY); EXPECT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, pwrite(fd, CONTENTS, bufsize, offset)) << strerror(errno); /* Deliberately leak fd. close(2) will be tested in release.cc */ } /* * Without direct_io, writes should be committed to cache */ TEST_F(WriteBack, cache) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS); char readbuf[bufsize]; expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); expect_write(ino, 0, bufsize, bufsize, CONTENTS); fd = open(FULLPATH, O_RDWR); EXPECT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); /* * A subsequent read should be serviced by cache, without querying the * filesystem daemon */ ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno); ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno); /* Deliberately leak fd. close(2) will be tested in release.cc */ } /* * With O_DIRECT, writes should not be committed to cache. Admittedly this is * an odd test, because it would be unusual to use O_DIRECT for writes but not * reads. */ TEST_F(WriteBack, o_direct) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS); char readbuf[bufsize]; expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); FuseTest::expect_write(ino, 0, bufsize, bufsize, 0, FUSE_WRITE_CACHE, CONTENTS); expect_read(ino, 0, bufsize, bufsize, CONTENTS); fd = open(FULLPATH, O_RDWR | O_DIRECT); EXPECT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); /* A subsequent read must query the daemon because cache is empty */ ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno); ASSERT_EQ(0, fcntl(fd, F_SETFL, 0)) << strerror(errno); ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno); /* Deliberately leak fd. close(2) will be tested in release.cc */ } /* * When mounted with -o async, the writeback cache mode should delay writes */ TEST_F(WriteBackAsync, delay) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS); expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); /* Write should be cached, but FUSE_WRITE shouldn't be sent */ EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_WRITE); }, Eq(true)), _) ).Times(0); fd = open(FULLPATH, O_RDWR); EXPECT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); /* Don't close the file because that would flush the cache */ } /* * In WriteBack mode, writes may be cached beyond what the server thinks is the * EOF. In this case, a short read at EOF should _not_ cause fusefs to update * the file's size.
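 *
 * The kernel-side guard (cf. fuse_io_strategy's read completion path
 * above) is essentially:
 *
 *	if (fvdat->flag & FN_SIZECHANGE)
 *		uiop->uio_resid = 0;	// dirty size wins; treat as full read
 *	else
 *		fuse_vnode_clear_attr_cache(vp);  // accept server truncation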
/*
 * In WriteBack mode, writes may be cached beyond what the server thinks is
 * the EOF.  In this case, a short read at EOF should _not_ cause fusefs to
 * update the file's size.
 */
TEST_F(WriteBackAsync, eof)
{
	const char FULLPATH[] = "mountpoint/some_file.txt";
	const char RELPATH[] = "some_file.txt";
	const char *CONTENTS0 = "abcdefgh";
	const char *CONTENTS1 = "ijklmnop";
	uint64_t ino = 42;
	int fd;
	off_t offset = m_maxbcachebuf;
	ssize_t wbufsize = strlen(CONTENTS1);
	off_t old_filesize = (off_t)strlen(CONTENTS0);
	ssize_t rbufsize = 2 * old_filesize;
	char readbuf[rbufsize];
	size_t holesize = rbufsize - old_filesize;
	char hole[holesize];
	struct stat sb;
	ssize_t r;

	expect_lookup(RELPATH, ino, 0);
	expect_open(ino, 0, 1);
	expect_read(ino, 0, m_maxbcachebuf, old_filesize, CONTENTS0);

	fd = open(FULLPATH, O_RDWR);
	EXPECT_LE(0, fd) << strerror(errno);

	/* Write and cache data beyond EOF */
	ASSERT_EQ(wbufsize, pwrite(fd, CONTENTS1, wbufsize, offset))
		<< strerror(errno);

	/* Read from the old EOF */
	r = pread(fd, readbuf, rbufsize, 0);
	ASSERT_LE(0, r) << strerror(errno);
	EXPECT_EQ(rbufsize, r) << "read should've synthesized a hole";
	EXPECT_EQ(0, memcmp(CONTENTS0, readbuf, old_filesize));
	bzero(hole, holesize);
	EXPECT_EQ(0, memcmp(hole, readbuf + old_filesize, holesize));

	/* The file's size should still be what was established by pwrite */
	ASSERT_EQ(0, fstat(fd, &sb)) << strerror(errno);
	EXPECT_EQ(offset + wbufsize, sb.st_size);
	/* Deliberately leak fd.  close(2) will be tested in release.cc */
+}
+
+/*
+ * When a file has dirty writes that haven't been flushed, the server's notion
+ * of its mtime and ctime will be wrong.  The kernel should ignore those if it
+ * gets them from a FUSE_GETATTR before flushing.
+ */
+TEST_F(WriteBackAsync, timestamps)
+{
+	const char FULLPATH[] = "mountpoint/some_file.txt";
+	const char RELPATH[] = "some_file.txt";
+	const char *CONTENTS = "abcdefgh";
+	ssize_t bufsize = strlen(CONTENTS);
+	uint64_t ino = 42;
+	uint64_t attr_valid = 0;
+	uint64_t attr_valid_nsec = 0;
+	uint64_t server_time = 12345;
+	mode_t mode = S_IFREG | 0644;
+	int fd;
+
+	struct stat sb;
+
+	EXPECT_LOOKUP(FUSE_ROOT_ID, RELPATH)
+	.WillRepeatedly(Invoke(
+		ReturnImmediate([=](auto in __unused, auto& out) {
+		SET_OUT_HEADER_LEN(out, entry);
+		out.body.entry.attr.mode = mode;
+		out.body.entry.nodeid = ino;
+		out.body.entry.attr.nlink = 1;
+		out.body.entry.attr_valid = attr_valid;
+		out.body.entry.attr_valid_nsec = attr_valid_nsec;
+	})));
+	expect_open(ino, 0, 1);
+	EXPECT_CALL(*m_mock, process(
+		ResultOf([=](auto in) {
+			return (in.header.opcode == FUSE_GETATTR &&
+				in.header.nodeid == ino);
+		}, Eq(true)),
+		_)
+	).WillRepeatedly(Invoke(
+		ReturnImmediate([=](auto i __unused, auto& out) {
+		SET_OUT_HEADER_LEN(out, attr);
+		out.body.attr.attr.ino = ino;
+		out.body.attr.attr.mode = mode;
+		out.body.attr.attr_valid = attr_valid;
+		out.body.attr.attr_valid_nsec = attr_valid_nsec;
+		out.body.attr.attr.atime = server_time;
+		out.body.attr.attr.mtime = server_time;
+		out.body.attr.attr.ctime = server_time;
+	})));
+
+	fd = open(FULLPATH, O_RDWR);
+	EXPECT_LE(0, fd) << strerror(errno);
+	ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);
+
+	ASSERT_EQ(0, fstat(fd, &sb)) << strerror(errno);
+	EXPECT_EQ((time_t)server_time, sb.st_atime);
+	EXPECT_NE((time_t)server_time, sb.st_mtime);
+	EXPECT_NE((time_t)server_time, sb.st_ctime);
+}
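A hypothetical control case for the timestamps test above: with no dirty data, the kernel has no reason to distrust the server, so all three times should come through verbatim. The sketch below is illustrative only and not part of this revision; the test name is invented, and it simply reuses the mock setup of the timestamps test with the write removed.

TEST_F(WriteBackAsync, timestamps_clean_file)	/* hypothetical */
{
	const char FULLPATH[] = "mountpoint/some_file.txt";
	const char RELPATH[] = "some_file.txt";
	uint64_t ino = 42;
	uint64_t server_time = 12345;
	mode_t mode = S_IFREG | 0644;
	int fd;
	struct stat sb;

	EXPECT_LOOKUP(FUSE_ROOT_ID, RELPATH)
	.WillRepeatedly(Invoke(
		ReturnImmediate([=](auto in __unused, auto& out) {
		SET_OUT_HEADER_LEN(out, entry);
		out.body.entry.attr.mode = mode;
		out.body.entry.nodeid = ino;
		out.body.entry.attr.nlink = 1;
	})));
	expect_open(ino, 0, 1);
	EXPECT_CALL(*m_mock, process(
		ResultOf([=](auto in) {
			return (in.header.opcode == FUSE_GETATTR &&
				in.header.nodeid == ino);
		}, Eq(true)),
		_)
	).WillRepeatedly(Invoke(
		ReturnImmediate([=](auto i __unused, auto& out) {
		SET_OUT_HEADER_LEN(out, attr);
		out.body.attr.attr.ino = ino;
		out.body.attr.attr.mode = mode;
		out.body.attr.attr.atime = server_time;
		out.body.attr.attr.mtime = server_time;
		out.body.attr.attr.ctime = server_time;
	})));

	fd = open(FULLPATH, O_RDWR);
	EXPECT_LE(0, fd) << strerror(errno);

	/* With no dirty writes, the server's times should be used as-is */
	ASSERT_EQ(0, fstat(fd, &sb)) << strerror(errno);
	EXPECT_EQ((time_t)server_time, sb.st_atime);
	EXPECT_EQ((time_t)server_time, sb.st_mtime);
	EXPECT_EQ((time_t)server_time, sb.st_ctime);
}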
+
+/* Any dirty timestamp fields should be flushed during a SETATTR */
+TEST_F(WriteBackAsync, timestamps_during_setattr)
+{
+	const char FULLPATH[] = "mountpoint/some_file.txt";
+	const char RELPATH[] = "some_file.txt";
+	const char *CONTENTS = "abcdefgh";
+	ssize_t bufsize = strlen(CONTENTS);
+	uint64_t ino = 42;
+	const mode_t newmode = 0755;
+	int fd;
+
+	expect_lookup(RELPATH, ino, 0);
+	expect_open(ino, 0, 1);
+	EXPECT_CALL(*m_mock, process(
+		ResultOf([=](auto in) {
+			/* In protocol 7.23, ctime will be changed too */
+			uint32_t valid = FATTR_MODE | FATTR_MTIME;
+			return (in.header.opcode == FUSE_SETATTR &&
+				in.header.nodeid == ino &&
+				in.body.setattr.valid == valid);
+		}, Eq(true)),
+		_)
+	).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) {
+		SET_OUT_HEADER_LEN(out, attr);
+		out.body.attr.attr.ino = ino;
+		out.body.attr.attr.mode = S_IFREG | newmode;
+	})));
+
+	fd = open(FULLPATH, O_RDWR);
+	EXPECT_LE(0, fd) << strerror(errno);
+	ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);
+	ASSERT_EQ(0, fchmod(fd, newmode)) << strerror(errno);
}

/*
 * Without direct_io, writes should be committed to cache even in
 * writethrough mode
 */
TEST_F(WriteThrough, writethrough)
{
	const char FULLPATH[] = "mountpoint/some_file.txt";
	const char RELPATH[] = "some_file.txt";
	const char *CONTENTS = "abcdefgh";
	uint64_t ino = 42;
	int fd;
	ssize_t bufsize = strlen(CONTENTS);
	char readbuf[bufsize];

	expect_lookup(RELPATH, ino, 0);
	expect_open(ino, 0, 1);
	expect_write(ino, 0, bufsize, bufsize, CONTENTS);

	fd = open(FULLPATH, O_RDWR);
	EXPECT_LE(0, fd) << strerror(errno);

	ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);
	/*
	 * A subsequent read should be serviced by cache, without querying the
	 * filesystem daemon
	 */
	ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno);
	ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno);
	/* Deliberately leak fd.  close(2) will be tested in release.cc */
}

/* Writes that extend a file should update the cached file size */
TEST_F(Write, update_file_size)
{
	const char FULLPATH[] = "mountpoint/some_file.txt";
	const char RELPATH[] = "some_file.txt";
	const char *CONTENTS = "abcdefgh";
	struct stat sb;
	uint64_t ino = 42;
	int fd;
	ssize_t bufsize = strlen(CONTENTS);

	expect_lookup(RELPATH, ino, 0);
	expect_open(ino, 0, 1);
	expect_write(ino, 0, bufsize, bufsize, CONTENTS);

	fd = open(FULLPATH, O_RDWR);
	EXPECT_LE(0, fd) << strerror(errno);

	ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);

	/* Get cached attributes */
	ASSERT_EQ(0, fstat(fd, &sb)) << strerror(errno);
	ASSERT_EQ(bufsize, sb.st_size);
	/* Deliberately leak fd.  close(2) will be tested in release.cc */
}
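A related case worth pinning down: extending a file with pwrite(2) at a nonzero offset should update the cached size to offset + nbyte, not just to the number of bytes written. The sketch below is hypothetical and not part of this revision; the test name and the choice of offset are assumptions, and it relies only on the helpers update_file_size already uses.

TEST_F(Write, update_file_size_pwrite)	/* hypothetical */
{
	const char FULLPATH[] = "mountpoint/some_file.txt";
	const char RELPATH[] = "some_file.txt";
	const char *CONTENTS = "abcdefgh";
	struct stat sb;
	uint64_t ino = 42;
	off_t offset = 4096;
	int fd;
	ssize_t bufsize = strlen(CONTENTS);

	expect_lookup(RELPATH, ino, 0);
	expect_open(ino, 0, 1);
	expect_write(ino, offset, bufsize, bufsize, CONTENTS);

	fd = open(FULLPATH, O_RDWR);
	EXPECT_LE(0, fd) << strerror(errno);

	ASSERT_EQ(bufsize, pwrite(fd, CONTENTS, bufsize, offset))
		<< strerror(errno);

	/* The cached size should extend to the end of the new write */
	ASSERT_EQ(0, fstat(fd, &sb)) << strerror(errno);
	ASSERT_EQ(offset + bufsize, sb.st_size);
	/* Deliberately leak fd.  close(2) will be tested in release.cc */
}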