Index: projects/fuse2/sys/fs/fuse/fuse_file.c =================================================================== --- projects/fuse2/sys/fs/fuse/fuse_file.c (revision 348318) +++ projects/fuse2/sys/fs/fuse/fuse_file.c (revision 348319) @@ -1,396 +1,358 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2007-2009 Google Inc. and Amit Singh * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (C) 2005 Csaba Henk. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fuse.h" #include "fuse_file.h" #include "fuse_internal.h" #include "fuse_io.h" #include "fuse_ipc.h" #include "fuse_node.h" MALLOC_DEFINE(M_FUSE_FILEHANDLE, "fuse_filefilehandle", "FUSE file handle"); SDT_PROVIDER_DECLARE(fusefs); /* * Fuse trace probe: * arg0: verbosity. Higher numbers give more verbose messages * arg1: Textual message */ SDT_PROBE_DEFINE2(fusefs, , file, trace, "int", "char*"); static int fuse_fh_count = 0; SYSCTL_INT(_vfs_fusefs, OID_AUTO, filehandle_count, CTLFLAG_RD, &fuse_fh_count, 0, "number of open FUSE filehandles"); /* Get the FUFH type for a particular access mode */ static inline fufh_type_t fflags_2_fufh_type(int fflags) { if ((fflags & FREAD) && (fflags & FWRITE)) return FUFH_RDWR; else if (fflags & (FWRITE)) return FUFH_WRONLY; else if (fflags & (FREAD)) return FUFH_RDONLY; else if (fflags & (FEXEC)) return FUFH_EXEC; else panic("FUSE: What kind of a flag is this (%x)?", fflags); } -/* - * Get the flags to use for FUSE_CREATE, FUSE_OPEN and FUSE_RELEASE - * - * These are supposed to be the same as the flags argument to open(2). - * However, since we can't reliably associate a fuse_filehandle with a specific - * file descriptor it would would be dangerous to include anything more than - * the access mode flags. For example, suppose we open a file twice, once with - * O_APPEND and once without. Then the user pwrite(2)s to offset using the - * second file descriptor. If fusefs uses the first file handle, then the - * server may append the write to the end of the file rather than at offset 0. - * To prevent problems like this, we only ever send the portion of flags - * related to access mode. - * - * It's essential to send that portion, because FUSE uses it for server-side - * authorization. - * - * TODO: consider sending O_APPEND after upgrading to protocol 7.9, which - * includes flags in fuse_write_in. - */ -static inline int -fufh_type_2_fflags(fufh_type_t type) -{ - int oflags = -1; - - switch (type) { - case FUFH_RDONLY: - case FUFH_WRONLY: - case FUFH_RDWR: - case FUFH_EXEC: - oflags = type; - break; - default: - break; - } - - return oflags; -} - int fuse_filehandle_open(struct vnode *vp, int a_mode, struct fuse_filehandle **fufhp, struct thread *td, struct ucred *cred) { struct fuse_dispatcher fdi; struct fuse_open_in *foi; struct fuse_open_out *foo; fufh_type_t fufh_type; int err = 0; int oflags = 0; int op = FUSE_OPEN; fufh_type = fflags_2_fufh_type(a_mode); oflags = fufh_type_2_fflags(fufh_type); if (vnode_isdir(vp)) { op = FUSE_OPENDIR; /* vn_open_vnode already rejects FWRITE on directories */ MPASS(fufh_type == FUFH_RDONLY || fufh_type == FUFH_EXEC); } fdisp_init(&fdi, sizeof(*foi)); fdisp_make_vp(&fdi, op, vp, td, cred); foi = fdi.indata; foi->flags = oflags; if ((err = fdisp_wait_answ(&fdi))) { SDT_PROBE2(fusefs, , file, trace, 1, "OUCH ... daemon didn't give fh"); if (err == ENOENT) { fuse_internal_vnode_disappear(vp); } goto out; } foo = fdi.answ; fuse_filehandle_init(vp, fufh_type, fufhp, td, cred, foo); fuse_vnode_open(vp, foo->open_flags, td); out: fdisp_destroy(&fdi); return err; } int fuse_filehandle_close(struct vnode *vp, struct fuse_filehandle *fufh, struct thread *td, struct ucred *cred) { struct fuse_dispatcher fdi; struct fuse_release_in *fri; int err = 0; int op = FUSE_RELEASE; if (fuse_isdeadfs(vp)) { goto out; } if (vnode_isdir(vp)) op = FUSE_RELEASEDIR; fdisp_init(&fdi, sizeof(*fri)); fdisp_make_vp(&fdi, op, vp, td, cred); fri = fdi.indata; fri->fh = fufh->fh_id; fri->flags = fufh_type_2_fflags(fufh->fufh_type); /* * If the file has a POSIX lock then we're supposed to set lock_owner. * If not, then lock_owner is undefined. So we may as well always set * it. */ fri->lock_owner = td->td_proc->p_pid; err = fdisp_wait_answ(&fdi); fdisp_destroy(&fdi); out: atomic_subtract_acq_int(&fuse_fh_count, 1); LIST_REMOVE(fufh, next); free(fufh, M_FUSE_FILEHANDLE); return err; } /* * Check for a valid file handle, first the type requested, but if that * isn't valid, try for FUFH_RDWR. * Return true if there is any file handle with the correct credentials and * a fufh type that includes the provided one. * A pid of 0 means "don't care" */ bool fuse_filehandle_validrw(struct vnode *vp, int mode, struct ucred *cred, pid_t pid) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct fuse_filehandle *fufh; fufh_type_t fufh_type = fflags_2_fufh_type(mode); /* * Unlike fuse_filehandle_get, we want to search for a filehandle with * the exact cred, and no fallback */ LIST_FOREACH(fufh, &fvdat->handles, next) { if (fufh->fufh_type == fufh_type && fufh->uid == cred->cr_uid && fufh->gid == cred->cr_rgid && (pid == 0 || fufh->pid == pid)) return true; } if (fufh_type == FUFH_EXEC) return false; /* Fallback: find a RDWR list entry with the right cred */ LIST_FOREACH(fufh, &fvdat->handles, next) { if (fufh->fufh_type == FUFH_RDWR && fufh->uid == cred->cr_uid && fufh->gid == cred->cr_rgid && (pid == 0 || fufh->pid == pid)) return true; } return false; } int fuse_filehandle_get(struct vnode *vp, int fflag, struct fuse_filehandle **fufhp, struct ucred *cred, pid_t pid) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct fuse_filehandle *fufh; fufh_type_t fufh_type; fufh_type = fflags_2_fufh_type(fflag); /* cred can be NULL for in-kernel clients */ if (cred == NULL) goto fallback; LIST_FOREACH(fufh, &fvdat->handles, next) { if (fufh->fufh_type == fufh_type && fufh->uid == cred->cr_uid && fufh->gid == cred->cr_rgid && (pid == 0 || fufh->pid == pid)) goto found; } fallback: /* Fallback: find a list entry with the right flags */ LIST_FOREACH(fufh, &fvdat->handles, next) { if (fufh->fufh_type == fufh_type) break; } if (fufh == NULL) return EBADF; found: if (fufhp != NULL) *fufhp = fufh; return 0; } /* Get a file handle with any kind of flags */ int fuse_filehandle_get_anyflags(struct vnode *vp, struct fuse_filehandle **fufhp, struct ucred *cred, pid_t pid) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct fuse_filehandle *fufh; if (cred == NULL) goto fallback; LIST_FOREACH(fufh, &fvdat->handles, next) { if (fufh->uid == cred->cr_uid && fufh->gid == cred->cr_rgid && (pid == 0 || fufh->pid == pid)) goto found; } fallback: /* Fallback: find any list entry */ fufh = LIST_FIRST(&fvdat->handles); if (fufh == NULL) return EBADF; found: if (fufhp != NULL) *fufhp = fufh; return 0; } int fuse_filehandle_getrw(struct vnode *vp, int fflag, struct fuse_filehandle **fufhp, struct ucred *cred, pid_t pid) { int err; err = fuse_filehandle_get(vp, fflag, fufhp, cred, pid); if (err) err = fuse_filehandle_get(vp, FREAD | FWRITE, fufhp, cred, pid); return err; } void fuse_filehandle_init(struct vnode *vp, fufh_type_t fufh_type, struct fuse_filehandle **fufhp, struct thread *td, struct ucred *cred, struct fuse_open_out *foo) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct fuse_filehandle *fufh; fufh = malloc(sizeof(struct fuse_filehandle), M_FUSE_FILEHANDLE, M_WAITOK); MPASS(fufh != NULL); fufh->fh_id = foo->fh; fufh->fufh_type = fufh_type; fufh->gid = cred->cr_rgid; fufh->uid = cred->cr_uid; fufh->pid = td->td_proc->p_pid; fufh->fuse_open_flags = foo->open_flags; if (!FUFH_IS_VALID(fufh)) { panic("FUSE: init: invalid filehandle id (type=%d)", fufh_type); } LIST_INSERT_HEAD(&fvdat->handles, fufh, next); if (fufhp != NULL) *fufhp = fufh; atomic_add_acq_int(&fuse_fh_count, 1); if (foo->open_flags & FOPEN_DIRECT_IO) { ASSERT_VOP_ELOCKED(vp, __func__); VTOFUD(vp)->flag |= FN_DIRECTIO; fuse_io_invalbuf(vp, td); } else { if ((foo->open_flags & FOPEN_KEEP_CACHE) == 0) fuse_io_invalbuf(vp, td); VTOFUD(vp)->flag &= ~FN_DIRECTIO; } } Index: projects/fuse2/sys/fs/fuse/fuse_file.h =================================================================== --- projects/fuse2/sys/fs/fuse/fuse_file.h (revision 348318) +++ projects/fuse2/sys/fs/fuse/fuse_file.h (revision 348319) @@ -1,172 +1,207 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2007-2009 Google Inc. and Amit Singh * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (C) 2005 Csaba Henk. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _FUSE_FILE_H_ #define _FUSE_FILE_H_ #include #include #include #include #include /* * The fufh type is the access mode of the fuse file handle. It's the portion * of the open(2) flags related to permission. */ typedef enum fufh_type { FUFH_INVALID = -1, FUFH_RDONLY = O_RDONLY, FUFH_WRONLY = O_WRONLY, FUFH_RDWR = O_RDWR, FUFH_EXEC = O_EXEC, } fufh_type_t; /* * FUSE File Handles * * The FUSE protocol says that a server may assign a unique 64-bit file handle * every time that a file is opened. Effectively, that's once for each file * descriptor. * * Unfortunately, the VFS doesn't help us here. VOPs don't have a * struct file* argument. fileops do, but many syscalls bypass the fileops * layer and go straight to a vnode. Some, like writing from cache, can't * track a file handle even in theory. The entire concept of the file handle * is a product of FUSE's Linux origins; Linux lacks vnodes and almost every * file system operation takes a struct file* argument. * * Since FreeBSD's VFS is more file descriptor-agnostic, we must store FUSE * filehandles in the vnode. One option would be to only store a single file * handle and never open FUSE files concurrently. That's what NetBSD does. * But that violates FUSE's security model. FUSE expects the server to do all * authorization (except when mounted with -o default_permissions). In order * to do that, the server needs us to send FUSE_OPEN every time somebody opens * a new file descriptor. * * Another option would be to never open FUSE files concurrently, but send a * FUSE_ACCESS prior to every open after the first. That would give the server * the opportunity to authorize the access. Unfortunately, the FUSE protocol * makes ACCESS optional. File systems that don't implement it are assumed to * authorize everything. A survey of 32 fuse file systems showed that only 14 * implemented access. Among the laggards were a few that really ought to be * doing server-side authorization. * * So we do something hacky, similar to what OpenBSD, Illumos, and OSXFuse do. * we store a list of file handles, one for each combination of vnode, uid, * gid, pid, and access mode. When opening a file, we first check whether * there's already a matching file handle. If so, we reuse it. If not, we * send FUSE_OPEN and create a new file handle. That minimizes the number of * open file handles while still allowing the server to authorize stuff. * * VOPs that need a file handle search through the list for a close match. * They can't be guaranteed of finding an exact match because, for example, a * process may have changed its UID since opening the file. Also, most VOPs * don't know exactly what permission they need. Is O_RDWR required or is * O_RDONLY good enough? So the file handle we end up using may not be exactly * the one we're supposed to use with that file descriptor. But if the FUSE * file system isn't too picky, it will work. (FWIW even Linux sometimes * guesses the file handle, during writes from cache or most SETATTR * operations). * * I suspect this mess is part of the reason why neither NFS nor 9P have an * equivalent of FUSE file handles. */ struct fuse_filehandle { LIST_ENTRY(fuse_filehandle) next; /* The filehandle returned by FUSE_OPEN */ uint64_t fh_id; /* flags returned by FUSE_OPEN */ uint32_t fuse_open_flags; /* The access mode of the file handle */ fufh_type_t fufh_type; /* Credentials used to open the file */ gid_t gid; pid_t pid; uid_t uid; }; #define FUFH_IS_VALID(f) ((f)->fufh_type != FUFH_INVALID) +/* + * Get the flags to use for FUSE_CREATE, FUSE_OPEN and FUSE_RELEASE + * + * These are supposed to be the same as the flags argument to open(2). + * However, since we can't reliably associate a fuse_filehandle with a specific + * file descriptor it would would be dangerous to include anything more than + * the access mode flags. For example, suppose we open a file twice, once with + * O_APPEND and once without. Then the user pwrite(2)s to offset using the + * second file descriptor. If fusefs uses the first file handle, then the + * server may append the write to the end of the file rather than at offset 0. + * To prevent problems like this, we only ever send the portion of flags + * related to access mode. + * + * It's essential to send that portion, because FUSE uses it for server-side + * authorization. + */ +static inline int +fufh_type_2_fflags(fufh_type_t type) +{ + int oflags = -1; + + switch (type) { + case FUFH_RDONLY: + case FUFH_WRONLY: + case FUFH_RDWR: + case FUFH_EXEC: + oflags = type; + break; + default: + break; + } + + return oflags; +} + bool fuse_filehandle_validrw(struct vnode *vp, int mode, struct ucred *cred, pid_t pid); int fuse_filehandle_get(struct vnode *vp, int fflag, struct fuse_filehandle **fufhp, struct ucred *cred, pid_t pid); int fuse_filehandle_get_anyflags(struct vnode *vp, struct fuse_filehandle **fufhp, struct ucred *cred, pid_t pid); int fuse_filehandle_getrw(struct vnode *vp, int fflag, struct fuse_filehandle **fufhp, struct ucred *cred, pid_t pid); void fuse_filehandle_init(struct vnode *vp, fufh_type_t fufh_type, struct fuse_filehandle **fufhp, struct thread *td, struct ucred *cred, struct fuse_open_out *foo); int fuse_filehandle_open(struct vnode *vp, int mode, struct fuse_filehandle **fufhp, struct thread *td, struct ucred *cred); int fuse_filehandle_close(struct vnode *vp, struct fuse_filehandle *fufh, struct thread *td, struct ucred *cred); #endif /* _FUSE_FILE_H_ */ Index: projects/fuse2/sys/fs/fuse/fuse_internal.c =================================================================== --- projects/fuse2/sys/fs/fuse/fuse_internal.c (revision 348318) +++ projects/fuse2/sys/fs/fuse/fuse_internal.c (revision 348319) @@ -1,949 +1,959 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2007-2009 Google Inc. and Amit Singh * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (C) 2005 Csaba Henk. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fuse.h" #include "fuse_file.h" #include "fuse_internal.h" #include "fuse_ipc.h" #include "fuse_node.h" #include "fuse_file.h" SDT_PROVIDER_DECLARE(fusefs); /* * Fuse trace probe: * arg0: verbosity. Higher numbers give more verbose messages * arg1: Textual message */ SDT_PROBE_DEFINE2(fusefs, , internal, trace, "int", "char*"); #ifdef ZERO_PAD_INCOMPLETE_BUFS static int isbzero(void *buf, size_t len); #endif /* Synchronously send a FUSE_ACCESS operation */ int fuse_internal_access(struct vnode *vp, accmode_t mode, struct thread *td, struct ucred *cred) { int err = 0; uint32_t mask = F_OK; int dataflags; int vtype; struct mount *mp; struct fuse_dispatcher fdi; struct fuse_access_in *fai; struct fuse_data *data; mp = vnode_mount(vp); vtype = vnode_vtype(vp); data = fuse_get_mpdata(mp); dataflags = data->dataflags; if (mode == 0) return 0; if (mode & VMODIFY_PERMS && vfs_isrdonly(mp)) { switch (vp->v_type) { case VDIR: /* FALLTHROUGH */ case VLNK: /* FALLTHROUGH */ case VREG: return EROFS; default: break; } } /* Unless explicitly permitted, deny everyone except the fs owner. */ if (!(dataflags & FSESS_DAEMON_CAN_SPY)) { if (fuse_match_cred(data->daemoncred, cred)) return EPERM; } if (dataflags & FSESS_DEFAULT_PERMISSIONS) { struct vattr va; fuse_internal_getattr(vp, &va, cred, td); return vaccess(vp->v_type, va.va_mode, va.va_uid, va.va_gid, mode, cred, NULL); } if (!fsess_isimpl(mp, FUSE_ACCESS)) return 0; if ((mode & (VWRITE | VAPPEND | VADMIN)) != 0) mask |= W_OK; if ((mode & VREAD) != 0) mask |= R_OK; if ((mode & VEXEC) != 0) mask |= X_OK; fdisp_init(&fdi, sizeof(*fai)); fdisp_make_vp(&fdi, FUSE_ACCESS, vp, td, cred); fai = fdi.indata; fai->mask = mask; err = fdisp_wait_answ(&fdi); fdisp_destroy(&fdi); if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_ACCESS); err = 0; } return err; } /* * Cache FUSE attributes from attr, in attribute cache associated with vnode * 'vp'. Optionally, if argument 'vap' is not NULL, store a copy of the * converted attributes there as well. * * If the nominal attribute cache TTL is zero, do not cache on the 'vp' (but do * return the result to the caller). */ void fuse_internal_cache_attrs(struct vnode *vp, struct ucred *cred, struct fuse_attr *attr, uint64_t attr_valid, uint32_t attr_valid_nsec, struct vattr *vap) { struct mount *mp; struct fuse_vnode_data *fvdat; struct fuse_data *data; struct vattr *vp_cache_at; mp = vnode_mount(vp); fvdat = VTOFUD(vp); data = fuse_get_mpdata(mp); if (!cred) cred = curthread->td_ucred; ASSERT_VOP_ELOCKED(vp, "fuse_internal_cache_attrs"); fuse_validity_2_bintime(attr_valid, attr_valid_nsec, &fvdat->attr_cache_timeout); /* Fix our buffers if the filesize changed without us knowing */ if (vnode_isreg(vp) && attr->size != fvdat->cached_attrs.va_size) { (void)fuse_vnode_setsize(vp, cred, attr->size); fvdat->cached_attrs.va_size = attr->size; } if (attr_valid > 0 || attr_valid_nsec > 0) vp_cache_at = &(fvdat->cached_attrs); else if (vap != NULL) vp_cache_at = vap; else return; vattr_null(vp_cache_at); vp_cache_at->va_fsid = mp->mnt_stat.f_fsid.val[0]; vp_cache_at->va_fileid = attr->ino; vp_cache_at->va_mode = attr->mode & ~S_IFMT; vp_cache_at->va_nlink = attr->nlink; vp_cache_at->va_uid = attr->uid; vp_cache_at->va_gid = attr->gid; vp_cache_at->va_rdev = attr->rdev; vp_cache_at->va_size = attr->size; /* XXX on i386, seconds are truncated to 32 bits */ vp_cache_at->va_atime.tv_sec = attr->atime; vp_cache_at->va_atime.tv_nsec = attr->atimensec; vp_cache_at->va_mtime.tv_sec = attr->mtime; vp_cache_at->va_mtime.tv_nsec = attr->mtimensec; vp_cache_at->va_ctime.tv_sec = attr->ctime; vp_cache_at->va_ctime.tv_nsec = attr->ctimensec; if (fuse_libabi_geq(data, 7, 9) && attr->blksize > 0) vp_cache_at->va_blocksize = attr->blksize; else vp_cache_at->va_blocksize = PAGE_SIZE; vp_cache_at->va_type = IFTOVT(attr->mode); vp_cache_at->va_bytes = attr->blocks * S_BLKSIZE; vp_cache_at->va_flags = 0; if (vap != vp_cache_at && vap != NULL) memcpy(vap, vp_cache_at, sizeof(*vap)); } /* fsync */ int fuse_internal_fsync_callback(struct fuse_ticket *tick, struct uio *uio) { if (tick->tk_aw_ohead.error == ENOSYS) { fsess_set_notimpl(tick->tk_data->mp, fticket_opcode(tick)); } return 0; } int fuse_internal_fsync(struct vnode *vp, struct thread *td, int waitfor, bool datasync) { struct fuse_fsync_in *ffsi = NULL; struct fuse_dispatcher fdi; struct fuse_filehandle *fufh; struct fuse_vnode_data *fvdat = VTOFUD(vp); struct mount *mp = vnode_mount(vp); int op = FUSE_FSYNC; int err = 0; if (!fsess_isimpl(vnode_mount(vp), (vnode_vtype(vp) == VDIR ? FUSE_FSYNCDIR : FUSE_FSYNC))) { return 0; } if (vnode_isdir(vp)) op = FUSE_FSYNCDIR; if (!fsess_isimpl(mp, op)) return 0; fdisp_init(&fdi, sizeof(*ffsi)); /* * fsync every open file handle for this file, because we can't be sure * which file handle the caller is really referring to. */ LIST_FOREACH(fufh, &fvdat->handles, next) { if (ffsi == NULL) fdisp_make_vp(&fdi, op, vp, td, NULL); else fdisp_refresh_vp(&fdi, op, vp, td, NULL); ffsi = fdi.indata; ffsi->fh = fufh->fh_id; ffsi->fsync_flags = 0; if (datasync) ffsi->fsync_flags = 1; if (waitfor == MNT_WAIT) { err = fdisp_wait_answ(&fdi); } else { fuse_insert_callback(fdi.tick, fuse_internal_fsync_callback); fuse_insert_message(fdi.tick, false); } if (err == ENOSYS) { /* ENOSYS means "success, and don't call again" */ fsess_set_notimpl(mp, op); err = 0; break; } } fdisp_destroy(&fdi); return err; } /* mknod */ int fuse_internal_mknod(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap) { struct fuse_mknod_in fmni; fmni.mode = MAKEIMODE(vap->va_type, vap->va_mode); fmni.rdev = vap->va_rdev; return (fuse_internal_newentry(dvp, vpp, cnp, FUSE_MKNOD, &fmni, sizeof(fmni), vap->va_type)); } /* readdir */ int fuse_internal_readdir(struct vnode *vp, struct uio *uio, off_t startoff, struct fuse_filehandle *fufh, struct fuse_iov *cookediov, int *ncookies, u_long *cookies) { int err = 0; struct fuse_dispatcher fdi; struct fuse_read_in *fri = NULL; int fnd_start; if (uio_resid(uio) == 0) return 0; fdisp_init(&fdi, 0); /* * Note that we DO NOT have a UIO_SYSSPACE here (so no need for p2p * I/O). */ /* * fnd_start is set non-zero once the offset in the directory gets * to the startoff. This is done because directories must be read * from the beginning (offset == 0) when fuse_vnop_readdir() needs * to do an open of the directory. * If it is not set non-zero here, it will be set non-zero in * fuse_internal_readdir_processdata() when uio_offset == startoff. */ fnd_start = 0; if (uio->uio_offset == startoff) fnd_start = 1; while (uio_resid(uio) > 0) { fdi.iosize = sizeof(*fri); if (fri == NULL) fdisp_make_vp(&fdi, FUSE_READDIR, vp, NULL, NULL); else fdisp_refresh_vp(&fdi, FUSE_READDIR, vp, NULL, NULL); fri = fdi.indata; fri->fh = fufh->fh_id; fri->offset = uio_offset(uio); fri->size = MIN(uio->uio_resid, fuse_get_mpdata(vp->v_mount)->max_read); if ((err = fdisp_wait_answ(&fdi))) break; if ((err = fuse_internal_readdir_processdata(uio, startoff, &fnd_start, fri->size, fdi.answ, fdi.iosize, cookediov, ncookies, &cookies))) break; } fdisp_destroy(&fdi); return ((err == -1) ? 0 : err); } /* * Return -1 to indicate that this readdir is finished, 0 if it copied * all the directory data read in and it may be possible to read more * and greater than 0 for a failure. */ int fuse_internal_readdir_processdata(struct uio *uio, off_t startoff, int *fnd_start, size_t reqsize, void *buf, size_t bufsize, struct fuse_iov *cookediov, int *ncookies, u_long **cookiesp) { int err = 0; int bytesavail; size_t freclen; struct dirent *de; struct fuse_dirent *fudge; u_long *cookies; cookies = *cookiesp; if (bufsize < FUSE_NAME_OFFSET) return -1; for (;;) { if (bufsize < FUSE_NAME_OFFSET) { err = -1; break; } fudge = (struct fuse_dirent *)buf; freclen = FUSE_DIRENT_SIZE(fudge); if (bufsize < freclen) { /* * This indicates a partial directory entry at the * end of the directory data. */ err = -1; break; } #ifdef ZERO_PAD_INCOMPLETE_BUFS if (isbzero(buf, FUSE_NAME_OFFSET)) { err = -1; break; } #endif if (!fudge->namelen || fudge->namelen > MAXNAMLEN) { err = EINVAL; break; } bytesavail = GENERIC_DIRSIZ((struct pseudo_dirent *) &fudge->namelen); if (bytesavail > uio_resid(uio)) { /* Out of space for the dir so we are done. */ err = -1; break; } /* * Don't start to copy the directory entries out until * the requested offset in the directory is found. */ if (*fnd_start != 0) { fiov_adjust(cookediov, bytesavail); bzero(cookediov->base, bytesavail); de = (struct dirent *)cookediov->base; de->d_fileno = fudge->ino; de->d_reclen = bytesavail; de->d_type = fudge->type; de->d_namlen = fudge->namelen; memcpy((char *)cookediov->base + sizeof(struct dirent) - MAXNAMLEN - 1, (char *)buf + FUSE_NAME_OFFSET, fudge->namelen); dirent_terminate(de); err = uiomove(cookediov->base, cookediov->len, uio); if (err) break; if (cookies != NULL) { if (*ncookies == 0) { err = -1; break; } *cookies = fudge->off; cookies++; (*ncookies)--; } } else if (startoff == fudge->off) *fnd_start = 1; buf = (char *)buf + freclen; bufsize -= freclen; uio_setoffset(uio, fudge->off); } *cookiesp = cookies; return err; } /* remove */ int fuse_internal_remove(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, enum fuse_opcode op) { struct fuse_dispatcher fdi; int err = 0; fdisp_init(&fdi, cnp->cn_namelen + 1); fdisp_make_vp(&fdi, op, dvp, cnp->cn_thread, cnp->cn_cred); memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen); ((char *)fdi.indata)[cnp->cn_namelen] = '\0'; err = fdisp_wait_answ(&fdi); fdisp_destroy(&fdi); return err; } /* rename */ int fuse_internal_rename(struct vnode *fdvp, struct componentname *fcnp, struct vnode *tdvp, struct componentname *tcnp) { struct fuse_dispatcher fdi; struct fuse_rename_in *fri; int err = 0; fdisp_init(&fdi, sizeof(*fri) + fcnp->cn_namelen + tcnp->cn_namelen + 2); fdisp_make_vp(&fdi, FUSE_RENAME, fdvp, tcnp->cn_thread, tcnp->cn_cred); fri = fdi.indata; fri->newdir = VTOI(tdvp); memcpy((char *)fdi.indata + sizeof(*fri), fcnp->cn_nameptr, fcnp->cn_namelen); ((char *)fdi.indata)[sizeof(*fri) + fcnp->cn_namelen] = '\0'; memcpy((char *)fdi.indata + sizeof(*fri) + fcnp->cn_namelen + 1, tcnp->cn_nameptr, tcnp->cn_namelen); ((char *)fdi.indata)[sizeof(*fri) + fcnp->cn_namelen + tcnp->cn_namelen + 1] = '\0'; err = fdisp_wait_answ(&fdi); fdisp_destroy(&fdi); return err; } /* strategy */ /* entity creation */ void fuse_internal_newentry_makerequest(struct mount *mp, uint64_t dnid, struct componentname *cnp, enum fuse_opcode op, void *buf, size_t bufsize, struct fuse_dispatcher *fdip) { fdip->iosize = bufsize + cnp->cn_namelen + 1; fdisp_make(fdip, op, mp, dnid, cnp->cn_thread, cnp->cn_cred); memcpy(fdip->indata, buf, bufsize); memcpy((char *)fdip->indata + bufsize, cnp->cn_nameptr, cnp->cn_namelen); ((char *)fdip->indata)[bufsize + cnp->cn_namelen] = '\0'; } int fuse_internal_newentry_core(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, enum vtype vtyp, struct fuse_dispatcher *fdip) { int err = 0; struct fuse_entry_out *feo; struct mount *mp = vnode_mount(dvp); if ((err = fdisp_wait_answ(fdip))) { return err; } feo = fdip->answ; if ((err = fuse_internal_checkentry(feo, vtyp))) { return err; } err = fuse_vnode_get(mp, feo, feo->nodeid, dvp, vpp, cnp, vtyp); if (err) { fuse_internal_forget_send(mp, cnp->cn_thread, cnp->cn_cred, feo->nodeid, 1); return err; } /* * Purge the parent's attribute cache because the daemon should've * updated its mtime and ctime */ fuse_vnode_clear_attr_cache(dvp); fuse_internal_cache_attrs(*vpp, NULL, &feo->attr, feo->attr_valid, feo->attr_valid_nsec, NULL); return err; } int fuse_internal_newentry(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, enum fuse_opcode op, void *buf, size_t bufsize, enum vtype vtype) { int err; struct fuse_dispatcher fdi; struct mount *mp = vnode_mount(dvp); fdisp_init(&fdi, 0); fuse_internal_newentry_makerequest(mp, VTOI(dvp), cnp, op, buf, bufsize, &fdi); err = fuse_internal_newentry_core(dvp, vpp, cnp, vtype, &fdi); fdisp_destroy(&fdi); return err; } /* entity destruction */ int fuse_internal_forget_callback(struct fuse_ticket *ftick, struct uio *uio) { fuse_internal_forget_send(ftick->tk_data->mp, curthread, NULL, ((struct fuse_in_header *)ftick->tk_ms_fiov.base)->nodeid, 1); return 0; } void fuse_internal_forget_send(struct mount *mp, struct thread *td, struct ucred *cred, uint64_t nodeid, uint64_t nlookup) { struct fuse_dispatcher fdi; struct fuse_forget_in *ffi; /* * KASSERT(nlookup > 0, ("zero-times forget for vp #%llu", * (long long unsigned) nodeid)); */ fdisp_init(&fdi, sizeof(*ffi)); fdisp_make(&fdi, FUSE_FORGET, mp, nodeid, td, cred); ffi = fdi.indata; ffi->nlookup = nlookup; fuse_insert_message(fdi.tick, false); fdisp_destroy(&fdi); } /* Fetch the vnode's attributes from the daemon*/ int fuse_internal_do_getattr(struct vnode *vp, struct vattr *vap, struct ucred *cred, struct thread *td) { struct fuse_dispatcher fdi; struct fuse_vnode_data *fvdat = VTOFUD(vp); struct fuse_getattr_in *fgai; struct fuse_attr_out *fao; off_t old_filesize = fvdat->cached_attrs.va_size; enum vtype vtyp; int err; fdisp_init(&fdi, 0); fdisp_make_vp(&fdi, FUSE_GETATTR, vp, td, cred); fgai = fdi.indata; /* * We could look up a file handle and set it in fgai->fh, but that * involves extra runtime work and I'm unaware of any file systems that * care. */ fgai->getattr_flags = 0; if ((err = fdisp_simple_putget_vp(&fdi, FUSE_GETATTR, vp, td, cred))) { if (err == ENOENT) fuse_internal_vnode_disappear(vp); goto out; } fao = (struct fuse_attr_out *)fdi.answ; vtyp = IFTOVT(fao->attr.mode); if (fvdat->flag & FN_SIZECHANGE) fao->attr.size = old_filesize; fuse_internal_cache_attrs(vp, NULL, &fao->attr, fao->attr_valid, fao->attr_valid_nsec, vap); if (vtyp != vnode_vtype(vp)) { fuse_internal_vnode_disappear(vp); err = ENOENT; } out: fdisp_destroy(&fdi); return err; } /* Read a vnode's attributes from cache or fetch them from the fuse daemon */ int fuse_internal_getattr(struct vnode *vp, struct vattr *vap, struct ucred *cred, struct thread *td) { struct vattr *attrs; if ((attrs = VTOVA(vp)) != NULL) { *vap = *attrs; /* struct copy */ return 0; } return fuse_internal_do_getattr(vp, vap, cred, td); } void fuse_internal_vnode_disappear(struct vnode *vp) { struct fuse_vnode_data *fvdat = VTOFUD(vp); ASSERT_VOP_ELOCKED(vp, "fuse_internal_vnode_disappear"); fvdat->flag |= FN_REVOKED; bintime_clear(&fvdat->attr_cache_timeout); cache_purge(vp); } /* fuse start/stop */ int fuse_internal_init_callback(struct fuse_ticket *tick, struct uio *uio) { int err = 0; struct fuse_data *data = tick->tk_data; struct fuse_init_out *fiio; if ((err = tick->tk_aw_ohead.error)) { goto out; } if ((err = fticket_pull(tick, uio))) { goto out; } fiio = fticket_resp(tick)->base; /* XXX: Do we want to check anything further besides this? */ if (fiio->major < 7) { SDT_PROBE2(fusefs, , internal, trace, 1, "userpace version too low"); err = EPROTONOSUPPORT; goto out; } data->fuse_libabi_major = fiio->major; data->fuse_libabi_minor = fiio->minor; if (fuse_libabi_geq(data, 7, 5)) { if (fticket_resp(tick)->len == sizeof(struct fuse_init_out)) { data->max_write = fiio->max_write; if (fiio->flags & FUSE_ASYNC_READ) data->dataflags |= FSESS_ASYNC_READ; if (fiio->flags & FUSE_POSIX_LOCKS) data->dataflags |= FSESS_POSIX_LOCKS; if (fiio->flags & FUSE_EXPORT_SUPPORT) data->dataflags |= FSESS_EXPORT_SUPPORT; + /* + * Don't bother to check FUSE_BIG_WRITES, because it's + * redundant with max_write + */ } else { err = EINVAL; } } else { /* Old fix values */ data->max_write = 4096; } out: if (err) { fdata_set_dead(data); } FUSE_LOCK(); data->dataflags |= FSESS_INITED; wakeup(&data->ticketer); FUSE_UNLOCK(); return 0; } void fuse_internal_send_init(struct fuse_data *data, struct thread *td) { struct fuse_init_in *fiii; struct fuse_dispatcher fdi; fdisp_init(&fdi, sizeof(*fiii)); fdisp_make(&fdi, FUSE_INIT, data->mp, 0, td, NULL); fiii = fdi.indata; fiii->major = FUSE_KERNEL_VERSION; fiii->minor = FUSE_KERNEL_MINOR_VERSION; /* * fusefs currently doesn't do any readahead other than fetching whole * buffer cache block sized regions at once. So the max readahead is * the size of a buffer cache block. */ fiii->max_readahead = maxbcachebuf; - fiii->flags = FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_EXPORT_SUPPORT ; + /* + * Unsupported features: + * FUSE_FILE_OPS: No known FUSE server or client supports it + * FUSE_ATOMIC_O_TRUNC: our VFS cannot support it + */ + fiii->flags = FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_EXPORT_SUPPORT + | FUSE_BIG_WRITES; fuse_insert_callback(fdi.tick, fuse_internal_init_callback); fuse_insert_message(fdi.tick, false); fdisp_destroy(&fdi); } /* * Send a FUSE_SETATTR operation with no permissions checks. If cred is NULL, * send the request with root credentials */ int fuse_internal_setattr(struct vnode *vp, struct vattr *vap, struct thread *td, struct ucred *cred) { struct fuse_dispatcher fdi; struct fuse_setattr_in *fsai; struct mount *mp; pid_t pid = td->td_proc->p_pid; struct fuse_data *data; int dataflags; int err = 0; enum vtype vtyp; int sizechanged = -1; uint64_t newsize = 0; mp = vnode_mount(vp); data = fuse_get_mpdata(mp); dataflags = data->dataflags; fdisp_init(&fdi, sizeof(*fsai)); fdisp_make_vp(&fdi, FUSE_SETATTR, vp, td, cred); if (!cred) { fdi.finh->uid = 0; fdi.finh->gid = 0; } fsai = fdi.indata; fsai->valid = 0; if (vap->va_uid != (uid_t)VNOVAL) { fsai->uid = vap->va_uid; fsai->valid |= FATTR_UID; } if (vap->va_gid != (gid_t)VNOVAL) { fsai->gid = vap->va_gid; fsai->valid |= FATTR_GID; } if (vap->va_size != VNOVAL) { struct fuse_filehandle *fufh = NULL; /*Truncate to a new value. */ fsai->size = vap->va_size; sizechanged = 1; newsize = vap->va_size; fsai->valid |= FATTR_SIZE; fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid); if (fufh) { fsai->fh = fufh->fh_id; fsai->valid |= FATTR_FH; } VTOFUD(vp)->flag &= ~FN_SIZECHANGE; } if (vap->va_atime.tv_sec != VNOVAL) { fsai->atime = vap->va_atime.tv_sec; fsai->atimensec = vap->va_atime.tv_nsec; fsai->valid |= FATTR_ATIME; if (vap->va_vaflags & VA_UTIMES_NULL) fsai->valid |= FATTR_ATIME_NOW; } if (vap->va_mtime.tv_sec != VNOVAL) { fsai->mtime = vap->va_mtime.tv_sec; fsai->mtimensec = vap->va_mtime.tv_nsec; fsai->valid |= FATTR_MTIME; if (vap->va_vaflags & VA_UTIMES_NULL) fsai->valid |= FATTR_MTIME_NOW; } if (vap->va_mode != (mode_t)VNOVAL) { fsai->mode = vap->va_mode & ALLPERMS; fsai->valid |= FATTR_MODE; } if (!fsai->valid) { goto out; } if ((err = fdisp_wait_answ(&fdi))) goto out; vtyp = IFTOVT(((struct fuse_attr_out *)fdi.answ)->attr.mode); if (vnode_vtype(vp) != vtyp) { if (vnode_vtype(vp) == VNON && vtyp != VNON) { SDT_PROBE2(fusefs, , internal, trace, 1, "FUSE: Dang! " "vnode_vtype is VNON and vtype isn't."); } else { /* * STALE vnode, ditch * * The vnode has changed its type "behind our back". * There's nothing really we can do, so let us just * force an internal revocation and tell the caller to * try again, if interested. */ fuse_internal_vnode_disappear(vp); err = EAGAIN; } } if (err == 0) { struct fuse_attr_out *fao = (struct fuse_attr_out*)fdi.answ; fuse_internal_cache_attrs(vp, cred, &fao->attr, fao->attr_valid, fao->attr_valid_nsec, NULL); } out: fdisp_destroy(&fdi); return err; } #ifdef ZERO_PAD_INCOMPLETE_BUFS static int isbzero(void *buf, size_t len) { int i; for (i = 0; i < len; i++) { if (((char *)buf)[i]) return (0); } return (1); } #endif Index: projects/fuse2/sys/fs/fuse/fuse_io.c =================================================================== --- projects/fuse2/sys/fs/fuse/fuse_io.c (revision 348318) +++ projects/fuse2/sys/fs/fuse/fuse_io.c (revision 348319) @@ -1,996 +1,996 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2007-2009 Google Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (C) 2005 Csaba Henk. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fuse.h" #include "fuse_file.h" #include "fuse_node.h" #include "fuse_internal.h" #include "fuse_ipc.h" #include "fuse_io.h" SDT_PROVIDER_DECLARE(fusefs); /* * Fuse trace probe: * arg0: verbosity. Higher numbers give more verbose messages * arg1: Textual message */ SDT_PROBE_DEFINE2(fusefs, , io, trace, "int", "char*"); static void fuse_io_clear_suid_on_write(struct vnode *vp, struct ucred *cred, struct thread *td); static int fuse_read_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh); static int fuse_read_biobackend(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred, struct fuse_filehandle *fufh, pid_t pid); static int fuse_write_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh, off_t filesize, int ioflag, bool pages); static int fuse_write_biobackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh, int ioflag, pid_t pid); /* * FreeBSD clears the SUID and SGID bits on any write by a non-root user. */ static void fuse_io_clear_suid_on_write(struct vnode *vp, struct ucred *cred, struct thread *td) { struct fuse_data *data; struct mount *mp; struct vattr va; int dataflags; mp = vnode_mount(vp); data = fuse_get_mpdata(mp); dataflags = data->dataflags; if (dataflags & FSESS_DEFAULT_PERMISSIONS) { if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) { fuse_internal_getattr(vp, &va, cred, td); if (va.va_mode & (S_ISUID | S_ISGID)) { mode_t mode = va.va_mode & ~(S_ISUID | S_ISGID); /* Clear all vattr fields except mode */ vattr_null(&va); va.va_mode = mode; /* * Ignore fuse_internal_setattr's return value, * because at this point the write operation has * already succeeded and we don't want to return * failing status for that. */ (void)fuse_internal_setattr(vp, &va, td, NULL); } } } } SDT_PROBE_DEFINE5(fusefs, , io, io_dispatch, "struct vnode*", "struct uio*", "int", "struct ucred*", "struct fuse_filehandle*"); int fuse_io_dispatch(struct vnode *vp, struct uio *uio, int ioflag, bool pages, struct ucred *cred, pid_t pid) { struct fuse_filehandle *fufh; int err, directio; int fflag; bool closefufh = false; MPASS(vp->v_type == VREG || vp->v_type == VDIR); fflag = (uio->uio_rw == UIO_READ) ? FREAD : FWRITE; err = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid); if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) { /* * nfsd will do I/O without first doing VOP_OPEN. We * must implicitly open the file here */ err = fuse_filehandle_open(vp, fflag, &fufh, curthread, cred); closefufh = true; } else if (err) { printf("FUSE: io dispatch: filehandles are closed\n"); return err; } if (err) goto out; SDT_PROBE5(fusefs, , io, io_dispatch, vp, uio, ioflag, cred, fufh); /* * Ideally, when the daemon asks for direct io at open time, the * standard file flag should be set according to this, so that would * just change the default mode, which later on could be changed via * fcntl(2). * But this doesn't work, the O_DIRECT flag gets cleared at some point * (don't know where). So to make any use of the Fuse direct_io option, * we hardwire it into the file's private data (similarly to Linux, * btw.). */ directio = (ioflag & IO_DIRECT) || !fsess_opt_datacache(vnode_mount(vp)); switch (uio->uio_rw) { case UIO_READ: if (directio) { SDT_PROBE2(fusefs, , io, trace, 1, "direct read of vnode"); err = fuse_read_directbackend(vp, uio, cred, fufh); } else { SDT_PROBE2(fusefs, , io, trace, 1, "buffered read of vnode"); err = fuse_read_biobackend(vp, uio, ioflag, cred, fufh, pid); } break; case UIO_WRITE: /* * Kludge: simulate write-through caching via write-around * caching. Same effect, as far as never caching dirty data, * but slightly pessimal in that newly written data is not * cached. */ if (directio || fuse_data_cache_mode == FUSE_CACHE_WT) { const int iosize = fuse_iosize(vp); off_t start, end, filesize; SDT_PROBE2(fusefs, , io, trace, 1, "direct write of vnode"); err = fuse_vnode_size(vp, &filesize, cred, curthread); if (err) goto out; start = uio->uio_offset; end = start + uio->uio_resid; /* * Invalidate the write cache unless we're coming from * VOP_PUTPAGES, in which case we're writing _from_ the * write cache */ if (!pages ) v_inval_buf_range(vp, start, end, iosize); err = fuse_write_directbackend(vp, uio, cred, fufh, filesize, ioflag, pages); } else { SDT_PROBE2(fusefs, , io, trace, 1, "buffered write of vnode"); err = fuse_write_biobackend(vp, uio, cred, fufh, ioflag, pid); } fuse_io_clear_suid_on_write(vp, cred, uio->uio_td); break; default: panic("uninterpreted mode passed to fuse_io_dispatch"); } out: if (closefufh) fuse_filehandle_close(vp, fufh, curthread, cred); return (err); } SDT_PROBE_DEFINE3(fusefs, , io, read_bio_backend_start, "int", "int", "int"); SDT_PROBE_DEFINE2(fusefs, , io, read_bio_backend_feed, "int", "int"); SDT_PROBE_DEFINE3(fusefs, , io, read_bio_backend_end, "int", "ssize_t", "int"); static int fuse_read_biobackend(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred, struct fuse_filehandle *fufh, pid_t pid) { struct buf *bp; daddr_t lbn; int bcount; int err, n = 0, on = 0; off_t filesize; const int biosize = fuse_iosize(vp); if (uio->uio_offset < 0) return (EINVAL); err = fuse_vnode_size(vp, &filesize, cred, curthread); if (err) return err; for (err = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { if (fuse_isdeadfs(vp)) { err = ENXIO; break; } if (filesize - uio->uio_offset <= 0) break; lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize - 1); SDT_PROBE3(fusefs, , io, read_bio_backend_start, biosize, (int)lbn, on); if ((off_t)lbn * biosize >= filesize) { bcount = 0; } else if ((off_t)(lbn + 1) * biosize > filesize) { bcount = filesize - (off_t)lbn *biosize; } else { bcount = biosize; } /* TODO: readahead. See ext2_read for an example */ err = bread(vp, lbn, bcount, NOCRED, &bp); if (err) { brelse(bp); bp = NULL; break; } /* * on is the offset into the current bp. Figure out how many * bytes we can copy out of the bp. Note that bcount is * NOT DEV_BSIZE aligned. * * Then figure out how many bytes we can copy into the uio. */ n = 0; if (on < bcount) n = MIN((unsigned)(bcount - on), uio->uio_resid); if (n > 0) { SDT_PROBE2(fusefs, , io, read_bio_backend_feed, n, n + (int)bp->b_resid); err = uiomove(bp->b_data + on, n, uio); } vfs_bio_brelse(bp, ioflag); SDT_PROBE3(fusefs, , io, read_bio_backend_end, err, uio->uio_resid, n); } return (err); } SDT_PROBE_DEFINE1(fusefs, , io, read_directbackend_start, "struct fuse_read_in*"); SDT_PROBE_DEFINE2(fusefs, , io, read_directbackend_complete, "struct fuse_dispatcher*", "struct uio*"); static int fuse_read_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh) { struct fuse_data *data; struct fuse_dispatcher fdi; struct fuse_read_in *fri; int err = 0; data = fuse_get_mpdata(vp->v_mount); if (uio->uio_resid == 0) return (0); fdisp_init(&fdi, 0); /* * XXX In "normal" case we use an intermediate kernel buffer for * transmitting data from daemon's context to ours. Eventually, we should * get rid of this. Anyway, if the target uio lives in sysspace (we are * called from pageops), and the input data doesn't need kernel-side * processing (we are not called from readdir) we can already invoke * an optimized, "peer-to-peer" I/O routine. */ while (uio->uio_resid > 0) { fdi.iosize = sizeof(*fri); fdisp_make_vp(&fdi, FUSE_READ, vp, uio->uio_td, cred); fri = fdi.indata; fri->fh = fufh->fh_id; fri->offset = uio->uio_offset; fri->size = MIN(uio->uio_resid, fuse_get_mpdata(vp->v_mount)->max_read); if (fuse_libabi_geq(data, 7, 9)) { /* See comment regarding FUSE_WRITE_LOCKOWNER */ fri->read_flags = 0; - fri->flags = 0; /* TODO */ + fri->flags = fufh_type_2_fflags(fufh->fufh_type); } SDT_PROBE1(fusefs, , io, read_directbackend_start, fri); if ((err = fdisp_wait_answ(&fdi))) goto out; SDT_PROBE2(fusefs, , io, read_directbackend_complete, fdi.iosize, uio); if ((err = uiomove(fdi.answ, MIN(fri->size, fdi.iosize), uio))) break; if (fdi.iosize < fri->size) break; } out: fdisp_destroy(&fdi); return (err); } static int fuse_write_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh, off_t filesize, int ioflag, bool pages) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct fuse_data *data; struct fuse_write_in *fwi; struct fuse_write_out *fwo; struct fuse_dispatcher fdi; size_t chunksize; void *fwi_data; off_t as_written_offset; int diff; int err = 0; bool direct_io = fufh->fuse_open_flags & FOPEN_DIRECT_IO; uint32_t write_flags; data = fuse_get_mpdata(vp->v_mount); /* * Don't set FUSE_WRITE_LOCKOWNER in write_flags. It can't be set * accurately when using POSIX AIO, libfuse doesn't use it, and I'm not * aware of any file systems that do. It was an attempt to add * Linux-style mandatory locking to the FUSE protocol, but mandatory * locking is deprecated even on Linux. See Linux commit * f33321141b273d60cbb3a8f56a5489baad82ba5e . */ /* * Set FUSE_WRITE_CACHE whenever we don't know the uid, gid, and/or pid * that originated a write. For example when writing from the * writeback cache. I don't know of a single file system that cares, * but the protocol says we're supposed to do this. */ write_flags = !pages && ( (ioflag & IO_DIRECT) || !fsess_opt_datacache(vnode_mount(vp)) || fuse_data_cache_mode != FUSE_CACHE_WB) ? 0 : FUSE_WRITE_CACHE; if (uio->uio_resid == 0) return (0); if (ioflag & IO_APPEND) uio_setoffset(uio, filesize); fdisp_init(&fdi, 0); while (uio->uio_resid > 0) { chunksize = MIN(uio->uio_resid, data->max_write); fdi.iosize = sizeof(*fwi) + chunksize; fdisp_make_vp(&fdi, FUSE_WRITE, vp, uio->uio_td, cred); fwi = fdi.indata; fwi->fh = fufh->fh_id; fwi->offset = uio->uio_offset; fwi->size = chunksize; fwi->write_flags = write_flags; if (fuse_libabi_geq(data, 7, 9)) { - fwi->flags = 0; /* TODO */ + fwi->flags = fufh_type_2_fflags(fufh->fufh_type); fwi_data = (char *)fdi.indata + sizeof(*fwi); } else { fwi_data = (char *)fdi.indata + FUSE_COMPAT_WRITE_IN_SIZE; } if ((err = uiomove(fwi_data, chunksize, uio))) break; retry: err = fdisp_wait_answ(&fdi); if (err == ERESTART || err == EINTR || err == EWOULDBLOCK) { /* * Rewind the uio so dofilewrite will know it's * incomplete */ uio->uio_resid += fwi->size; uio->uio_offset -= fwi->size; /* * Change ERESTART into EINTR because we can't rewind * uio->uio_iov. Basically, once uiomove(9) has been * called, it's impossible to restart a syscall. */ if (err == ERESTART) err = EINTR; break; } else if (err) { break; } fwo = ((struct fuse_write_out *)fdi.answ); /* Adjust the uio in the case of short writes */ diff = fwi->size - fwo->size; as_written_offset = uio->uio_offset - diff; if (as_written_offset - diff > filesize && fuse_data_cache_mode != FUSE_CACHE_UC) fuse_vnode_setsize(vp, cred, as_written_offset); if (as_written_offset - diff >= filesize) fvdat->flag &= ~FN_SIZECHANGE; if (diff < 0) { printf("WARNING: misbehaving FUSE filesystem " "wrote more data than we provided it\n"); err = EINVAL; break; } else if (diff > 0) { /* Short write */ if (!direct_io) { printf("WARNING: misbehaving FUSE filesystem: " "short writes are only allowed with " "direct_io\n"); } if (ioflag & IO_DIRECT) { /* Return early */ uio->uio_resid += diff; uio->uio_offset -= diff; break; } else { /* Resend the unwritten portion of data */ fdi.iosize = sizeof(*fwi) + diff; /* Refresh fdi without clearing data buffer */ fdisp_refresh_vp(&fdi, FUSE_WRITE, vp, uio->uio_td, cred); fwi = fdi.indata; MPASS2(fwi == fdi.indata, "FUSE dispatcher " "reallocated despite no increase in " "size?"); void *src = (char*)fwi_data + fwo->size; memmove(fwi_data, src, diff); fwi->fh = fufh->fh_id; fwi->offset = as_written_offset; fwi->size = diff; fwi->write_flags = write_flags; goto retry; } } } fdisp_destroy(&fdi); return (err); } SDT_PROBE_DEFINE6(fusefs, , io, write_biobackend_start, "int64_t", "int", "int", "struct uio*", "int", "bool"); SDT_PROBE_DEFINE2(fusefs, , io, write_biobackend_append_race, "long", "int"); static int fuse_write_biobackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh, int ioflag, pid_t pid) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct buf *bp; daddr_t lbn; off_t filesize; int bcount; int n, on, err = 0; const int biosize = fuse_iosize(vp); KASSERT(uio->uio_rw == UIO_WRITE, ("ncl_write mode")); if (vp->v_type != VREG) return (EIO); if (uio->uio_offset < 0) return (EINVAL); if (uio->uio_resid == 0) return (0); err = fuse_vnode_size(vp, &filesize, cred, curthread); if (err) return err; if (ioflag & IO_APPEND) uio_setoffset(uio, filesize); /* * Find all of this file's B_NEEDCOMMIT buffers. If our writes * would exceed the local maximum per-file write commit size when * combined with those, we must decide whether to flush, * go synchronous, or return err. We don't bother checking * IO_UNIT -- we just make all writes atomic anyway, as there's * no point optimizing for something that really won't ever happen. */ do { if (fuse_isdeadfs(vp)) { err = ENXIO; break; } lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize - 1); n = MIN((unsigned)(biosize - on), uio->uio_resid); again: /* * Handle direct append and file extension cases, calculate * unaligned buffer size. */ if (uio->uio_offset == filesize && n) { /* * Get the buffer (in its pre-append state to maintain * B_CACHE if it was previously set). Resize the * nfsnode after we have locked the buffer to prevent * readers from reading garbage. */ bcount = on; SDT_PROBE6(fusefs, , io, write_biobackend_start, lbn, on, n, uio, bcount, true); bp = getblk(vp, lbn, bcount, PCATCH, 0, 0); if (bp != NULL) { long save; err = fuse_vnode_setsize(vp, cred, uio->uio_offset + n); fvdat->flag |= FN_SIZECHANGE; if (err) { brelse(bp); break; } save = bp->b_flags & B_CACHE; bcount += n; allocbuf(bp, bcount); bp->b_flags |= save; } } else { /* * Obtain the locked cache block first, and then * adjust the file's size as appropriate. */ bcount = on + n; if ((off_t)lbn * biosize + bcount < filesize) { if ((off_t)(lbn + 1) * biosize < filesize) bcount = biosize; else bcount = filesize - (off_t)lbn *biosize; } SDT_PROBE6(fusefs, , io, write_biobackend_start, lbn, on, n, uio, bcount, false); bp = getblk(vp, lbn, bcount, PCATCH, 0, 0); if (bp && uio->uio_offset + n > filesize) { err = fuse_vnode_setsize(vp, cred, uio->uio_offset + n); fvdat->flag |= FN_SIZECHANGE; if (err) { brelse(bp); break; } } } if (!bp) { err = EINTR; break; } /* * Issue a READ if B_CACHE is not set. In special-append * mode, B_CACHE is based on the buffer prior to the write * op and is typically set, avoiding the read. If a read * is required in special append mode, the server will * probably send us a short-read since we extended the file * on our end, resulting in b_resid == 0 and, thusly, * B_CACHE getting set. * * We can also avoid issuing the read if the write covers * the entire buffer. We have to make sure the buffer state * is reasonable in this case since we will not be initiating * I/O. See the comments in kern/vfs_bio.c's getblk() for * more information. * * B_CACHE may also be set due to the buffer being cached * normally. */ if (on == 0 && n == bcount) { bp->b_flags |= B_CACHE; bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; } if ((bp->b_flags & B_CACHE) == 0) { bp->b_iocmd = BIO_READ; vfs_busy_pages(bp, 0); fuse_io_strategy(vp, bp); if ((err = bp->b_error)) { brelse(bp); break; } } if (bp->b_wcred == NOCRED) bp->b_wcred = crhold(cred); /* * If dirtyend exceeds file size, chop it down. This should * not normally occur but there is an append race where it * might occur XXX, so we log it. * * If the chopping creates a reverse-indexed or degenerate * situation with dirtyoff/end, we 0 both of them. */ if (bp->b_dirtyend > bcount) { SDT_PROBE2(fusefs, , io, write_biobackend_append_race, (long)bp->b_blkno * biosize, bp->b_dirtyend - bcount); bp->b_dirtyend = bcount; } if (bp->b_dirtyoff >= bp->b_dirtyend) bp->b_dirtyoff = bp->b_dirtyend = 0; /* * If the new write will leave a contiguous dirty * area, just update the b_dirtyoff and b_dirtyend, * otherwise force a write rpc of the old dirty area. * * While it is possible to merge discontiguous writes due to * our having a B_CACHE buffer ( and thus valid read data * for the hole), we don't because it could lead to * significant cache coherency problems with multiple clients, * especially if locking is implemented later on. * * as an optimization we could theoretically maintain * a linked list of discontinuous areas, but we would still * have to commit them separately so there isn't much * advantage to it except perhaps a bit of asynchronization. */ if (bp->b_dirtyend > 0 && (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { /* * Yes, we mean it. Write out everything to "storage" * immediately, without hesitation. (Apart from other * reasons: the only way to know if a write is valid * if its actually written out.) */ bwrite(bp); if (bp->b_error == EINTR) { err = EINTR; break; } goto again; } err = uiomove((char *)bp->b_data + on, n, uio); /* * Since this block is being modified, it must be written * again and not just committed. Since write clustering does * not work for the stage 1 data write, only the stage 2 * commit rpc, we have to clear B_CLUSTEROK as well. */ bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); if (err) { bp->b_ioflags |= BIO_ERROR; bp->b_error = err; brelse(bp); break; } /* * Only update dirtyoff/dirtyend if not a degenerate * condition. */ if (n) { if (bp->b_dirtyend > 0) { bp->b_dirtyoff = MIN(on, bp->b_dirtyoff); bp->b_dirtyend = MAX((on + n), bp->b_dirtyend); } else { bp->b_dirtyoff = on; bp->b_dirtyend = on + n; } vfs_bio_set_valid(bp, on, n); } err = bwrite(bp); if (err) break; } while (uio->uio_resid > 0 && n > 0); return (err); } int fuse_io_strategy(struct vnode *vp, struct buf *bp) { struct fuse_filehandle *fufh; struct ucred *cred; struct uio *uiop; struct uio uio; struct iovec io; off_t filesize; int error = 0; int fflag; /* We don't know the true pid when we're dealing with the cache */ pid_t pid = 0; const int biosize = fuse_iosize(vp); MPASS(vp->v_type == VREG || vp->v_type == VDIR); MPASS(bp->b_iocmd == BIO_READ || bp->b_iocmd == BIO_WRITE); fflag = bp->b_iocmd == BIO_READ ? FREAD : FWRITE; cred = bp->b_iocmd == BIO_READ ? bp->b_rcred : bp->b_wcred; error = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid); if (bp->b_iocmd == BIO_READ && error == EBADF) { /* * This may be a read-modify-write operation on a cached file * opened O_WRONLY. The FUSE protocol allows this. */ error = fuse_filehandle_get(vp, FWRITE, &fufh, cred, pid); } if (error) { printf("FUSE: strategy: filehandles are closed\n"); bp->b_ioflags |= BIO_ERROR; bp->b_error = error; bufdone(bp); return (error); } uiop = &uio; uiop->uio_iov = &io; uiop->uio_iovcnt = 1; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = curthread; /* * clear BIO_ERROR and B_INVAL state prior to initiating the I/O. We * do this here so we do not have to do it in all the code that * calls us. */ bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; KASSERT(!(bp->b_flags & B_DONE), ("fuse_io_strategy: bp %p already marked done", bp)); if (bp->b_iocmd == BIO_READ) { io.iov_len = uiop->uio_resid = bp->b_bcount; io.iov_base = bp->b_data; uiop->uio_rw = UIO_READ; uiop->uio_offset = ((off_t)bp->b_blkno) * biosize; error = fuse_read_directbackend(vp, uiop, cred, fufh); if (!error && uiop->uio_resid) { /* * If we had a short read with no error, we must have * hit a file hole. We should zero-fill the remainder. * This can also occur if the server hits the file EOF. * * Holes used to be able to occur due to pending * writes, but that is not possible any longer. */ int nread = bp->b_bcount - uiop->uio_resid; int left = uiop->uio_resid; if (left > 0) bzero((char *)bp->b_data + nread, left); uiop->uio_resid = 0; } if (error) { bp->b_ioflags |= BIO_ERROR; bp->b_error = error; } } else { /* * If we only need to commit, try to commit */ if (bp->b_flags & B_NEEDCOMMIT) { SDT_PROBE2(fusefs, , io, trace, 1, "write: B_NEEDCOMMIT flags set"); } /* * Setup for actual write */ error = fuse_vnode_size(vp, &filesize, cred, curthread); if (error) { bp->b_ioflags |= BIO_ERROR; bp->b_error = error; bufdone(bp); return (error); } if ((off_t)bp->b_blkno * biosize + bp->b_dirtyend > filesize) bp->b_dirtyend = filesize - (off_t)bp->b_blkno * biosize; if (bp->b_dirtyend > bp->b_dirtyoff) { io.iov_len = uiop->uio_resid = bp->b_dirtyend - bp->b_dirtyoff; uiop->uio_offset = (off_t)bp->b_blkno * biosize + bp->b_dirtyoff; io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; uiop->uio_rw = UIO_WRITE; error = fuse_write_directbackend(vp, uiop, cred, fufh, filesize, 0, false); if (error == EINTR || error == ETIMEDOUT || (!error && (bp->b_flags & B_NEEDCOMMIT))) { bp->b_flags &= ~(B_INVAL | B_NOCACHE); if ((bp->b_flags & B_PAGING) == 0) { bdirty(bp); bp->b_flags &= ~B_DONE; } if ((error == EINTR || error == ETIMEDOUT) && (bp->b_flags & B_ASYNC) == 0) bp->b_flags |= B_EINTR; } else { if (error) { bp->b_ioflags |= BIO_ERROR; bp->b_flags |= B_INVAL; bp->b_error = error; } bp->b_dirtyoff = bp->b_dirtyend = 0; } } else { bp->b_resid = 0; bufdone(bp); return (0); } } bp->b_resid = uiop->uio_resid; bufdone(bp); return (error); } int fuse_io_flushbuf(struct vnode *vp, int waitfor, struct thread *td) { return (vn_fsync_buf(vp, waitfor)); } /* * Flush and invalidate all dirty buffers. If another process is already * doing the flush, just wait for completion. */ int fuse_io_invalbuf(struct vnode *vp, struct thread *td) { struct fuse_vnode_data *fvdat = VTOFUD(vp); int error = 0; if (vp->v_iflag & VI_DOOMED) return 0; ASSERT_VOP_ELOCKED(vp, "fuse_io_invalbuf"); while (fvdat->flag & FN_FLUSHINPROG) { struct proc *p = td->td_proc; if (vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF) return EIO; fvdat->flag |= FN_FLUSHWANT; tsleep(&fvdat->flag, PRIBIO + 2, "fusevinv", 2 * hz); error = 0; if (p != NULL) { PROC_LOCK(p); if (SIGNOTEMPTY(p->p_siglist) || SIGNOTEMPTY(td->td_siglist)) error = EINTR; PROC_UNLOCK(p); } if (error == EINTR) return EINTR; } fvdat->flag |= FN_FLUSHINPROG; if (vp->v_bufobj.bo_object != NULL) { VM_OBJECT_WLOCK(vp->v_bufobj.bo_object); vm_object_page_clean(vp->v_bufobj.bo_object, 0, 0, OBJPC_SYNC); VM_OBJECT_WUNLOCK(vp->v_bufobj.bo_object); } error = vinvalbuf(vp, V_SAVE, PCATCH, 0); while (error) { if (error == ERESTART || error == EINTR) { fvdat->flag &= ~FN_FLUSHINPROG; if (fvdat->flag & FN_FLUSHWANT) { fvdat->flag &= ~FN_FLUSHWANT; wakeup(&fvdat->flag); } return EINTR; } error = vinvalbuf(vp, V_SAVE, PCATCH, 0); } fvdat->flag &= ~FN_FLUSHINPROG; if (fvdat->flag & FN_FLUSHWANT) { fvdat->flag &= ~FN_FLUSHWANT; wakeup(&fvdat->flag); } return (error); } Index: projects/fuse2/tests/sys/fs/fusefs/mockfs.cc =================================================================== --- projects/fuse2/tests/sys/fs/fusefs/mockfs.cc (revision 348318) +++ projects/fuse2/tests/sys/fs/fusefs/mockfs.cc (revision 348319) @@ -1,612 +1,616 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2019 The FreeBSD Foundation * * This software was developed by BFF Storage Systems, LLC under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ extern "C" { #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mntopts.h" // for build_iovec } #include #include #include "mockfs.hh" using namespace testing; int verbosity = 0; const char* opcode2opname(uint32_t opcode) { const int NUM_OPS = 39; const char* table[NUM_OPS] = { "Unknown (opcode 0)", "LOOKUP", "FORGET", "GETATTR", "SETATTR", "READLINK", "SYMLINK", "Unknown (opcode 7)", "MKNOD", "MKDIR", "UNLINK", "RMDIR", "RENAME", "LINK", "OPEN", "READ", "WRITE", "STATFS", "RELEASE", "Unknown (opcode 19)", "FSYNC", "SETXATTR", "GETXATTR", "LISTXATTR", "REMOVEXATTR", "FLUSH", "INIT", "OPENDIR", "READDIR", "RELEASEDIR", "FSYNCDIR", "GETLK", "SETLK", "SETLKW", "ACCESS", "CREATE", "INTERRUPT", "BMAP", "DESTROY" }; if (opcode >= NUM_OPS) return ("Unknown (opcode > max)"); else return (table[opcode]); } ProcessMockerT ReturnErrno(int error) { return([=](auto in, auto &out) { std::unique_ptr out0(new mockfs_buf_out); out0->header.unique = in.header.unique; out0->header.error = -error; out0->header.len = sizeof(out0->header); out.push_back(std::move(out0)); }); } /* Helper function used for returning negative cache entries for LOOKUP */ ProcessMockerT ReturnNegativeCache(const struct timespec *entry_valid) { return([=](auto in, auto &out) { /* nodeid means ENOENT and cache it */ std::unique_ptr out0(new mockfs_buf_out); out0->body.entry.nodeid = 0; out0->header.unique = in.header.unique; out0->header.error = 0; out0->body.entry.entry_valid = entry_valid->tv_sec; out0->body.entry.entry_valid_nsec = entry_valid->tv_nsec; SET_OUT_HEADER_LEN(*out0, entry); out.push_back(std::move(out0)); }); } ProcessMockerT ReturnImmediate(std::function f) { return([=](auto& in, auto &out) { std::unique_ptr out0(new mockfs_buf_out); out0->header.unique = in.header.unique; f(in, *out0); out.push_back(std::move(out0)); }); } void sigint_handler(int __unused sig) { // Don't do anything except interrupt the daemon's read(2) call } void debug_fuseop(const mockfs_buf_in &in) { printf("%-11s ino=%2" PRIu64, opcode2opname(in.header.opcode), in.header.nodeid); if (verbosity > 1) { printf(" uid=%5u gid=%5u pid=%5u unique=%" PRIu64 " len=%u", in.header.uid, in.header.gid, in.header.pid, in.header.unique, in.header.len); } switch (in.header.opcode) { const char *name, *value; case FUSE_ACCESS: printf(" mask=%#x", in.body.access.mask); break; case FUSE_CREATE: name = (const char*)in.body.bytes + sizeof(fuse_open_in); printf(" flags=%#x name=%s", in.body.open.flags, name); break; case FUSE_FLUSH: printf(" fh=%#" PRIx64 " lock_owner=%" PRIu64, in.body.flush.fh, in.body.flush.lock_owner); break; case FUSE_FORGET: printf(" nlookup=%" PRIu64, in.body.forget.nlookup); break; case FUSE_FSYNC: printf(" flags=%#x", in.body.fsync.fsync_flags); break; case FUSE_FSYNCDIR: printf(" flags=%#x", in.body.fsyncdir.fsync_flags); break; case FUSE_INTERRUPT: printf(" unique=%" PRIu64, in.body.interrupt.unique); break; case FUSE_LINK: printf(" oldnodeid=%" PRIu64, in.body.link.oldnodeid); break; case FUSE_LOOKUP: printf(" %s", in.body.lookup); break; case FUSE_MKDIR: name = (const char*)in.body.bytes + sizeof(fuse_mkdir_in); printf(" name=%s mode=%#o", name, in.body.mkdir.mode); break; case FUSE_MKNOD: printf(" mode=%#o rdev=%x", in.body.mknod.mode, in.body.mknod.rdev); break; case FUSE_OPEN: printf(" flags=%#x mode=%#o", in.body.open.flags, in.body.open.mode); break; case FUSE_OPENDIR: printf(" flags=%#x mode=%#o", in.body.opendir.flags, in.body.opendir.mode); break; case FUSE_READ: printf(" offset=%" PRIu64 " size=%u", in.body.read.offset, in.body.read.size); + if (verbosity > 1) + printf(" flags=%#x", in.body.read.flags); break; case FUSE_READDIR: printf(" fh=%#" PRIx64 " offset=%" PRIu64 " size=%u", in.body.readdir.fh, in.body.readdir.offset, in.body.readdir.size); break; case FUSE_RELEASE: printf(" fh=%#" PRIx64 " flags=%#x lock_owner=%" PRIu64, in.body.release.fh, in.body.release.flags, in.body.release.lock_owner); break; case FUSE_SETATTR: if (verbosity <= 1) { printf(" valid=%#x", in.body.setattr.valid); break; } if (in.body.setattr.valid & FATTR_MODE) printf(" mode=%#o", in.body.setattr.mode); if (in.body.setattr.valid & FATTR_UID) printf(" uid=%u", in.body.setattr.uid); if (in.body.setattr.valid & FATTR_GID) printf(" gid=%u", in.body.setattr.gid); if (in.body.setattr.valid & FATTR_SIZE) printf(" size=%" PRIu64, in.body.setattr.size); if (in.body.setattr.valid & FATTR_ATIME) printf(" atime=%" PRIu64 ".%u", in.body.setattr.atime, in.body.setattr.atimensec); if (in.body.setattr.valid & FATTR_MTIME) printf(" mtime=%" PRIu64 ".%u", in.body.setattr.mtime, in.body.setattr.mtimensec); if (in.body.setattr.valid & FATTR_FH) printf(" fh=%" PRIu64 "", in.body.setattr.fh); break; case FUSE_SETLK: printf(" fh=%#" PRIx64 " owner=%" PRIu64 " type=%u pid=%u", in.body.setlk.fh, in.body.setlk.owner, in.body.setlk.lk.type, in.body.setlk.lk.pid); if (verbosity >= 2) { printf(" range=[%" PRIu64 "-%" PRIu64 "]", in.body.setlk.lk.start, in.body.setlk.lk.end); } break; case FUSE_SETXATTR: /* * In theory neither the xattr name and value need be * ASCII, but in this test suite they always are. */ name = (const char*)in.body.bytes + sizeof(fuse_setxattr_in); value = name + strlen(name) + 1; printf(" %s=%s", name, value); break; case FUSE_WRITE: printf(" fh=%#" PRIx64 " offset=%" PRIu64 - " size=%u flags=%u", + " size=%u write_flags=%u", in.body.write.fh, in.body.write.offset, in.body.write.size, in.body.write.write_flags); + if (verbosity > 1) + printf(" flags=%#x", in.body.write.flags); break; default: break; } printf("\n"); } MockFS::MockFS(int max_readahead, bool allow_other, bool default_permissions, bool push_symlinks_in, bool ro, enum poll_method pm, uint32_t flags, uint32_t kernel_minor_version) { struct sigaction sa; struct iovec *iov = NULL; int iovlen = 0; char fdstr[15]; const bool trueval = true; m_daemon_id = NULL; m_kernel_minor_version = kernel_minor_version; m_maxreadahead = max_readahead; m_nready = -1; m_pm = pm; m_quit = false; if (m_pm == KQ) m_kq = kqueue(); else m_kq = -1; /* * Kyua sets pwd to a testcase-unique tempdir; no need to use * mkdtemp */ /* * googletest doesn't allow ASSERT_ in constructors, so we must throw * instead. */ if (mkdir("mountpoint" , 0755) && errno != EEXIST) throw(std::system_error(errno, std::system_category(), "Couldn't make mountpoint directory")); switch (m_pm) { case BLOCKING: m_fuse_fd = open("/dev/fuse", O_CLOEXEC | O_RDWR); break; default: m_fuse_fd = open("/dev/fuse", O_CLOEXEC | O_RDWR | O_NONBLOCK); break; } if (m_fuse_fd < 0) throw(std::system_error(errno, std::system_category(), "Couldn't open /dev/fuse")); m_pid = getpid(); m_child_pid = -1; build_iovec(&iov, &iovlen, "fstype", __DECONST(void *, "fusefs"), -1); build_iovec(&iov, &iovlen, "fspath", __DECONST(void *, "mountpoint"), -1); build_iovec(&iov, &iovlen, "from", __DECONST(void *, "/dev/fuse"), -1); sprintf(fdstr, "%d", m_fuse_fd); build_iovec(&iov, &iovlen, "fd", fdstr, -1); if (allow_other) { build_iovec(&iov, &iovlen, "allow_other", __DECONST(void*, &trueval), sizeof(bool)); } if (default_permissions) { build_iovec(&iov, &iovlen, "default_permissions", __DECONST(void*, &trueval), sizeof(bool)); } if (push_symlinks_in) { build_iovec(&iov, &iovlen, "push_symlinks_in", __DECONST(void*, &trueval), sizeof(bool)); } if (ro) { build_iovec(&iov, &iovlen, "ro", __DECONST(void*, &trueval), sizeof(bool)); } if (nmount(iov, iovlen, 0)) throw(std::system_error(errno, std::system_category(), "Couldn't mount filesystem")); // Setup default handler ON_CALL(*this, process(_, _)) .WillByDefault(Invoke(this, &MockFS::process_default)); init(flags); bzero(&sa, sizeof(sa)); sa.sa_handler = sigint_handler; sa.sa_flags = 0; /* Don't set SA_RESTART! */ if (0 != sigaction(SIGUSR1, &sa, NULL)) throw(std::system_error(errno, std::system_category(), "Couldn't handle SIGUSR1")); if (pthread_create(&m_daemon_id, NULL, service, (void*)this)) throw(std::system_error(errno, std::system_category(), "Couldn't Couldn't start fuse thread")); } MockFS::~MockFS() { kill_daemon(); if (m_daemon_id != NULL) { pthread_join(m_daemon_id, NULL); m_daemon_id = NULL; } ::unmount("mountpoint", MNT_FORCE); rmdir("mountpoint"); if (m_kq >= 0) close(m_kq); } void MockFS::init(uint32_t flags) { std::unique_ptr in(new mockfs_buf_in); std::unique_ptr out(new mockfs_buf_out); read_request(*in); ASSERT_EQ(FUSE_INIT, in->header.opcode); out->header.unique = in->header.unique; out->header.error = 0; out->body.init.major = FUSE_KERNEL_VERSION; out->body.init.minor = m_kernel_minor_version;; out->body.init.flags = in->body.init.flags & flags; /* * The default max_write is set to this formula in libfuse, though * individual filesystems can lower it. The "- 4096" was added in * commit 154ffe2, with the commit message "fix". */ uint32_t default_max_write = 32 * getpagesize() + 0x1000 - 4096; /* For testing purposes, it should be distinct from MAXPHYS */ m_max_write = MIN(default_max_write, MAXPHYS / 2); out->body.init.max_write = m_max_write; out->body.init.max_readahead = m_maxreadahead; SET_OUT_HEADER_LEN(*out, init); write(m_fuse_fd, out.get(), out->header.len); } void MockFS::kill_daemon() { m_quit = true; if (m_daemon_id != NULL) pthread_kill(m_daemon_id, SIGUSR1); // Closing the /dev/fuse file descriptor first allows unmount to // succeed even if the daemon doesn't correctly respond to commands // during the unmount sequence. close(m_fuse_fd); m_fuse_fd = -1; } void MockFS::loop() { std::vector> out; std::unique_ptr in(new mockfs_buf_in); ASSERT_TRUE(in != NULL); while (!m_quit) { bzero(in.get(), sizeof(*in)); read_request(*in); if (m_quit) break; if (verbosity > 0) debug_fuseop(*in); if (pid_ok((pid_t)in->header.pid)) { process(*in, out); } else { /* * Reject any requests from unknown processes. Because * we actually do mount a filesystem, plenty of * unrelated system daemons may try to access it. */ if (verbosity > 1) printf("\tREJECTED (wrong pid %d)\n", in->header.pid); process_default(*in, out); } for (auto &it: out) write_response(*it); out.clear(); } } bool MockFS::pid_ok(pid_t pid) { if (pid == m_pid) { return (true); } else if (pid == m_child_pid) { return (true); } else { struct kinfo_proc *ki; bool ok = false; ki = kinfo_getproc(pid); if (ki == NULL) return (false); /* * Allow access by the aio daemon processes so that our tests * can use aio functions */ if (0 == strncmp("aiod", ki->ki_comm, 4)) ok = true; free(ki); return (ok); } } void MockFS::process_default(const mockfs_buf_in& in, std::vector> &out) { std::unique_ptr out0(new mockfs_buf_out); out0->header.unique = in.header.unique; out0->header.error = -EOPNOTSUPP; out0->header.len = sizeof(out0->header); out.push_back(std::move(out0)); } void MockFS::read_request(mockfs_buf_in &in) { ssize_t res; int nready = 0; fd_set readfds; pollfd fds[1]; struct kevent changes[1]; struct kevent events[1]; struct timespec timeout_ts; struct timeval timeout_tv; const int timeout_ms = 999; int timeout_int, nfds; switch (m_pm) { case BLOCKING: break; case KQ: timeout_ts.tv_sec = 0; timeout_ts.tv_nsec = timeout_ms * 1'000'000; while (nready == 0) { EV_SET(&changes[0], m_fuse_fd, EVFILT_READ, EV_ADD, 0, 0, 0); nready = kevent(m_kq, &changes[0], 1, &events[0], 1, &timeout_ts); if (m_quit) return; } ASSERT_LE(0, nready) << strerror(errno); ASSERT_EQ(events[0].ident, (uintptr_t)m_fuse_fd); if (events[0].flags & EV_ERROR) FAIL() << strerror(events[0].data); else if (events[0].flags & EV_EOF) FAIL() << strerror(events[0].fflags); m_nready = events[0].data; break; case POLL: timeout_int = timeout_ms; fds[0].fd = m_fuse_fd; fds[0].events = POLLIN; while (nready == 0) { nready = poll(fds, 1, timeout_int); if (m_quit) return; } ASSERT_LE(0, nready) << strerror(errno); ASSERT_TRUE(fds[0].revents & POLLIN); break; case SELECT: timeout_tv.tv_sec = 0; timeout_tv.tv_usec = timeout_ms * 1'000; nfds = m_fuse_fd + 1; while (nready == 0) { FD_ZERO(&readfds); FD_SET(m_fuse_fd, &readfds); nready = select(nfds, &readfds, NULL, NULL, &timeout_tv); if (m_quit) return; } ASSERT_LE(0, nready) << strerror(errno); ASSERT_TRUE(FD_ISSET(m_fuse_fd, &readfds)); break; default: FAIL() << "not yet implemented"; } res = read(m_fuse_fd, &in, sizeof(in)); if (res < 0 && !m_quit) perror("read"); ASSERT_TRUE(res >= static_cast(sizeof(in.header)) || m_quit); } void MockFS::write_response(const mockfs_buf_out &out) { fd_set writefds; pollfd fds[1]; int nready, nfds; ssize_t r; switch (m_pm) { case BLOCKING: case KQ: /* EVFILT_WRITE is not supported */ break; case POLL: fds[0].fd = m_fuse_fd; fds[0].events = POLLOUT; nready = poll(fds, 1, INFTIM); ASSERT_LE(0, nready) << strerror(errno); ASSERT_EQ(1, nready) << "NULL timeout expired?"; ASSERT_TRUE(fds[0].revents & POLLOUT); break; case SELECT: FD_ZERO(&writefds); FD_SET(m_fuse_fd, &writefds); nfds = m_fuse_fd + 1; nready = select(nfds, NULL, &writefds, NULL, NULL); ASSERT_LE(0, nready) << strerror(errno); ASSERT_EQ(1, nready) << "NULL timeout expired?"; ASSERT_TRUE(FD_ISSET(m_fuse_fd, &writefds)); break; default: FAIL() << "not yet implemented"; } r = write(m_fuse_fd, &out, out.header.len); ASSERT_TRUE(r > 0 || errno == EAGAIN) << strerror(errno); } void* MockFS::service(void *pthr_data) { MockFS *mock_fs = (MockFS*)pthr_data; mock_fs->loop(); return (NULL); } void MockFS::unmount() { ::unmount("mountpoint", 0); } Index: projects/fuse2/tests/sys/fs/fusefs/utils.cc =================================================================== --- projects/fuse2/tests/sys/fs/fusefs/utils.cc (revision 348318) +++ projects/fuse2/tests/sys/fs/fusefs/utils.cc (revision 348319) @@ -1,550 +1,557 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2019 The FreeBSD Foundation * * This software was developed by BFF Storage Systems, LLC under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ extern "C" { #include #include #include #include #include #include +#include #include #include #include #include } #include #include "mockfs.hh" #include "utils.hh" using namespace testing; /* Check that fusefs(4) is accessible and the current user can mount(2) */ void check_environment() { const char *devnode = "/dev/fuse"; const char *usermount_node = "vfs.usermount"; int usermount_val = 0; size_t usermount_size = sizeof(usermount_val); if (eaccess(devnode, R_OK | W_OK)) { if (errno == ENOENT) { GTEST_SKIP() << devnode << " does not exist"; } else if (errno == EACCES) { GTEST_SKIP() << devnode << " is not accessible by the current user"; } else { GTEST_SKIP() << strerror(errno); } } sysctlbyname(usermount_node, &usermount_val, &usermount_size, NULL, 0); if (geteuid() != 0 && !usermount_val) GTEST_SKIP() << "current user is not allowed to mount"; } class FuseEnv: public Environment { virtual void SetUp() { } }; void FuseTest::SetUp() { const char *node = "vfs.maxbcachebuf"; int val = 0; size_t size = sizeof(val); /* * XXX check_environment should be called from FuseEnv::SetUp, but * can't due to https://github.com/google/googletest/issues/2189 */ check_environment(); if (IsSkipped()) return; ASSERT_EQ(0, sysctlbyname(node, &val, &size, NULL, 0)) << strerror(errno); m_maxbcachebuf = val; try { m_mock = new MockFS(m_maxreadahead, m_allow_other, m_default_permissions, m_push_symlinks_in, m_ro, m_pm, m_init_flags, m_kernel_minor_version); /* * FUSE_ACCESS is called almost universally. Expecting it in * each test case would be super-annoying. Instead, set a * default expectation for FUSE_ACCESS and return ENOSYS. * * Individual test cases can override this expectation since * googlemock evaluates expectations in LIFO order. */ EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_ACCESS); }, Eq(true)), _) ).Times(AnyNumber()) .WillRepeatedly(Invoke(ReturnErrno(ENOSYS))); } catch (std::system_error err) { FAIL() << err.what(); } } void FuseTest::expect_access(uint64_t ino, mode_t access_mode, int error) { EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_ACCESS && in.header.nodeid == ino && in.body.access.mask == access_mode); }, Eq(true)), _) ).WillOnce(Invoke(ReturnErrno(error))); } void FuseTest::expect_destroy(int error) { EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_DESTROY); }, Eq(true)), _) ).WillOnce(Invoke( ReturnImmediate([&](auto in, auto& out) { m_mock->m_quit = true; out.header.len = sizeof(out.header); out.header.unique = in.header.unique; out.header.error = -error; }))); } void FuseTest::expect_flush(uint64_t ino, int times, ProcessMockerT r) { EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_FLUSH && in.header.nodeid == ino); }, Eq(true)), _) ).Times(times) .WillRepeatedly(Invoke(r)); } void FuseTest::expect_forget(uint64_t ino, uint64_t nlookup) { EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_FORGET && in.header.nodeid == ino && in.body.forget.nlookup == nlookup); }, Eq(true)), _) ).WillOnce(Invoke([](auto in __unused, auto &out __unused) { /* FUSE_FORGET has no response! */ })); } void FuseTest::expect_getattr(uint64_t ino, uint64_t size) { EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_GETATTR && in.header.nodeid == ino); }, Eq(true)), _) ).WillOnce(Invoke(ReturnImmediate([=](auto i __unused, auto& out) { SET_OUT_HEADER_LEN(out, attr); out.body.attr.attr.ino = ino; // Must match nodeid out.body.attr.attr.mode = S_IFREG | 0644; out.body.attr.attr.size = size; out.body.attr.attr_valid = UINT64_MAX; }))); } void FuseTest::expect_lookup(const char *relpath, uint64_t ino, mode_t mode, uint64_t size, int times, uint64_t attr_valid, uid_t uid, gid_t gid) { EXPECT_LOOKUP(1, relpath) .Times(times) .WillRepeatedly(Invoke( ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = mode; out.body.entry.nodeid = ino; out.body.entry.attr.nlink = 1; out.body.entry.attr_valid = attr_valid; out.body.entry.attr.size = size; out.body.entry.attr.uid = uid; out.body.entry.attr.gid = gid; }))); } void FuseTest::expect_lookup_7_8(const char *relpath, uint64_t ino, mode_t mode, uint64_t size, int times, uint64_t attr_valid, uid_t uid, gid_t gid) { EXPECT_LOOKUP(1, relpath) .Times(times) .WillRepeatedly(Invoke( ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry_7_8); out.body.entry.attr.mode = mode; out.body.entry.nodeid = ino; out.body.entry.attr.nlink = 1; out.body.entry.attr_valid = attr_valid; out.body.entry.attr.size = size; out.body.entry.attr.uid = uid; out.body.entry.attr.gid = gid; }))); } void FuseTest::expect_open(uint64_t ino, uint32_t flags, int times) { EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_OPEN && in.header.nodeid == ino); }, Eq(true)), _) ).Times(times) .WillRepeatedly(Invoke( ReturnImmediate([=](auto in __unused, auto& out) { out.header.len = sizeof(out.header); SET_OUT_HEADER_LEN(out, open); out.body.open.fh = FH; out.body.open.open_flags = flags; }))); } void FuseTest::expect_opendir(uint64_t ino) { /* opendir(3) calls fstatfs */ EXPECT_CALL(*m_mock, process( ResultOf([](auto in) { return (in.header.opcode == FUSE_STATFS); }, Eq(true)), _) ).WillRepeatedly(Invoke( ReturnImmediate([=](auto i __unused, auto& out) { SET_OUT_HEADER_LEN(out, statfs); }))); EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_OPENDIR && in.header.nodeid == ino); }, Eq(true)), _) ).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { out.header.len = sizeof(out.header); SET_OUT_HEADER_LEN(out, open); out.body.open.fh = FH; }))); } void FuseTest::expect_read(uint64_t ino, uint64_t offset, uint64_t isize, - uint64_t osize, const void *contents) + uint64_t osize, const void *contents, int flags) { EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_READ && in.header.nodeid == ino && in.body.read.fh == FH && in.body.read.offset == offset && - in.body.read.size == isize); + in.body.read.size == isize && + flags == -1 ? + (in.body.read.flags == O_RDONLY || + in.body.read.flags == O_RDWR) + : in.body.read.flags == (uint32_t)flags); }, Eq(true)), _) ).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { out.header.len = sizeof(struct fuse_out_header) + osize; memmove(out.body.bytes, contents, osize); }))).RetiresOnSaturation(); } void FuseTest::expect_readdir(uint64_t ino, uint64_t off, std::vector &ents) { EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_READDIR && in.header.nodeid == ino && in.body.readdir.fh == FH && in.body.readdir.offset == off); }, Eq(true)), _) ).WillRepeatedly(Invoke(ReturnImmediate([=](auto in, auto& out) { struct fuse_dirent *fde = (struct fuse_dirent*)&(out.body); int i = 0; out.header.error = 0; out.header.len = 0; for (const auto& it: ents) { size_t entlen, entsize; fde->ino = it.d_fileno; fde->off = it.d_off; fde->type = it.d_type; fde->namelen = it.d_namlen; strncpy(fde->name, it.d_name, it.d_namlen); entlen = FUSE_NAME_OFFSET + fde->namelen; entsize = FUSE_DIRENT_SIZE(fde); /* * The FUSE protocol does not require zeroing out the * unused portion of the name. But it's a good * practice to prevent information disclosure to the * FUSE client, even though the client is usually the * kernel */ memset(fde->name + fde->namelen, 0, entsize - entlen); if (out.header.len + entsize > in.body.read.size) { printf("Overflow in readdir expectation: i=%d\n" , i); break; } out.header.len += entsize; fde = (struct fuse_dirent*) ((intmax_t*)fde + entsize / sizeof(intmax_t)); i++; } out.header.len += sizeof(out.header); }))); } void FuseTest::expect_release(uint64_t ino, uint64_t fh) { EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_RELEASE && in.header.nodeid == ino && in.body.release.fh == fh); }, Eq(true)), _) ).WillOnce(Invoke(ReturnErrno(0))); } void FuseTest::expect_releasedir(uint64_t ino, ProcessMockerT r) { EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_RELEASEDIR && in.header.nodeid == ino && in.body.release.fh == FH); }, Eq(true)), _) ).WillOnce(Invoke(r)); } void FuseTest::expect_unlink(uint64_t parent, const char *path, int error) { EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_UNLINK && 0 == strcmp(path, in.body.unlink) && in.header.nodeid == parent); }, Eq(true)), _) ).WillOnce(Invoke(ReturnErrno(error))); } void FuseTest::expect_write(uint64_t ino, uint64_t offset, uint64_t isize, uint64_t osize, uint32_t flags_set, uint32_t flags_unset, const void *contents) { EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { const char *buf = (const char*)in.body.bytes + sizeof(struct fuse_write_in); bool pid_ok; uint32_t wf = in.body.write.write_flags; if (wf & FUSE_WRITE_CACHE) pid_ok = true; else pid_ok = (pid_t)in.header.pid == getpid(); return (in.header.opcode == FUSE_WRITE && in.header.nodeid == ino && in.body.write.fh == FH && in.body.write.offset == offset && in.body.write.size == isize && pid_ok && (wf & flags_set) == flags_set && (wf & flags_unset) == 0 && + (in.body.write.flags == O_WRONLY || + in.body.write.flags == O_RDWR) && 0 == bcmp(buf, contents, isize)); }, Eq(true)), _) ).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, write); out.body.write.size = osize; }))); } void FuseTest::expect_write_7_8(uint64_t ino, uint64_t offset, uint64_t isize, uint64_t osize, const void *contents) { EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { const char *buf = (const char*)in.body.bytes + FUSE_COMPAT_WRITE_IN_SIZE; bool pid_ok = (pid_t)in.header.pid == getpid(); return (in.header.opcode == FUSE_WRITE && in.header.nodeid == ino && in.body.write.fh == FH && in.body.write.offset == offset && in.body.write.size == isize && pid_ok && 0 == bcmp(buf, contents, isize)); }, Eq(true)), _) ).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, write); out.body.write.size = osize; }))); } void get_unprivileged_id(uid_t *uid, gid_t *gid) { struct passwd *pw; struct group *gr; /* * First try "tests", Kyua's default unprivileged user. XXX after * GoogleTest gains a proper Kyua wrapper, get this with the Kyua API */ pw = getpwnam("tests"); if (pw == NULL) { /* Fall back to "nobody" */ pw = getpwnam("nobody"); } if (pw == NULL) GTEST_SKIP() << "Test requires an unprivileged user"; /* Use group "nobody", which is Kyua's default unprivileged group */ gr = getgrnam("nobody"); if (gr == NULL) GTEST_SKIP() << "Test requires an unprivileged group"; *uid = pw->pw_uid; *gid = gr->gr_gid; } void FuseTest::fork(bool drop_privs, int *child_status, std::function parent_func, std::function child_func) { sem_t *sem; int mprot = PROT_READ | PROT_WRITE; int mflags = MAP_ANON | MAP_SHARED; pid_t child; uid_t uid; gid_t gid; if (drop_privs) { get_unprivileged_id(&uid, &gid); if (IsSkipped()) return; } sem = (sem_t*)mmap(NULL, sizeof(*sem), mprot, mflags, -1, 0); ASSERT_NE(MAP_FAILED, sem) << strerror(errno); ASSERT_EQ(0, sem_init(sem, 1, 0)) << strerror(errno); if ((child = ::fork()) == 0) { /* In child */ int err = 0; if (sem_wait(sem)) { perror("sem_wait"); err = 1; goto out; } if (drop_privs && 0 != setegid(gid)) { perror("setegid"); err = 1; goto out; } if (drop_privs && 0 != setreuid(-1, uid)) { perror("setreuid"); err = 1; goto out; } err = child_func(); out: sem_destroy(sem); _exit(err); } else if (child > 0) { /* * In parent. Cleanup must happen here, because it's still * privileged. */ m_mock->m_child_pid = child; ASSERT_NO_FATAL_FAILURE(parent_func()); /* Signal the child process to go */ ASSERT_EQ(0, sem_post(sem)) << strerror(errno); ASSERT_LE(0, wait(child_status)) << strerror(errno); } else { FAIL() << strerror(errno); } munmap(sem, sizeof(*sem)); return; } static void usage(char* progname) { fprintf(stderr, "Usage: %s [-v]\n\t-v increase verbosity\n", progname); exit(2); } int main(int argc, char **argv) { int ch; FuseEnv *fuse_env = new FuseEnv; InitGoogleTest(&argc, argv); AddGlobalTestEnvironment(fuse_env); while ((ch = getopt(argc, argv, "v")) != -1) { switch (ch) { case 'v': verbosity++; break; default: usage(argv[0]); break; } } return (RUN_ALL_TESTS()); } Index: projects/fuse2/tests/sys/fs/fusefs/utils.hh =================================================================== --- projects/fuse2/tests/sys/fs/fusefs/utils.hh (revision 348318) +++ projects/fuse2/tests/sys/fs/fusefs/utils.hh (revision 348319) @@ -1,202 +1,202 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2019 The FreeBSD Foundation * * This software was developed by BFF Storage Systems, LLC under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* Nanoseconds to sleep, for tests that must */ #define NAP_NS (100'000'000) void get_unprivileged_id(uid_t *uid, gid_t *gid); inline void nap() { usleep(NAP_NS / 1000); } class FuseTest : public ::testing::Test { protected: uint32_t m_maxreadahead; uint32_t m_init_flags; bool m_allow_other; bool m_default_permissions; uint32_t m_kernel_minor_version; enum poll_method m_pm; bool m_push_symlinks_in; bool m_ro; MockFS *m_mock = NULL; const static uint64_t FH = 0xdeadbeef1a7ebabe; public: int m_maxbcachebuf; FuseTest(): /* * libfuse's default max_readahead is UINT_MAX, though it can * be lowered */ m_maxreadahead(UINT_MAX), m_init_flags(0), m_allow_other(false), m_default_permissions(false), m_kernel_minor_version(FUSE_KERNEL_MINOR_VERSION), m_pm(BLOCKING), m_push_symlinks_in(false), m_ro(false) {} virtual void SetUp(); virtual void TearDown() { if (m_mock) delete m_mock; } /* * Create an expectation that FUSE_ACCESS will be called once for the * given inode with the given access_mode, returning the given errno */ void expect_access(uint64_t ino, mode_t access_mode, int error); /* Expect FUSE_DESTROY and shutdown the daemon */ void expect_destroy(int error); /* * Create an expectation that FUSE_FLUSH will be called times times for * the given inode */ void expect_flush(uint64_t ino, int times, ProcessMockerT r); /* * Create an expectation that FUSE_FORGET will be called for the given * inode. There will be no response */ void expect_forget(uint64_t ino, uint64_t nlookup); /* * Create an expectation that FUSE_GETATTR will be called for the given * inode any number of times. It will respond with a few basic * attributes, like the given size and the mode S_IFREG | 0644 */ void expect_getattr(uint64_t ino, uint64_t size); /* * Create an expectation that FUSE_LOOKUP will be called for the given * path exactly times times and cache validity period. It will respond * with inode ino, mode mode, filesize size. */ void expect_lookup(const char *relpath, uint64_t ino, mode_t mode, uint64_t size, int times, uint64_t attr_valid = UINT64_MAX, uid_t uid = 0, gid_t gid = 0); /* The protocol 7.8 version of expect_lookup */ void expect_lookup_7_8(const char *relpath, uint64_t ino, mode_t mode, uint64_t size, int times, uint64_t attr_valid = UINT64_MAX, uid_t uid = 0, gid_t gid = 0); /* * Create an expectation that FUSE_OPEN will be called for the given * inode exactly times times. It will return with open_flags flags and * file handle FH. */ void expect_open(uint64_t ino, uint32_t flags, int times); /* * Create an expectation that FUSE_OPENDIR will be called exactly once * for inode ino. */ void expect_opendir(uint64_t ino); /* * Create an expectation that FUSE_READ will be called exactly once for * the given inode, at offset offset and with size isize. It will * return the first osize bytes from contents * * Protocol 7.8 tests can use this same expectation method because * nothing currently validates the size of the fuse_read_in struct. */ void expect_read(uint64_t ino, uint64_t offset, uint64_t isize, - uint64_t osize, const void *contents); + uint64_t osize, const void *contents, int flags = -1); /* * Create an expectation that FUSE_READIR will be called any number of * times on the given ino with the given offset, returning (by copy) * the provided entries */ void expect_readdir(uint64_t ino, uint64_t off, std::vector &ents); /* * Create an expectation that FUSE_RELEASE will be called exactly once * for the given inode and filehandle, returning success */ void expect_release(uint64_t ino, uint64_t fh); /* * Create an expectation that FUSE_RELEASEDIR will be called exactly * once for the given inode */ void expect_releasedir(uint64_t ino, ProcessMockerT r); /* * Create an expectation that FUSE_UNLINK will be called exactly once * for the given path, returning an errno */ void expect_unlink(uint64_t parent, const char *path, int error); /* * Create an expectation that FUSE_WRITE will be called exactly once * for the given inode, at offset offset, with size isize and buffer * contents. Any flags present in flags_set must be set, and any * present in flags_unset must not be set. Other flags are don't care. * It will return osize. */ void expect_write(uint64_t ino, uint64_t offset, uint64_t isize, uint64_t osize, uint32_t flags_set, uint32_t flags_unset, const void *contents); /* Protocol 7.8 version of expect_write */ void expect_write_7_8(uint64_t ino, uint64_t offset, uint64_t isize, uint64_t osize, const void *contents); /* * Helper that runs code in a child process. * * First, parent_func runs in the parent process. * Then, child_func runs in the child process, dropping privileges if * desired. * Finally, fusetest_fork returns. * * # Returns * * fusetest_fork may SKIP the test, which the caller should detect with * the IsSkipped() method. If not, then the child's exit status will * be returned in status. */ void fork(bool drop_privs, int *status, std::function parent_func, std::function child_func); }; Index: projects/fuse2/tests/sys/fs/fusefs/write.cc =================================================================== --- projects/fuse2/tests/sys/fs/fusefs/write.cc (revision 348318) +++ projects/fuse2/tests/sys/fs/fusefs/write.cc (revision 348319) @@ -1,768 +1,768 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2019 The FreeBSD Foundation * * This software was developed by BFF Storage Systems, LLC under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ extern "C" { #include #include #include #include #include #include #include #include } #include "mockfs.hh" #include "utils.hh" using namespace testing; class Write: public FuseTest { public: void expect_lookup(const char *relpath, uint64_t ino, uint64_t size) { FuseTest::expect_lookup(relpath, ino, S_IFREG | 0644, size, 1); } void expect_release(uint64_t ino, ProcessMockerT r) { EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_RELEASE && in.header.nodeid == ino); }, Eq(true)), _) ).WillRepeatedly(Invoke(r)); } void expect_write(uint64_t ino, uint64_t offset, uint64_t isize, uint64_t osize, const void *contents) { FuseTest::expect_write(ino, offset, isize, osize, 0, 0, contents); } }; class Write_7_8: public FuseTest { public: virtual void SetUp() { m_kernel_minor_version = 8; FuseTest::SetUp(); } void expect_lookup(const char *relpath, uint64_t ino, uint64_t size) { FuseTest::expect_lookup_7_8(relpath, ino, S_IFREG | 0644, size, 1); } }; class AioWrite: public Write { virtual void SetUp() { const char *node = "vfs.aio.enable_unsafe"; int val = 0; size_t size = sizeof(val); FuseTest::SetUp(); ASSERT_EQ(0, sysctlbyname(node, &val, &size, NULL, 0)) << strerror(errno); if (!val) GTEST_SKIP() << "vfs.aio.enable_unsafe must be set for this test"; } }; /* Tests for the write-through cache mode */ class WriteThrough: public Write { public: virtual void SetUp() { const char *cache_mode_node = "vfs.fusefs.data_cache_mode"; int val = 0; size_t size = sizeof(val); FuseTest::SetUp(); if (IsSkipped()) return; ASSERT_EQ(0, sysctlbyname(cache_mode_node, &val, &size, NULL, 0)) << strerror(errno); if (val != 1) GTEST_SKIP() << "vfs.fusefs.data_cache_mode must be set to 1 " "(writethrough) for this test"; } void expect_write(uint64_t ino, uint64_t offset, uint64_t isize, uint64_t osize, const void *contents) { FuseTest::expect_write(ino, offset, isize, osize, 0, FUSE_WRITE_CACHE, contents); } }; /* Tests for the writeback cache mode */ class WriteBack: public Write { public: virtual void SetUp() { const char *node = "vfs.fusefs.data_cache_mode"; int val = 0; size_t size = sizeof(val); FuseTest::SetUp(); if (IsSkipped()) return; ASSERT_EQ(0, sysctlbyname(node, &val, &size, NULL, 0)) << strerror(errno); if (val != 2) GTEST_SKIP() << "vfs.fusefs.data_cache_mode must be set to 2 " "(writeback) for this test"; } void expect_write(uint64_t ino, uint64_t offset, uint64_t isize, uint64_t osize, const void *contents) { FuseTest::expect_write(ino, offset, isize, osize, FUSE_WRITE_CACHE, 0, contents); } }; /* AIO writes need to set the header's pid field correctly */ /* https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=236379 */ TEST_F(AioWrite, DISABLED_aio_write) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; uint64_t ino = 42; uint64_t offset = 4096; int fd; ssize_t bufsize = strlen(CONTENTS); struct aiocb iocb, *piocb; expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); expect_write(ino, offset, bufsize, bufsize, CONTENTS); fd = open(FULLPATH, O_WRONLY); EXPECT_LE(0, fd) << strerror(errno); iocb.aio_nbytes = bufsize; iocb.aio_fildes = fd; iocb.aio_buf = (void *)CONTENTS; iocb.aio_offset = offset; iocb.aio_sigevent.sigev_notify = SIGEV_NONE; ASSERT_EQ(0, aio_write(&iocb)) << strerror(errno); ASSERT_EQ(bufsize, aio_waitcomplete(&piocb, NULL)) << strerror(errno); /* Deliberately leak fd. close(2) will be tested in release.cc */ } /* * When a file is opened with O_APPEND, we should forward that flag to * FUSE_OPEN (tested by Open.o_append) but still attempt to calculate the * offset internally. That way we'll work both with filesystems that * understand O_APPEND (and ignore the offset) and filesystems that don't (and * simply use the offset). * * Note that verifying the O_APPEND flag in FUSE_OPEN is done in the * Open.o_append test. */ TEST_F(Write, append) { const ssize_t BUFSIZE = 9; const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char CONTENTS[BUFSIZE] = "abcdefgh"; uint64_t ino = 42; /* * Set offset to a maxbcachebuf boundary so we don't need to RMW when * using writeback caching */ uint64_t initial_offset = m_maxbcachebuf; int fd; expect_lookup(RELPATH, ino, initial_offset); expect_open(ino, 0, 1); expect_write(ino, initial_offset, BUFSIZE, BUFSIZE, CONTENTS); /* Must open O_RDWR or fuse(4) implicitly sets direct_io */ fd = open(FULLPATH, O_RDWR | O_APPEND); EXPECT_LE(0, fd) << strerror(errno); ASSERT_EQ(BUFSIZE, write(fd, CONTENTS, BUFSIZE)) << strerror(errno); /* Deliberately leak fd. close(2) will be tested in release.cc */ } TEST_F(Write, append_direct_io) { const ssize_t BUFSIZE = 9; const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char CONTENTS[BUFSIZE] = "abcdefgh"; uint64_t ino = 42; uint64_t initial_offset = 4096; int fd; expect_lookup(RELPATH, ino, initial_offset); expect_open(ino, FOPEN_DIRECT_IO, 1); expect_write(ino, initial_offset, BUFSIZE, BUFSIZE, CONTENTS); fd = open(FULLPATH, O_WRONLY | O_APPEND); EXPECT_LE(0, fd) << strerror(errno); ASSERT_EQ(BUFSIZE, write(fd, CONTENTS, BUFSIZE)) << strerror(errno); /* Deliberately leak fd. close(2) will be tested in release.cc */ } /* A direct write should evict any overlapping cached data */ TEST_F(Write, direct_io_evicts_cache) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char CONTENTS0[] = "abcdefgh"; const char CONTENTS1[] = "ijklmnop"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS0) + 1; char readbuf[bufsize]; expect_lookup(RELPATH, ino, bufsize); expect_open(ino, 0, 1); expect_read(ino, 0, bufsize, bufsize, CONTENTS0); expect_write(ino, 0, bufsize, bufsize, CONTENTS1); fd = open(FULLPATH, O_RDWR); EXPECT_LE(0, fd) << strerror(errno); // Prime cache ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno); // Write directly, evicting cache ASSERT_EQ(0, fcntl(fd, F_SETFL, O_DIRECT)) << strerror(errno); ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS1, bufsize)) << strerror(errno); // Read again. Cache should be bypassed expect_read(ino, 0, bufsize, bufsize, CONTENTS1); ASSERT_EQ(0, fcntl(fd, F_SETFL, 0)) << strerror(errno); ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno); ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno); ASSERT_STREQ(readbuf, CONTENTS1); /* Deliberately leak fd. close(2) will be tested in release.cc */ } /* * If the server doesn't return FOPEN_DIRECT_IO during FUSE_OPEN, then it's not * allowed to return a short write for that file handle. However, if it does * then we should still do our darndest to handle it by resending the unwritten * portion. */ TEST_F(Write, indirect_io_short_write) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefghijklmnop"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS); ssize_t bufsize0 = 11; ssize_t bufsize1 = strlen(CONTENTS) - bufsize0; const char *contents1 = CONTENTS + bufsize0; expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); expect_write(ino, 0, bufsize, bufsize0, CONTENTS); expect_write(ino, bufsize0, bufsize1, bufsize1, contents1); fd = open(FULLPATH, O_WRONLY); EXPECT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); /* Deliberately leak fd. close(2) will be tested in release.cc */ } /* * When the direct_io option is used, filesystems are allowed to write less * data than requested. We should return the short write to userland. */ TEST_F(Write, direct_io_short_write) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefghijklmnop"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS); ssize_t halfbufsize = bufsize / 2; expect_lookup(RELPATH, ino, 0); expect_open(ino, FOPEN_DIRECT_IO, 1); expect_write(ino, 0, bufsize, halfbufsize, CONTENTS); fd = open(FULLPATH, O_WRONLY); EXPECT_LE(0, fd) << strerror(errno); ASSERT_EQ(halfbufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); /* Deliberately leak fd. close(2) will be tested in release.cc */ } /* * An insidious edge case: the filesystem returns a short write, and the * difference between what we requested and what it actually wrote crosses an * iov element boundary */ TEST_F(Write, direct_io_short_write_iov) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS0 = "abcdefgh"; const char *CONTENTS1 = "ijklmnop"; const char *EXPECTED0 = "abcdefghijklmnop"; uint64_t ino = 42; int fd; ssize_t size0 = strlen(CONTENTS0) - 1; ssize_t size1 = strlen(CONTENTS1) + 1; ssize_t totalsize = size0 + size1; struct iovec iov[2]; expect_lookup(RELPATH, ino, 0); expect_open(ino, FOPEN_DIRECT_IO, 1); expect_write(ino, 0, totalsize, size0, EXPECTED0); fd = open(FULLPATH, O_WRONLY); EXPECT_LE(0, fd) << strerror(errno); iov[0].iov_base = (void*)CONTENTS0; iov[0].iov_len = strlen(CONTENTS0); iov[1].iov_base = (void*)CONTENTS1; iov[1].iov_len = strlen(CONTENTS1); ASSERT_EQ(size0, writev(fd, iov, 2)) << strerror(errno); /* Deliberately leak fd. close(2) will be tested in release.cc */ } /* * If the kernel cannot be sure which uid, gid, or pid was responsible for a * write, then it must set the FUSE_WRITE_CACHE bit */ /* https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=236378 */ // TODO: check vfs.fusefs.mmap_enable TEST_F(Write, mmap) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS); void *p; uint64_t offset = 10; size_t len; void *zeros, *expected; len = getpagesize(); zeros = calloc(1, len); ASSERT_NE(NULL, zeros); expected = calloc(1, len); ASSERT_NE(NULL, expected); memmove((uint8_t*)expected + offset, CONTENTS, bufsize); expect_lookup(RELPATH, ino, len); expect_open(ino, 0, 1); expect_read(ino, 0, len, len, zeros); /* * Writes from the pager may or may not be associated with the correct * pid, so they must set FUSE_WRITE_CACHE. */ FuseTest::expect_write(ino, 0, len, len, FUSE_WRITE_CACHE, 0, expected); expect_flush(ino, 1, ReturnErrno(0)); expect_release(ino, ReturnErrno(0)); fd = open(FULLPATH, O_RDWR); EXPECT_LE(0, fd) << strerror(errno); p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); ASSERT_NE(MAP_FAILED, p) << strerror(errno); memmove((uint8_t*)p + offset, CONTENTS, bufsize); ASSERT_EQ(0, munmap(p, len)) << strerror(errno); close(fd); // Write mmap'd data on close free(expected); free(zeros); } /* In WriteThrough mode, a write should evict overlapping cached data */ TEST_F(WriteThrough, evicts_read_cache) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; ssize_t bufsize = 65536; /* End the write in the middle of a page */ ssize_t wrsize = bufsize - 1000; char *contents0, *contents1, *readbuf, *expected; uint64_t ino = 42; int fd; contents0 = (char*)malloc(bufsize); memset(contents0, 'X', bufsize); contents0[bufsize - 1] = '\0'; // Null-terminate contents1 = (char*)malloc(wrsize); memset(contents1, 'Y', wrsize); readbuf = (char*)calloc(bufsize, 1); expected = (char*)malloc(bufsize); memset(expected, 'Y', wrsize); memset(expected + wrsize, 'X', bufsize - wrsize); expected[bufsize - 1] = '\0'; // Null-terminate expect_lookup(RELPATH, ino, bufsize); expect_open(ino, 0, 1); expect_read(ino, 0, bufsize, bufsize, contents0); expect_write(ino, 0, wrsize, wrsize, contents1); fd = open(FULLPATH, O_RDWR); EXPECT_LE(0, fd) << strerror(errno); // Prime cache ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno); // Write directly, evicting cache ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno); ASSERT_EQ(wrsize, write(fd, contents1, wrsize)) << strerror(errno); // Read again. Cache should be bypassed expect_read(ino, 0, bufsize, bufsize, expected); ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno); ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno); ASSERT_STREQ(readbuf, expected); /* Deliberately leak fd. close(2) will be tested in release.cc */ } TEST_F(WriteThrough, pwrite) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; uint64_t ino = 42; uint64_t offset = 4096; int fd; ssize_t bufsize = strlen(CONTENTS); expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); expect_write(ino, offset, bufsize, bufsize, CONTENTS); fd = open(FULLPATH, O_WRONLY); EXPECT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, pwrite(fd, CONTENTS, bufsize, offset)) << strerror(errno); /* Deliberately leak fd. close(2) will be tested in release.cc */ } TEST_F(Write, write) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS); expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); expect_write(ino, 0, bufsize, bufsize, CONTENTS); fd = open(FULLPATH, O_WRONLY); EXPECT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); /* Deliberately leak fd. close(2) will be tested in release.cc */ } /* fuse(4) should not issue writes of greater size than the daemon requests */ TEST_F(Write, write_large) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; int *contents; uint64_t ino = 42; int fd; ssize_t halfbufsize, bufsize; halfbufsize = m_mock->m_max_write; bufsize = halfbufsize * 2; contents = (int*)malloc(bufsize); ASSERT_NE(NULL, contents); for (int i = 0; i < (int)bufsize / (int)sizeof(i); i++) { contents[i] = i; } expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); expect_write(ino, 0, halfbufsize, halfbufsize, contents); expect_write(ino, halfbufsize, halfbufsize, halfbufsize, &contents[halfbufsize / sizeof(int)]); fd = open(FULLPATH, O_WRONLY); EXPECT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, contents, bufsize)) << strerror(errno); /* Deliberately leak fd. close(2) will be tested in release.cc */ free(contents); } TEST_F(Write, write_nothing) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = ""; uint64_t ino = 42; int fd; ssize_t bufsize = 0; expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); fd = open(FULLPATH, O_WRONLY); EXPECT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); /* Deliberately leak fd. close(2) will be tested in release.cc */ } TEST_F(Write_7_8, write) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS); expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); expect_write_7_8(ino, 0, bufsize, bufsize, CONTENTS); fd = open(FULLPATH, O_WRONLY); EXPECT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); /* Deliberately leak fd. close(2) will be tested in release.cc */ } /* In writeback mode, dirty data should be written on close */ TEST_F(WriteBack, close) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS); expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); expect_write(ino, 0, bufsize, bufsize, CONTENTS); EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_SETATTR); }, Eq(true)), _) ).WillRepeatedly(Invoke(ReturnImmediate([=](auto i __unused, auto& out) { SET_OUT_HEADER_LEN(out, attr); out.body.attr.attr.ino = ino; // Must match nodeid }))); expect_flush(ino, 1, ReturnErrno(0)); expect_release(ino, ReturnErrno(0)); fd = open(FULLPATH, O_RDWR); ASSERT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); close(fd); } /* * In writeback mode, writes to an O_WRONLY file could trigger reads from the * server. The FUSE protocol explicitly allows that. */ TEST_F(WriteBack, rmw) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; const char *INITIAL = "XXXXXXXXXX"; uint64_t ino = 42; uint64_t offset = 1; off_t fsize = 10; int fd; ssize_t bufsize = strlen(CONTENTS); FuseTest::expect_lookup(RELPATH, ino, S_IFREG | 0644, fsize, 1); expect_open(ino, 0, 1); - expect_read(ino, 0, fsize, fsize, INITIAL); + expect_read(ino, 0, fsize, fsize, INITIAL, O_WRONLY); expect_write(ino, offset, bufsize, bufsize, CONTENTS); fd = open(FULLPATH, O_WRONLY); EXPECT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, pwrite(fd, CONTENTS, bufsize, offset)) << strerror(errno); /* Deliberately leak fd. close(2) will be tested in release.cc */ } /* * Without direct_io, writes should be committed to cache */ TEST_F(WriteBack, writeback) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS); char readbuf[bufsize]; expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); expect_write(ino, 0, bufsize, bufsize, CONTENTS); fd = open(FULLPATH, O_RDWR); EXPECT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); /* * A subsequent read should be serviced by cache, without querying the * filesystem daemon */ ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno); ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno); /* Deliberately leak fd. close(2) will be tested in release.cc */ } /* * With O_DIRECT, writes should be not committed to cache. Admittedly this is * an odd test, because it would be unusual to use O_DIRECT for writes but not * reads. */ TEST_F(WriteBack, o_direct) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS); char readbuf[bufsize]; expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); FuseTest::expect_write(ino, 0, bufsize, bufsize, 0, FUSE_WRITE_CACHE, CONTENTS); expect_read(ino, 0, bufsize, bufsize, CONTENTS); fd = open(FULLPATH, O_RDWR | O_DIRECT); EXPECT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); /* A subsequent read must query the daemon because cache is empty */ ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno); ASSERT_EQ(0, fcntl(fd, F_SETFL, 0)) << strerror(errno); ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno); /* Deliberately leak fd. close(2) will be tested in release.cc */ } /* * Without direct_io, writes should be committed to cache */ /* * Disabled because we don't yet implement write-through caching. No bugzilla * entry, because that's a feature request, not a bug. */ TEST_F(WriteThrough, DISABLED_writethrough) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS); char readbuf[bufsize]; expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); expect_write(ino, 0, bufsize, bufsize, CONTENTS); fd = open(FULLPATH, O_RDWR); EXPECT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); /* * A subsequent read should be serviced by cache, without querying the * filesystem daemon */ ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno); /* Deliberately leak fd. close(2) will be tested in release.cc */ } /* With writethrough caching, writes update the cached file size */ TEST_F(WriteThrough, update_file_size) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; struct stat sb; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS); expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); expect_write(ino, 0, bufsize, bufsize, CONTENTS); fd = open(FULLPATH, O_RDWR); EXPECT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); /* Get cached attributes */ ASSERT_EQ(0, fstat(fd, &sb)) << strerror(errno); ASSERT_EQ(bufsize, sb.st_size); /* Deliberately leak fd. close(2) will be tested in release.cc */ }