Index: projects/fuse2/sys/fs/fuse/fuse_internal.c
===================================================================
--- projects/fuse2/sys/fs/fuse/fuse_internal.c	(revision 345875)
+++ projects/fuse2/sys/fs/fuse/fuse_internal.c	(revision 345876)
@@ -1,709 +1,716 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2007-2009 Google Inc. and Amit Singh
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
  *
  * * Redistributions of source code must retain the above copyright
  *   notice, this list of conditions and the following disclaimer.
  * * Redistributions in binary form must reproduce the above
  *   copyright notice, this list of conditions and the following disclaimer
  *   in the documentation and/or other materials provided with the
  *   distribution.
  * * Neither the name of Google Inc. nor the names of its
  *   contributors may be used to endorse or promote products derived from
  *   this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Copyright (C) 2005 Csaba Henk.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/module.h>
 #include <sys/systm.h>
 #include <sys/errno.h>
 #include <sys/kernel.h>
 #include <sys/conf.h>
 #include <sys/uio.h>
 #include <sys/malloc.h>
 #include <sys/queue.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/sdt.h>
 #include <sys/sx.h>
 #include <sys/proc.h>
 #include <sys/mount.h>
 #include <sys/vnode.h>
 #include <sys/namei.h>
 #include <sys/stat.h>
 #include <sys/unistd.h>
 #include <sys/filedesc.h>
 #include <sys/file.h>
 #include <sys/fcntl.h>
 #include <sys/dirent.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/sysctl.h>
 #include <sys/priv.h>
 
 #include "fuse.h"
 #include "fuse_file.h"
 #include "fuse_internal.h"
 #include "fuse_ipc.h"
 #include "fuse_node.h"
 #include "fuse_file.h"
 
 SDT_PROVIDER_DECLARE(fuse);
 /* 
  * Fuse trace probe:
  * arg0: verbosity.  Higher numbers give more verbose messages
  * arg1: Textual message
  */
 SDT_PROBE_DEFINE2(fuse, , internal, trace, "int", "char*");
 
 #ifdef ZERO_PAD_INCOMPLETE_BUFS
 static int isbzero(void *buf, size_t len);
 
 #endif
 
 /* Synchronously send a FUSE_ACCESS operation */
 int
 fuse_internal_access(struct vnode *vp,
     mode_t mode,
     struct fuse_access_param *facp,
     struct thread *td,
     struct ucred *cred)
 {
 	int err = 0;
 	uint32_t mask = 0;
 	int dataflags;
 	int vtype;
 	struct mount *mp;
 	struct fuse_dispatcher fdi;
 	struct fuse_access_in *fai;
 	struct fuse_data *data;
 
 	/* NOT YET DONE */
 	/*
 	 * If this vnop gives you trouble, just return 0 here for a lazy
 	 * kludge.
 	 */
 	/* return 0;*/
 
 	mp = vnode_mount(vp);
 	vtype = vnode_vtype(vp);
 
 	data = fuse_get_mpdata(mp);
 	dataflags = data->dataflags;
 
 	if ((mode & VWRITE) && vfs_isrdonly(mp)) {
 		return EACCES;
 	}
 	/* Unless explicitly permitted, deny everyone except the fs owner. */
 	    if (vnode_isvroot(vp) && !(facp->facc_flags & FACCESS_NOCHECKSPY)) {
 		if (!(dataflags & FSESS_DAEMON_CAN_SPY)) {
 			int denied = fuse_match_cred(data->daemoncred,
 			    cred);
 
 			if (denied) {
 				return EPERM;
 			}
 		}
 		facp->facc_flags |= FACCESS_NOCHECKSPY;
 	}
 	if (!(facp->facc_flags & FACCESS_DO_ACCESS)) {
 		return 0;
 	}
 	if (((vtype == VREG) && (mode & VEXEC))) {
 #ifdef NEED_MOUNT_ARGUMENT_FOR_THIS
 		/* Let	 the kernel handle this through open / close heuristics.*/
 		    return ENOTSUP;
 #else
 		    /* 	Let the kernel handle this. */
 		    return 0;
 #endif
 	}
 	if (!fsess_isimpl(mp, FUSE_ACCESS)) {
 		/* Let the kernel handle this. */
 		    return 0;
 	}
 	if (dataflags & FSESS_DEFAULT_PERMISSIONS) {
 		/* Let the kernel handle this. */
 		    return 0;
 	}
 	if ((mode & VADMIN) != 0) {
 		err = priv_check_cred(cred, PRIV_VFS_ADMIN);
 		if (err) {
 			return err;
 		}
 	}
 	if ((mode & (VWRITE | VAPPEND | VADMIN)) != 0) {
 		mask |= W_OK;
 	}
 	if ((mode & VREAD) != 0) {
 		mask |= R_OK;
 	}
 	if ((mode & VEXEC) != 0) {
 		mask |= X_OK;
 	}
 	bzero(&fdi, sizeof(fdi));
 
 	fdisp_init(&fdi, sizeof(*fai));
 	fdisp_make_vp(&fdi, FUSE_ACCESS, vp, td, cred);
 
 	fai = fdi.indata;
 	fai->mask = F_OK;
 	fai->mask |= mask;
 
 	err = fdisp_wait_answ(&fdi);
 	fdisp_destroy(&fdi);
 
 	if (err == ENOSYS) {
 		fsess_set_notimpl(mp, FUSE_ACCESS);
 		err = 0;
 	}
 	return err;
 }
 
 /*
  * Cache FUSE attributes from feo, in attr cache associated with vnode 'vp'.
  * Optionally, if argument 'vap' is not NULL, store a copy of the converted
  * attributes there as well.
  *
  * If the nominal attribute cache TTL is zero, do not cache on the 'vp' (but do
  * return the result to the caller).
  */
 void
 fuse_internal_cache_attrs(struct vnode *vp, struct fuse_attr *attr,
 	uint64_t attr_valid, uint32_t attr_valid_nsec, struct vattr *vap)
 {
 	struct mount *mp;
 	struct fuse_vnode_data *fvdat;
 	struct vattr *vp_cache_at;
 
 	mp = vnode_mount(vp);
 	fvdat = VTOFUD(vp);
 
 	/* Honor explicit do-not-cache requests from user filesystems. */
 	if (attr_valid == 0 && attr_valid_nsec == 0)
 		fvdat->valid_attr_cache = false;
 	else
 		fvdat->valid_attr_cache = true;
 
 	vp_cache_at = VTOVA(vp);
 
 	if (vap == NULL && vp_cache_at == NULL)
 		return;
 
 	if (vap == NULL)
 		vap = vp_cache_at;
 
 	vattr_null(vap);
 
 	vap->va_fsid = mp->mnt_stat.f_fsid.val[0];
 	vap->va_fileid = attr->ino;
 	vap->va_mode = attr->mode & ~S_IFMT;
 	vap->va_nlink     = attr->nlink;
 	vap->va_uid       = attr->uid;
 	vap->va_gid       = attr->gid;
 	vap->va_rdev      = attr->rdev;
 	vap->va_size      = attr->size;
 	/* XXX on i386, seconds are truncated to 32 bits */
 	vap->va_atime.tv_sec  = attr->atime;
 	vap->va_atime.tv_nsec = attr->atimensec;
 	vap->va_mtime.tv_sec  = attr->mtime;
 	vap->va_mtime.tv_nsec = attr->mtimensec;
 	vap->va_ctime.tv_sec  = attr->ctime;
 	vap->va_ctime.tv_nsec = attr->ctimensec;
 	vap->va_blocksize = PAGE_SIZE;
 	vap->va_type = IFTOVT(attr->mode);
 	vap->va_bytes = attr->blocks * S_BLKSIZE;
 	vap->va_flags = 0;
 
 	if (vap != vp_cache_at && vp_cache_at != NULL)
 		memcpy(vp_cache_at, vap, sizeof(*vap));
 }
 
 
 /* fsync */
 
 int
 fuse_internal_fsync_callback(struct fuse_ticket *tick, struct uio *uio)
 {
 	if (tick->tk_aw_ohead.error == ENOSYS) {
 		fsess_set_notimpl(tick->tk_data->mp, fticket_opcode(tick));
 	}
 	return 0;
 }
 
 int
 fuse_internal_fsync(struct vnode *vp,
     struct thread *td,
     int waitfor,
     bool datasync)
 {
-	struct fuse_fsync_in *ffsi;
+	struct fuse_fsync_in *ffsi = NULL;
 	struct fuse_dispatcher fdi;
 	struct fuse_filehandle *fufh;
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 	int op = FUSE_FSYNC;
 	int err = 0;
 
 	if (!fsess_isimpl(vnode_mount(vp),
 	    (vnode_vtype(vp) == VDIR ? FUSE_FSYNCDIR : FUSE_FSYNC))) {
 		return 0;
 	}
 	if (vnode_isdir(vp))
 		op = FUSE_FSYNCDIR;
+
+	fdisp_init(&fdi, sizeof(*ffsi));
 	/*
 	 * fsync every open file handle for this file, because we can't be sure
 	 * which file handle the caller is really referring to.
 	 */
 	LIST_FOREACH(fufh, &fvdat->handles, next) {
-		fdisp_init(&fdi, sizeof(*ffsi));
-		fdisp_make_vp(&fdi, op, vp, td, NULL);
+		if (ffsi == NULL)
+			fdisp_make_vp(&fdi, op, vp, td, NULL);
+		else
+			fdisp_refresh_vp(&fdi, op, vp, td, NULL);
 		ffsi = fdi.indata;
 		ffsi->fh = fufh->fh_id;
+		ffsi->fsync_flags = 0;
 
 		if (datasync)
 			ffsi->fsync_flags = 1;
 
 		if (waitfor == MNT_WAIT) {
 			err = fdisp_wait_answ(&fdi);
 		} else {
 			fuse_insert_callback(fdi.tick,
 				fuse_internal_fsync_callback);
 			fuse_insert_message(fdi.tick);
 		}
-		fdisp_destroy(&fdi);
 	}
+	fdisp_destroy(&fdi);
 
 	return err;
 }
 
 /* readdir */
 
 int
 fuse_internal_readdir(struct vnode *vp,
     struct uio *uio,
     struct fuse_filehandle *fufh,
     struct fuse_iov *cookediov)
 {
 	int err = 0;
 	struct fuse_dispatcher fdi;
-	struct fuse_read_in *fri;
+	struct fuse_read_in *fri = NULL;
 
 	if (uio_resid(uio) == 0) {
 		return 0;
 	}
 	fdisp_init(&fdi, 0);
 
 	/*
 	 * Note that we DO NOT have a UIO_SYSSPACE here (so no need for p2p
 	 * I/O).
 	 */
 
 	while (uio_resid(uio) > 0) {
-
 		fdi.iosize = sizeof(*fri);
-		fdisp_make_vp(&fdi, FUSE_READDIR, vp, NULL, NULL);
+		if (fri == NULL)
+			fdisp_make_vp(&fdi, FUSE_READDIR, vp, NULL, NULL);
+		else
+			fdisp_refresh_vp(&fdi, FUSE_READDIR, vp, NULL, NULL);
 
 		fri = fdi.indata;
 		fri->fh = fufh->fh_id;
 		fri->offset = uio_offset(uio);
 		fri->size = MIN(uio->uio_resid,
 		    fuse_get_mpdata(vp->v_mount)->max_read);
 
 		    if ((err = fdisp_wait_answ(&fdi))) {
 			break;
 		}
 		if ((err = fuse_internal_readdir_processdata(uio, fri->size, fdi.answ,
 		    fdi.iosize, cookediov))) {
 			break;
 		}
 	}
 
 	fdisp_destroy(&fdi);
 	return ((err == -1) ? 0 : err);
 }
 
 int
 fuse_internal_readdir_processdata(struct uio *uio,
     size_t reqsize,
     void *buf,
     size_t bufsize,
     void *param)
 {
 	int err = 0;
 	int cou = 0;
 	int bytesavail;
 	size_t freclen;
 
 	struct dirent *de;
 	struct fuse_dirent *fudge;
 	struct fuse_iov *cookediov = param;
 
 	if (bufsize < FUSE_NAME_OFFSET) {
 		return -1;
 	}
 	for (;;) {
 
 		if (bufsize < FUSE_NAME_OFFSET) {
 			err = -1;
 			break;
 		}
 		fudge = (struct fuse_dirent *)buf;
 		freclen = FUSE_DIRENT_SIZE(fudge);
 
 		cou++;
 
 		if (bufsize < freclen) {
 			err = ((cou == 1) ? -1 : 0);
 			break;
 		}
 #ifdef ZERO_PAD_INCOMPLETE_BUFS
 		if (isbzero(buf, FUSE_NAME_OFFSET)) {
 			err = -1;
 			break;
 		}
 #endif
 
 		if (!fudge->namelen || fudge->namelen > MAXNAMLEN) {
 			err = EINVAL;
 			break;
 		}
 		bytesavail = GENERIC_DIRSIZ((struct pseudo_dirent *)
 					    &fudge->namelen);
 
 		if (bytesavail > uio_resid(uio)) {
 			err = -1;
 			break;
 		}
-		fiov_refresh(cookediov);
 		fiov_adjust(cookediov, bytesavail);
+		bzero(cookediov->base, bytesavail);
 
 		de = (struct dirent *)cookediov->base;
 		de->d_fileno = fudge->ino;
 		de->d_reclen = bytesavail;
 		de->d_type = fudge->type;
 		de->d_namlen = fudge->namelen;
 		memcpy((char *)cookediov->base + sizeof(struct dirent) - 
 		       MAXNAMLEN - 1,
 		       (char *)buf + FUSE_NAME_OFFSET, fudge->namelen);
 		dirent_terminate(de);
 
 		err = uiomove(cookediov->base, cookediov->len, uio);
 		if (err) {
 			break;
 		}
 		buf = (char *)buf + freclen;
 		bufsize -= freclen;
 		uio_setoffset(uio, fudge->off);
 	}
 
 	return err;
 }
 
 /* remove */
 
 int
 fuse_internal_remove(struct vnode *dvp,
     struct vnode *vp,
     struct componentname *cnp,
     enum fuse_opcode op)
 {
 	struct fuse_dispatcher fdi;
 	int err = 0;
 
 	fdisp_init(&fdi, cnp->cn_namelen + 1);
 	fdisp_make_vp(&fdi, op, dvp, cnp->cn_thread, cnp->cn_cred);
 
 	memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen);
 	((char *)fdi.indata)[cnp->cn_namelen] = '\0';
 
 	err = fdisp_wait_answ(&fdi);
 	fdisp_destroy(&fdi);
 	return err;
 }
 
 /* rename */
 
 int
 fuse_internal_rename(struct vnode *fdvp,
     struct componentname *fcnp,
     struct vnode *tdvp,
     struct componentname *tcnp)
 {
 	struct fuse_dispatcher fdi;
 	struct fuse_rename_in *fri;
 	int err = 0;
 
 	fdisp_init(&fdi, sizeof(*fri) + fcnp->cn_namelen + tcnp->cn_namelen + 2);
 	fdisp_make_vp(&fdi, FUSE_RENAME, fdvp, tcnp->cn_thread, tcnp->cn_cred);
 
 	fri = fdi.indata;
 	fri->newdir = VTOI(tdvp);
 	memcpy((char *)fdi.indata + sizeof(*fri), fcnp->cn_nameptr,
 	    fcnp->cn_namelen);
 	((char *)fdi.indata)[sizeof(*fri) + fcnp->cn_namelen] = '\0';
 	memcpy((char *)fdi.indata + sizeof(*fri) + fcnp->cn_namelen + 1,
 	    tcnp->cn_nameptr, tcnp->cn_namelen);
 	((char *)fdi.indata)[sizeof(*fri) + fcnp->cn_namelen +
 	    tcnp->cn_namelen + 1] = '\0';
 
 	err = fdisp_wait_answ(&fdi);
 	fdisp_destroy(&fdi);
 	return err;
 }
 
 /* strategy */
 
 /* entity creation */
 
 void
 fuse_internal_newentry_makerequest(struct mount *mp,
     uint64_t dnid,
     struct componentname *cnp,
     enum fuse_opcode op,
     void *buf,
     size_t bufsize,
     struct fuse_dispatcher *fdip)
 {
 	fdip->iosize = bufsize + cnp->cn_namelen + 1;
 
 	fdisp_make(fdip, op, mp, dnid, cnp->cn_thread, cnp->cn_cred);
 	memcpy(fdip->indata, buf, bufsize);
 	memcpy((char *)fdip->indata + bufsize, cnp->cn_nameptr, cnp->cn_namelen);
 	((char *)fdip->indata)[bufsize + cnp->cn_namelen] = '\0';
 }
 
 int
 fuse_internal_newentry_core(struct vnode *dvp,
     struct vnode **vpp,
     struct componentname *cnp,
     enum vtype vtyp,
     struct fuse_dispatcher *fdip)
 {
 	int err = 0;
 	struct fuse_entry_out *feo;
 	struct mount *mp = vnode_mount(dvp);
 
 	if ((err = fdisp_wait_answ(fdip))) {
 		return err;
 	}
 	feo = fdip->answ;
 
 	if ((err = fuse_internal_checkentry(feo, vtyp))) {
 		return err;
 	}
 	err = fuse_vnode_get(mp, feo, feo->nodeid, dvp, vpp, cnp, vtyp);
 	if (err) {
 		fuse_internal_forget_send(mp, cnp->cn_thread, cnp->cn_cred,
 		    feo->nodeid, 1);
 		return err;
 	}
 	fuse_internal_cache_attrs(*vpp, &feo->attr, feo->attr_valid,
 		feo->attr_valid_nsec, NULL);
 
 	return err;
 }
 
 int
 fuse_internal_newentry(struct vnode *dvp,
     struct vnode **vpp,
     struct componentname *cnp,
     enum fuse_opcode op,
     void *buf,
     size_t bufsize,
     enum vtype vtype)
 {
 	int err;
 	struct fuse_dispatcher fdi;
 	struct mount *mp = vnode_mount(dvp);
 
 	fdisp_init(&fdi, 0);
 	fuse_internal_newentry_makerequest(mp, VTOI(dvp), cnp, op, buf,
 	    bufsize, &fdi);
 	err = fuse_internal_newentry_core(dvp, vpp, cnp, vtype, &fdi);
 	fdisp_destroy(&fdi);
 
 	return err;
 }
 
 /* entity destruction */
 
 int
 fuse_internal_forget_callback(struct fuse_ticket *ftick, struct uio *uio)
 {
 	fuse_internal_forget_send(ftick->tk_data->mp, curthread, NULL,
 	    ((struct fuse_in_header *)ftick->tk_ms_fiov.base)->nodeid, 1);
 
 	return 0;
 }
 
 void
 fuse_internal_forget_send(struct mount *mp,
     struct thread *td,
     struct ucred *cred,
     uint64_t nodeid,
     uint64_t nlookup)
 {
 
 	struct fuse_dispatcher fdi;
 	struct fuse_forget_in *ffi;
 
 	/*
          * KASSERT(nlookup > 0, ("zero-times forget for vp #%llu",
          *         (long long unsigned) nodeid));
          */
 
 	fdisp_init(&fdi, sizeof(*ffi));
 	fdisp_make(&fdi, FUSE_FORGET, mp, nodeid, td, cred);
 
 	ffi = fdi.indata;
 	ffi->nlookup = nlookup;
 
 	fuse_insert_message(fdi.tick);
 	fdisp_destroy(&fdi);
 }
 
 void
 fuse_internal_vnode_disappear(struct vnode *vp)
 {
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 
 	ASSERT_VOP_ELOCKED(vp, "fuse_internal_vnode_disappear");
 	fvdat->flag |= FN_REVOKED;
 	fvdat->valid_attr_cache = false;
 	cache_purge(vp);
 }
 
 /* fuse start/stop */
 
 int
 fuse_internal_init_callback(struct fuse_ticket *tick, struct uio *uio)
 {
 	int err = 0;
 	struct fuse_data *data = tick->tk_data;
 	struct fuse_init_out *fiio;
 
 	if ((err = tick->tk_aw_ohead.error)) {
 		goto out;
 	}
 	if ((err = fticket_pull(tick, uio))) {
 		goto out;
 	}
 	fiio = fticket_resp(tick)->base;
 
 	/* XXX: Do we want to check anything further besides this? */
 	if (fiio->major < 7) {
 		SDT_PROBE2(fuse, , internal, trace, 1,
 			"userpace version too low");
 		err = EPROTONOSUPPORT;
 		goto out;
 	}
 	data->fuse_libabi_major = fiio->major;
 	data->fuse_libabi_minor = fiio->minor;
 
 	if (fuse_libabi_geq(data, 7, 5)) {
 		if (fticket_resp(tick)->len == sizeof(struct fuse_init_out)) {
 			data->max_write = fiio->max_write;
 		} else {
 			err = EINVAL;
 		}
 	} else {
 		/* Old fix values */
 		data->max_write = 4096;
 	}
 
 out:
 	if (err) {
 		fdata_set_dead(data);
 	}
 	FUSE_LOCK();
 	data->dataflags |= FSESS_INITED;
 	wakeup(&data->ticketer);
 	FUSE_UNLOCK();
 
 	return 0;
 }
 
 void
 fuse_internal_send_init(struct fuse_data *data, struct thread *td)
 {
 	struct fuse_init_in *fiii;
 	struct fuse_dispatcher fdi;
 
 	fdisp_init(&fdi, sizeof(*fiii));
 	fdisp_make(&fdi, FUSE_INIT, data->mp, 0, td, NULL);
 	fiii = fdi.indata;
 	fiii->major = FUSE_KERNEL_VERSION;
 	fiii->minor = FUSE_KERNEL_MINOR_VERSION;
 	/* 
 	 * fusefs currently doesn't do any readahead other than fetching whole
 	 * buffer cache block sized regions at once.  So the max readahead is
 	 * the size of a buffer cache block.
 	 */
 	fiii->max_readahead = maxbcachebuf;
 	fiii->flags = 0;
 
 	fuse_insert_callback(fdi.tick, fuse_internal_init_callback);
 	fuse_insert_message(fdi.tick);
 	fdisp_destroy(&fdi);
 }
 
 #ifdef ZERO_PAD_INCOMPLETE_BUFS
 static int
 isbzero(void *buf, size_t len)
 {
 	int i;
 
 	for (i = 0; i < len; i++) {
 		if (((char *)buf)[i])
 			return (0);
 	}
 
 	return (1);
 }
 
 #endif
Index: projects/fuse2/sys/fs/fuse/fuse_io.c
===================================================================
--- projects/fuse2/sys/fs/fuse/fuse_io.c	(revision 345875)
+++ projects/fuse2/sys/fs/fuse/fuse_io.c	(revision 345876)
@@ -1,857 +1,886 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2007-2009 Google Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
  *
  * * Redistributions of source code must retain the above copyright
  *   notice, this list of conditions and the following disclaimer.
  * * Redistributions in binary form must reproduce the above
  *   copyright notice, this list of conditions and the following disclaimer
  *   in the documentation and/or other materials provided with the
  *   distribution.
  * * Neither the name of Google Inc. nor the names of its
  *   contributors may be used to endorse or promote products derived from
  *   this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Copyright (C) 2005 Csaba Henk.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/types.h>
 #include <sys/module.h>
 #include <sys/systm.h>
 #include <sys/errno.h>
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/conf.h>
 #include <sys/uio.h>
 #include <sys/malloc.h>
 #include <sys/queue.h>
 #include <sys/lock.h>
 #include <sys/sx.h>
 #include <sys/mutex.h>
 #include <sys/rwlock.h>
 #include <sys/proc.h>
 #include <sys/mount.h>
 #include <sys/vnode.h>
 #include <sys/stat.h>
 #include <sys/unistd.h>
 #include <sys/filedesc.h>
 #include <sys/file.h>
 #include <sys/fcntl.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/sysctl.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_page.h>
 #include <vm/vm_object.h>
 
 #include "fuse.h"
 #include "fuse_file.h"
 #include "fuse_node.h"
 #include "fuse_internal.h"
 #include "fuse_ipc.h"
 #include "fuse_io.h"
 
 SDT_PROVIDER_DECLARE(fuse);
 /* 
  * Fuse trace probe:
  * arg0: verbosity.  Higher numbers give more verbose messages
  * arg1: Textual message
  */
 SDT_PROBE_DEFINE2(fuse, , io, trace, "int", "char*");
 
 static int 
 fuse_read_directbackend(struct vnode *vp, struct uio *uio,
     struct ucred *cred, struct fuse_filehandle *fufh);
 static int 
 fuse_read_biobackend(struct vnode *vp, struct uio *uio,
     struct ucred *cred, struct fuse_filehandle *fufh, pid_t pid);
 static int 
 fuse_write_directbackend(struct vnode *vp, struct uio *uio,
     struct ucred *cred, struct fuse_filehandle *fufh, int ioflag);
 static int 
 fuse_write_biobackend(struct vnode *vp, struct uio *uio,
     struct ucred *cred, struct fuse_filehandle *fufh, int ioflag, pid_t pid);
 
 SDT_PROBE_DEFINE5(fuse, , io, io_dispatch, "struct vnode*", "struct uio*",
 		"int", "struct ucred*", "struct fuse_filehandle*");
 int
 fuse_io_dispatch(struct vnode *vp, struct uio *uio, int ioflag,
     struct ucred *cred, pid_t pid)
 {
 	struct fuse_filehandle *fufh;
 	int err, directio;
 	int fflag;
 
 	MPASS(vp->v_type == VREG || vp->v_type == VDIR);
 
 	fflag = (uio->uio_rw == UIO_READ) ? FREAD : FWRITE;
 	err = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid);
 	if (err) {
 		printf("FUSE: io dispatch: filehandles are closed\n");
 		return err;
 	}
 	SDT_PROBE5(fuse, , io, io_dispatch, vp, uio, ioflag, cred, fufh);
 
 	/*
          * Ideally, when the daemon asks for direct io at open time, the
          * standard file flag should be set according to this, so that would
          * just change the default mode, which later on could be changed via
          * fcntl(2).
          * But this doesn't work, the O_DIRECT flag gets cleared at some point
          * (don't know where). So to make any use of the Fuse direct_io option,
          * we hardwire it into the file's private data (similarly to Linux,
          * btw.).
          */
 	directio = (ioflag & IO_DIRECT) || !fsess_opt_datacache(vnode_mount(vp));
 
 	switch (uio->uio_rw) {
 	case UIO_READ:
 		if (directio) {
 			SDT_PROBE2(fuse, , io, trace, 1,
 				"direct read of vnode");
 			err = fuse_read_directbackend(vp, uio, cred, fufh);
 		} else {
 			SDT_PROBE2(fuse, , io, trace, 1,
 				"buffered read of vnode");
 			err = fuse_read_biobackend(vp, uio, cred, fufh, pid);
 		}
 		break;
 	case UIO_WRITE:
 		/*
 		 * Kludge: simulate write-through caching via write-around
 		 * caching.  Same effect, as far as never caching dirty data,
 		 * but slightly pessimal in that newly written data is not
 		 * cached.
 		 */
 		if (directio || fuse_data_cache_mode == FUSE_CACHE_WT) {
 			SDT_PROBE2(fuse, , io, trace, 1,
 				"direct write of vnode");
 			err = fuse_write_directbackend(vp, uio, cred, fufh,
 				ioflag);
 		} else {
 			SDT_PROBE2(fuse, , io, trace, 1,
 				"buffered write of vnode");
 			err = fuse_write_biobackend(vp, uio, cred, fufh, ioflag,
 				pid);
 		}
 		break;
 	default:
 		panic("uninterpreted mode passed to fuse_io_dispatch");
 	}
 
 	return (err);
 }
 
 SDT_PROBE_DEFINE3(fuse, , io, read_bio_backend_start, "int", "int", "int");
 SDT_PROBE_DEFINE2(fuse, , io, read_bio_backend_feed, "int", "int");
 SDT_PROBE_DEFINE3(fuse, , io, read_bio_backend_end, "int", "ssize_t", "int");
 static int
 fuse_read_biobackend(struct vnode *vp, struct uio *uio,
     struct ucred *cred, struct fuse_filehandle *fufh, pid_t pid)
 {
 	struct buf *bp;
 	daddr_t lbn;
 	int bcount;
 	int err = 0, n = 0, on = 0;
 	off_t filesize;
 
 	const int biosize = fuse_iosize(vp);
 
 	if (uio->uio_resid == 0)
 		return (0);
 	if (uio->uio_offset < 0)
 		return (EINVAL);
 
 	bcount = biosize;
 	filesize = VTOFUD(vp)->filesize;
 
 	do {
 		if (fuse_isdeadfs(vp)) {
 			err = ENXIO;
 			break;
 		}
 		lbn = uio->uio_offset / biosize;
 		on = uio->uio_offset & (biosize - 1);
 
 		SDT_PROBE3(fuse, , io, read_bio_backend_start,
 			biosize, (int)lbn, on);
 
 		/*
 	         * Obtain the buffer cache block.  Figure out the buffer size
 	         * when we are at EOF.  If we are modifying the size of the
 	         * buffer based on an EOF condition we need to hold
 	         * nfs_rslock() through obtaining the buffer to prevent
 	         * a potential writer-appender from messing with n_size.
 	         * Otherwise we may accidentally truncate the buffer and
 	         * lose dirty data.
 	         *
 	         * Note that bcount is *not* DEV_BSIZE aligned.
 	         */
 		if ((off_t)lbn * biosize >= filesize) {
 			bcount = 0;
 		} else if ((off_t)(lbn + 1) * biosize > filesize) {
 			bcount = filesize - (off_t)lbn *biosize;
 		}
 		bp = getblk(vp, lbn, bcount, PCATCH, 0, 0);
 
 		if (!bp)
 			return (EINTR);
 
 		/*
 	         * If B_CACHE is not set, we must issue the read.  If this
 	         * fails, we return an error.
 	         */
 
 		if ((bp->b_flags & B_CACHE) == 0) {
 			bp->b_iocmd = BIO_READ;
 			vfs_busy_pages(bp, 0);
 			err = fuse_io_strategy(vp, bp);
 			if (err) {
 				brelse(bp);
 				return (err);
 			}
 		}
 		/*
 	         * on is the offset into the current bp.  Figure out how many
 	         * bytes we can copy out of the bp.  Note that bcount is
 	         * NOT DEV_BSIZE aligned.
 	         *
 	         * Then figure out how many bytes we can copy into the uio.
 	         */
 
 		n = 0;
 		if (on < bcount)
 			n = MIN((unsigned)(bcount - on), uio->uio_resid);
 		if (n > 0) {
 			SDT_PROBE2(fuse, , io, read_bio_backend_feed,
 				n, n + (int)bp->b_resid);
 			err = uiomove(bp->b_data + on, n, uio);
 		}
 		brelse(bp);
 		SDT_PROBE3(fuse, , io, read_bio_backend_end, err,
 			uio->uio_resid, n);
 	} while (err == 0 && uio->uio_resid > 0 && n > 0);
 
 	return (err);
 }
 
 SDT_PROBE_DEFINE1(fuse, , io, read_directbackend_start, "struct fuse_read_in*");
 SDT_PROBE_DEFINE2(fuse, , io, read_directbackend_complete,
 	"struct fuse_dispatcher*", "struct uio*");
 
 static int
 fuse_read_directbackend(struct vnode *vp, struct uio *uio,
     struct ucred *cred, struct fuse_filehandle *fufh)
 {
 	struct fuse_dispatcher fdi;
 	struct fuse_read_in *fri;
 	int err = 0;
 
 	if (uio->uio_resid == 0)
 		return (0);
 
 	fdisp_init(&fdi, 0);
 
 	/*
          * XXX In "normal" case we use an intermediate kernel buffer for
          * transmitting data from daemon's context to ours. Eventually, we should
          * get rid of this. Anyway, if the target uio lives in sysspace (we are
          * called from pageops), and the input data doesn't need kernel-side
          * processing (we are not called from readdir) we can already invoke
          * an optimized, "peer-to-peer" I/O routine.
          */
 	while (uio->uio_resid > 0) {
 		fdi.iosize = sizeof(*fri);
 		fdisp_make_vp(&fdi, FUSE_READ, vp, uio->uio_td, cred);
 		fri = fdi.indata;
 		fri->fh = fufh->fh_id;
 		fri->offset = uio->uio_offset;
 		fri->size = MIN(uio->uio_resid,
 		    fuse_get_mpdata(vp->v_mount)->max_read);
 
 		SDT_PROBE1(fuse, , io, read_directbackend_start, fri);
 
 		if ((err = fdisp_wait_answ(&fdi)))
 			goto out;
 
 		SDT_PROBE2(fuse, , io, read_directbackend_complete,
 			fdi.iosize, uio);
 
 		if ((err = uiomove(fdi.answ, MIN(fri->size, fdi.iosize), uio)))
 			break;
 		if (fdi.iosize < fri->size)
 			break;
 	}
 
 out:
 	fdisp_destroy(&fdi);
 	return (err);
 }
 
 static int
 fuse_write_directbackend(struct vnode *vp, struct uio *uio,
     struct ucred *cred, struct fuse_filehandle *fufh, int ioflag)
 {
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 	struct fuse_write_in *fwi;
+	struct fuse_write_out *fwo;
 	struct fuse_dispatcher fdi;
 	size_t chunksize;
+	void *fwi_data;
+	off_t as_written_offset;
 	int diff;
 	int err = 0;
+	bool direct_io = fufh->fuse_open_flags & FOPEN_DIRECT_IO;
 
 	if (uio->uio_resid == 0)
 		return (0);
 	if (ioflag & IO_APPEND)
 		uio_setoffset(uio, fvdat->filesize);
 
 	fdisp_init(&fdi, 0);
 
 	while (uio->uio_resid > 0) {
 		chunksize = MIN(uio->uio_resid,
 		    fuse_get_mpdata(vp->v_mount)->max_write);
 
 		fdi.iosize = sizeof(*fwi) + chunksize;
 		fdisp_make_vp(&fdi, FUSE_WRITE, vp, uio->uio_td, cred);
 
 		fwi = fdi.indata;
 		fwi->fh = fufh->fh_id;
 		fwi->offset = uio->uio_offset;
 		fwi->size = chunksize;
+		fwi_data = (char *)fdi.indata + sizeof(*fwi);
 
-		if ((err = uiomove((char *)fdi.indata + sizeof(*fwi),
-		    chunksize, uio)))
+		if ((err = uiomove(fwi_data, chunksize, uio)))
 			break;
 
+retry:
 		if ((err = fdisp_wait_answ(&fdi)))
 			break;
 
+		fwo = ((struct fuse_write_out *)fdi.answ);
+
 		/* Adjust the uio in the case of short writes */
-		diff = chunksize - ((struct fuse_write_out *)fdi.answ)->size;
-		if (diff < 0) {
-			err = EINVAL;
-			break;
-		} else if (diff > 0 && !(ioflag & IO_DIRECT)) {
-			/* 
-			 * XXX We really should be directly checking whether
-			 * the file was opened with FOPEN_DIRECT_IO, not
-			 * IO_DIRECT.  IO_DIRECT can be set in multiple ways.
-			 */
-			SDT_PROBE2(fuse, , io, trace, 1,
-				"misbehaving filesystem: short writes are only "
-				"allowed with direct_io");
-		}
-		uio->uio_resid += diff;
-		uio->uio_offset -= diff;
+		diff = fwi->size - fwo->size;
+		as_written_offset = uio->uio_offset - diff;
 
-		if (uio->uio_offset > fvdat->filesize &&
+		if (as_written_offset > fvdat->filesize &&
 		    fuse_data_cache_mode != FUSE_CACHE_UC) {
-			fuse_vnode_setsize(vp, cred, uio->uio_offset);
+			fuse_vnode_setsize(vp, cred, as_written_offset);
 			fvdat->flag &= ~FN_SIZECHANGE;
+		}
+
+		if (diff < 0) {
+			printf("WARNING: misbehaving FUSE filesystem "
+				"wrote more data than we provided it\n");
+			err = EINVAL;
+			break;
+		} else if (diff > 0) {
+			/* Short write: fwo->size is less than fwi->size */
+			if (!direct_io) {
+				printf("WARNING: misbehaving FUSE filesystem: "
+					"short writes are only allowed with "
+					"direct_io\n");
+			}
+			if (ioflag & IO_DIRECT) {
+				/* Hand the unwritten bytes back to the uio */
+				uio->uio_resid += diff;
+				uio->uio_offset -= diff;
+				break;
+			} else {
+				/* Resend the unwritten portion of data */
+				fdi.iosize = sizeof(*fwi) + diff;
+				/* Refresh fdi without clearing data buffer */
+				fdisp_refresh_vp(&fdi, FUSE_WRITE, vp,
+					uio->uio_td, cred);
+				MPASS2(fwi == fdi.indata, "FUSE dispatcher "
+					"reallocated despite no increase in "
+					"size?");
+				fwi = fdi.indata;
+				void *src = (char*)fwi_data + fwo->size;
+				memmove(fwi_data, src, diff);
+				fwi->fh = fufh->fh_id;
+				fwi->offset = as_written_offset;
+				fwi->size = diff;
+				goto retry;
+			}
 		}
 	}
 
 	fdisp_destroy(&fdi);
 
 	return (err);
 }
 
 SDT_PROBE_DEFINE6(fuse, , io, write_biobackend_start, "int64_t", "int", "int",
 		"struct uio*", "int", "bool");
 SDT_PROBE_DEFINE2(fuse, , io, write_biobackend_append_race, "long", "int");
 
 static int
 fuse_write_biobackend(struct vnode *vp, struct uio *uio,
     struct ucred *cred, struct fuse_filehandle *fufh, int ioflag, pid_t pid)
 {
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 	struct buf *bp;
 	daddr_t lbn;
 	int bcount;
 	int n, on, err = 0;
 
 	const int biosize = fuse_iosize(vp);
 
 	KASSERT(uio->uio_rw == UIO_WRITE, ("ncl_write mode"));
 	if (vp->v_type != VREG)
 		return (EIO);
 	if (uio->uio_offset < 0)
 		return (EINVAL);
 	if (uio->uio_resid == 0)
 		return (0);
 	if (ioflag & IO_APPEND)
 		uio_setoffset(uio, fvdat->filesize);
 
 	/*
          * Find all of this file's B_NEEDCOMMIT buffers.  If our writes
          * would exceed the local maximum per-file write commit size when
          * combined with those, we must decide whether to flush,
          * go synchronous, or return err.  We don't bother checking
          * IO_UNIT -- we just make all writes atomic anyway, as there's
          * no point optimizing for something that really won't ever happen.
          */
 	do {
 		if (fuse_isdeadfs(vp)) {
 			err = ENXIO;
 			break;
 		}
 		lbn = uio->uio_offset / biosize;
 		on = uio->uio_offset & (biosize - 1);
 		n = MIN((unsigned)(biosize - on), uio->uio_resid);
 
 again:
 		/*
 	         * Handle direct append and file extension cases, calculate
 	         * unaligned buffer size.
 	         */
 		if (uio->uio_offset == fvdat->filesize && n) {
 			/*
 	                 * Get the buffer (in its pre-append state to maintain
 	                 * B_CACHE if it was previously set).  Resize the
 	                 * nfsnode after we have locked the buffer to prevent
 	                 * readers from reading garbage.
 	                 */
 			bcount = on;
 			SDT_PROBE6(fuse, , io, write_biobackend_start,
 				lbn, on, n, uio, bcount, true);
 			bp = getblk(vp, lbn, bcount, PCATCH, 0, 0);
 
 			if (bp != NULL) {
 				long save;
 
 				err = fuse_vnode_setsize(vp, cred, 
 							 uio->uio_offset + n);
 				if (err) {
 					brelse(bp);
 					break;
 				}
 				save = bp->b_flags & B_CACHE;
 				bcount += n;
 				allocbuf(bp, bcount);
 				bp->b_flags |= save;
 			}
 		} else {
 			/*
 	                 * Obtain the locked cache block first, and then
 	                 * adjust the file's size as appropriate.
 	                 */
 			bcount = on + n;
 			if ((off_t)lbn * biosize + bcount < fvdat->filesize) {
 				if ((off_t)(lbn + 1) * biosize < fvdat->filesize)
 					bcount = biosize;
 				else
 					bcount = fvdat->filesize - 
 					  (off_t)lbn *biosize;
 			}
 			SDT_PROBE6(fuse, , io, write_biobackend_start,
 				lbn, on, n, uio, bcount, false);
 			bp = getblk(vp, lbn, bcount, PCATCH, 0, 0);
 			if (bp && uio->uio_offset + n > fvdat->filesize) {
 				err = fuse_vnode_setsize(vp, cred, 
 							 uio->uio_offset + n);
 				if (err) {
 					brelse(bp);
 					break;
 				}
 			}
 		}
 
 		if (!bp) {
 			err = EINTR;
 			break;
 		}
 		/*
 	         * Issue a READ if B_CACHE is not set.  In special-append
 	         * mode, B_CACHE is based on the buffer prior to the write
 	         * op and is typically set, avoiding the read.  If a read
 	         * is required in special append mode, the server will
 	         * probably send us a short-read since we extended the file
 	         * on our end, resulting in b_resid == 0 and, thusly,
 	         * B_CACHE getting set.
 	         *
 	         * We can also avoid issuing the read if the write covers
 	         * the entire buffer.  We have to make sure the buffer state
 	         * is reasonable in this case since we will not be initiating
 	         * I/O.  See the comments in kern/vfs_bio.c's getblk() for
 	         * more information.
 	         *
 	         * B_CACHE may also be set due to the buffer being cached
 	         * normally.
 	         */
 
 		if (on == 0 && n == bcount) {
 			bp->b_flags |= B_CACHE;
 			bp->b_flags &= ~B_INVAL;
 			bp->b_ioflags &= ~BIO_ERROR;
 		}
 		if ((bp->b_flags & B_CACHE) == 0) {
 			bp->b_iocmd = BIO_READ;
 			vfs_busy_pages(bp, 0);
 			fuse_io_strategy(vp, bp);
 			if ((err = bp->b_error)) {
 				brelse(bp);
 				break;
 			}
 		}
 		if (bp->b_wcred == NOCRED)
 			bp->b_wcred = crhold(cred);
 
 		/*
 	         * If dirtyend exceeds file size, chop it down.  This should
 	         * not normally occur but there is an append race where it
 	         * might occur XXX, so we log it.
 	         *
 	         * If the chopping creates a reverse-indexed or degenerate
 	         * situation with dirtyoff/end, we 0 both of them.
 	         */
 
 		if (bp->b_dirtyend > bcount) {
 			SDT_PROBE2(fuse, , io, write_biobackend_append_race,
 			    (long)bp->b_blkno * biosize,
 			    bp->b_dirtyend - bcount);
 			bp->b_dirtyend = bcount;
 		}
 		if (bp->b_dirtyoff >= bp->b_dirtyend)
 			bp->b_dirtyoff = bp->b_dirtyend = 0;
 
 		/*
 	         * If the new write will leave a contiguous dirty
 	         * area, just update the b_dirtyoff and b_dirtyend,
 	         * otherwise force a write rpc of the old dirty area.
 	         *
 	         * While it is possible to merge discontiguous writes due to
 	         * our having a B_CACHE buffer ( and thus valid read data
 	         * for the hole), we don't because it could lead to
 	         * significant cache coherency problems with multiple clients,
 	         * especially if locking is implemented later on.
 	         *
 	         * as an optimization we could theoretically maintain
 	         * a linked list of discontinuous areas, but we would still
 	         * have to commit them separately so there isn't much
 	         * advantage to it except perhaps a bit of asynchronization.
 	         */
 
 		if (bp->b_dirtyend > 0 &&
 		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
 			/*
 	                 * Yes, we mean it. Write out everything to "storage"
 	                 * immediately, without hesitation. (Apart from other
 	                 * reasons: the only way to know if a write is valid
 	                 * if its actually written out.)
 	                 */
 			bwrite(bp);
 			if (bp->b_error == EINTR) {
 				err = EINTR;
 				break;
 			}
 			goto again;
 		}
 		err = uiomove((char *)bp->b_data + on, n, uio);
 
 		/*
 	         * Since this block is being modified, it must be written
 	         * again and not just committed.  Since write clustering does
 	         * not work for the stage 1 data write, only the stage 2
 	         * commit rpc, we have to clear B_CLUSTEROK as well.
 	         */
 		bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
 
 		if (err) {
 			bp->b_ioflags |= BIO_ERROR;
 			bp->b_error = err;
 			brelse(bp);
 			break;
 		}
 		/*
 	         * Only update dirtyoff/dirtyend if not a degenerate
 	         * condition.
 	         */
 		if (n) {
 			if (bp->b_dirtyend > 0) {
 				bp->b_dirtyoff = MIN(on, bp->b_dirtyoff);
 				bp->b_dirtyend = MAX((on + n), bp->b_dirtyend);
 			} else {
 				bp->b_dirtyoff = on;
 				bp->b_dirtyend = on + n;
 			}
 			vfs_bio_set_valid(bp, on, n);
 		}
 		err = bwrite(bp);
 		if (err)
 			break;
 	} while (uio->uio_resid > 0 && n > 0);
 
 	if (fuse_sync_resize && (fvdat->flag & FN_SIZECHANGE) != 0)
 		fuse_vnode_savesize(vp, cred, pid);
 
 	return (err);
 }
 
 int
 fuse_io_strategy(struct vnode *vp, struct buf *bp)
 {
 	struct fuse_filehandle *fufh;
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 	struct ucred *cred;
 	struct uio *uiop;
 	struct uio uio;
 	struct iovec io;
 	int error = 0;
 	int fflag;
 	/* We don't know the true pid when we're dealing with the cache */
 	pid_t pid = 0;
 
 	const int biosize = fuse_iosize(vp);
 
 	MPASS(vp->v_type == VREG || vp->v_type == VDIR);
 	MPASS(bp->b_iocmd == BIO_READ || bp->b_iocmd == BIO_WRITE);
 
 	fflag = bp->b_iocmd == BIO_READ ? FREAD : FWRITE;
 	cred = bp->b_iocmd == BIO_READ ? bp->b_rcred : bp->b_wcred;
 	error = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid);
 	if (bp->b_iocmd == BIO_READ && error == EBADF) {
 		/* 
 		 * This may be a read-modify-write operation on a cached file
 		 * opened O_WRONLY.  The FUSE protocol allows this.
 		 *
 		 * TODO: eliminate this hacky check once the FUFH table is gone
 		 */
 		error = fuse_filehandle_get(vp, FWRITE, &fufh, cred, pid);
 	}
 	if (error) {
 		printf("FUSE: strategy: filehandles are closed\n");
 		bp->b_ioflags |= BIO_ERROR;
 		bp->b_error = error;
 		bufdone(bp);
 		return (error);
 	}
 
 	uiop = &uio;
 	uiop->uio_iov = &io;
 	uiop->uio_iovcnt = 1;
 	uiop->uio_segflg = UIO_SYSSPACE;
 	uiop->uio_td = curthread;
 
 	/*
          * clear BIO_ERROR and B_INVAL state prior to initiating the I/O.  We
          * do this here so we do not have to do it in all the code that
          * calls us.
          */
 	bp->b_flags &= ~B_INVAL;
 	bp->b_ioflags &= ~BIO_ERROR;
 
 	KASSERT(!(bp->b_flags & B_DONE),
 	    ("fuse_io_strategy: bp %p already marked done", bp));
 	if (bp->b_iocmd == BIO_READ) {
 		io.iov_len = uiop->uio_resid = bp->b_bcount;
 		io.iov_base = bp->b_data;
 		uiop->uio_rw = UIO_READ;
 
 		uiop->uio_offset = ((off_t)bp->b_blkno) * biosize;
 		error = fuse_read_directbackend(vp, uiop, cred, fufh);
 
 		/* XXXCEM: Potentially invalid access to cached_attrs here */
 		if ((!error && uiop->uio_resid) ||
 		    (fsess_opt_brokenio(vnode_mount(vp)) && error == EIO &&
 		    uiop->uio_offset < fvdat->filesize && fvdat->filesize > 0 &&
 		    uiop->uio_offset >= fvdat->cached_attrs.va_size)) {
 			/*
 	                 * If we had a short read with no error, we must have
 	                 * hit a file hole.  We should zero-fill the remainder.
 	                 * This can also occur if the server hits the file EOF.
 	                 *
 	                 * Holes used to be able to occur due to pending
 	                 * writes, but that is not possible any longer.
 	                 */
 			int nread = bp->b_bcount - uiop->uio_resid;
 			int left = uiop->uio_resid;
 
 			if (error != 0) {
 				printf("FUSE: Fix broken io: offset %ju, "
 				       " resid %zd, file size %ju/%ju\n", 
 				       (uintmax_t)uiop->uio_offset,
 				    uiop->uio_resid, fvdat->filesize,
 				    fvdat->cached_attrs.va_size);
 				error = 0;
 			}
 			if (left > 0)
 				bzero((char *)bp->b_data + nread, left);
 			uiop->uio_resid = 0;
 		}
 		if (error) {
 			bp->b_ioflags |= BIO_ERROR;
 			bp->b_error = error;
 		}
 	} else {
 		/*
 	         * If we only need to commit, try to commit
 	         */
 		if (bp->b_flags & B_NEEDCOMMIT) {
 			SDT_PROBE2(fuse, , io, trace, 1,
 				"write: B_NEEDCOMMIT flags set");
 		}
 		/*
 	         * Setup for actual write
 	         */
 		if ((off_t)bp->b_blkno * biosize + bp->b_dirtyend > 
 		    fvdat->filesize)
 			bp->b_dirtyend = fvdat->filesize - 
 				(off_t)bp->b_blkno * biosize;
 
 		if (bp->b_dirtyend > bp->b_dirtyoff) {
 			io.iov_len = uiop->uio_resid = bp->b_dirtyend
 			    - bp->b_dirtyoff;
 			uiop->uio_offset = (off_t)bp->b_blkno * biosize
 			    + bp->b_dirtyoff;
 			io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
 			uiop->uio_rw = UIO_WRITE;
 
 			error = fuse_write_directbackend(vp, uiop, cred, fufh, 0);
 
 			if (error == EINTR || error == ETIMEDOUT
 			    || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
 
 				bp->b_flags &= ~(B_INVAL | B_NOCACHE);
 				if ((bp->b_flags & B_PAGING) == 0) {
 					bdirty(bp);
 					bp->b_flags &= ~B_DONE;
 				}
 				if ((error == EINTR || error == ETIMEDOUT) &&
 				    (bp->b_flags & B_ASYNC) == 0)
 					bp->b_flags |= B_EINTR;
 			} else {
 				if (error) {
 					bp->b_ioflags |= BIO_ERROR;
 					bp->b_flags |= B_INVAL;
 					bp->b_error = error;
 				}
 				bp->b_dirtyoff = bp->b_dirtyend = 0;
 			}
 		} else {
 			bp->b_resid = 0;
 			bufdone(bp);
 			return (0);
 		}
 	}
 	bp->b_resid = uiop->uio_resid;
 	bufdone(bp);
 	return (error);
 }
 
 int
 fuse_io_flushbuf(struct vnode *vp, int waitfor, struct thread *td)
 {
 	struct vop_fsync_args a = {
 		.a_vp = vp,
 		.a_waitfor = waitfor,
 		.a_td = td,
 	};
 
 	return (vop_stdfsync(&a));
 }
 
 /*
  * Flush and invalidate all dirty buffers. If another process is already
  * doing the flush, just wait for completion.
  */
 int
 fuse_io_invalbuf(struct vnode *vp, struct thread *td)
 {
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 	int error = 0;
 
 	if (vp->v_iflag & VI_DOOMED)
 		return 0;
 
 	ASSERT_VOP_ELOCKED(vp, "fuse_io_invalbuf");
 
 	while (fvdat->flag & FN_FLUSHINPROG) {
 		struct proc *p = td->td_proc;
 
 		if (vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF)
 			return EIO;
 		fvdat->flag |= FN_FLUSHWANT;
 		tsleep(&fvdat->flag, PRIBIO + 2, "fusevinv", 2 * hz);
 		error = 0;
 		if (p != NULL) {
 			PROC_LOCK(p);
 			if (SIGNOTEMPTY(p->p_siglist) ||
 			    SIGNOTEMPTY(td->td_siglist))
 				error = EINTR;
 			PROC_UNLOCK(p);
 		}
 		if (error == EINTR)
 			return EINTR;
 	}
 	fvdat->flag |= FN_FLUSHINPROG;
 
 	if (vp->v_bufobj.bo_object != NULL) {
 		VM_OBJECT_WLOCK(vp->v_bufobj.bo_object);
 		vm_object_page_clean(vp->v_bufobj.bo_object, 0, 0, OBJPC_SYNC);
 		VM_OBJECT_WUNLOCK(vp->v_bufobj.bo_object);
 	}
 	error = vinvalbuf(vp, V_SAVE, PCATCH, 0);
 	while (error) {
 		if (error == ERESTART || error == EINTR) {
 			fvdat->flag &= ~FN_FLUSHINPROG;
 			if (fvdat->flag & FN_FLUSHWANT) {
 				fvdat->flag &= ~FN_FLUSHWANT;
 				wakeup(&fvdat->flag);
 			}
 			return EINTR;
 		}
 		error = vinvalbuf(vp, V_SAVE, PCATCH, 0);
 	}
 	fvdat->flag &= ~FN_FLUSHINPROG;
 	if (fvdat->flag & FN_FLUSHWANT) {
 		fvdat->flag &= ~FN_FLUSHWANT;
 		wakeup(&fvdat->flag);
 	}
 	return (error);
 }
Index: projects/fuse2/sys/fs/fuse/fuse_ipc.c
===================================================================
--- projects/fuse2/sys/fs/fuse/fuse_ipc.c	(revision 345875)
+++ projects/fuse2/sys/fs/fuse/fuse_ipc.c	(revision 345876)
@@ -1,822 +1,887 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2007-2009 Google Inc. and Amit Singh
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
  *
  * * Redistributions of source code must retain the above copyright
  *   notice, this list of conditions and the following disclaimer.
  * * Redistributions in binary form must reproduce the above
  *   copyright notice, this list of conditions and the following disclaimer
  *   in the documentation and/or other materials provided with the
  *   distribution.
  * * Neither the name of Google Inc. nor the names of its
  *   contributors may be used to endorse or promote products derived from
  *   this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Copyright (C) 2005 Csaba Henk.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/module.h>
 #include <sys/systm.h>
 #include <sys/errno.h>
 #include <sys/kernel.h>
 #include <sys/conf.h>
 #include <sys/uio.h>
 #include <sys/malloc.h>
 #include <sys/queue.h>
 #include <sys/lock.h>
 #include <sys/sx.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/mount.h>
 #include <sys/sdt.h>
 #include <sys/vnode.h>
 #include <sys/signalvar.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <vm/uma.h>
 
 #include "fuse.h"
 #include "fuse_node.h"
 #include "fuse_ipc.h"
 #include "fuse_internal.h"
 
 SDT_PROVIDER_DECLARE(fuse);
 /* 
  * Fuse trace probe:
  * arg0: verbosity.  Higher numbers give more verbose messages
  * arg1: Textual message
  */
 SDT_PROBE_DEFINE2(fuse, , ipc, trace, "int", "char*");
 
+static void fiov_clear(struct fuse_iov *fiov);
 static struct fuse_ticket *fticket_alloc(struct fuse_data *data);
 static void fticket_refresh(struct fuse_ticket *ftick);
 static void fticket_destroy(struct fuse_ticket *ftick);
 static int fticket_wait_answer(struct fuse_ticket *ftick);
 static inline int 
 fticket_aw_pull_uio(struct fuse_ticket *ftick,
     struct uio *uio);
 
 static int fuse_body_audit(struct fuse_ticket *ftick, size_t blen);
 
 static fuse_handler_t fuse_standard_handler;
 
 SYSCTL_NODE(_vfs, OID_AUTO, fusefs, CTLFLAG_RW, 0, "FUSE tunables");
 SYSCTL_STRING(_vfs_fusefs, OID_AUTO, version, CTLFLAG_RD,
     FUSE_FREEBSD_VERSION, 0, "fuse-freebsd version");
 static int fuse_ticket_count = 0;
 
 SYSCTL_INT(_vfs_fusefs, OID_AUTO, ticket_count, CTLFLAG_RW,
     &fuse_ticket_count, 0, "number of allocated tickets");
 static long fuse_iov_permanent_bufsize = 1 << 19;
 
 SYSCTL_LONG(_vfs_fusefs, OID_AUTO, iov_permanent_bufsize, CTLFLAG_RW,
     &fuse_iov_permanent_bufsize, 0,
     "limit for permanently stored buffer size for fuse_iovs");
 static int fuse_iov_credit = 16;
 
 SYSCTL_INT(_vfs_fusefs, OID_AUTO, iov_credit, CTLFLAG_RW,
     &fuse_iov_credit, 0,
     "how many times is an oversized fuse_iov tolerated");
 
 MALLOC_DEFINE(M_FUSEMSG, "fuse_msgbuf", "fuse message buffer");
 static uma_zone_t ticket_zone;
 
 static void
 fuse_block_sigs(sigset_t *oldset)
 {
 	sigset_t newset;
 
 	SIGFILLSET(newset);
 	SIGDELSET(newset, SIGKILL);
 	if (kern_sigprocmask(curthread, SIG_BLOCK, &newset, oldset, 0))
 		panic("%s: Invalid operation for kern_sigprocmask()",
 		    __func__);
 }
 
 static void
 fuse_restore_sigs(sigset_t *oldset)
 {
 
 	if (kern_sigprocmask(curthread, SIG_SETMASK, oldset, NULL, 0))
 		panic("%s: Invalid operation for kern_sigprocmask()",
 		    __func__);
 }
 
 void
 fiov_init(struct fuse_iov *fiov, size_t size)
 {
 	uint32_t msize = FU_AT_LEAST(size);
 
 	fiov->len = 0;
 
 	fiov->base = malloc(msize, M_FUSEMSG, M_WAITOK | M_ZERO);
 
 	fiov->allocated_size = msize;
 	fiov->credit = fuse_iov_credit;
 }
 
 void
 fiov_teardown(struct fuse_iov *fiov)
 {
 	MPASS(fiov->base != NULL);
 	free(fiov->base, M_FUSEMSG);
 }
 
 void
 fiov_adjust(struct fuse_iov *fiov, size_t size)
 {
 	if (fiov->allocated_size < size ||
 	    (fuse_iov_permanent_bufsize >= 0 &&
 	    fiov->allocated_size - size > fuse_iov_permanent_bufsize &&
 	    --fiov->credit < 0)) {
 
 		fiov->base = realloc(fiov->base, FU_AT_LEAST(size), M_FUSEMSG,
 		    M_WAITOK | M_ZERO);
 		if (!fiov->base) {
 			panic("FUSE: realloc failed");
 		}
 		fiov->allocated_size = FU_AT_LEAST(size);
 		fiov->credit = fuse_iov_credit;
 	}
 	fiov->len = size;
 }
 
+/* Zero the first fiov->len bytes of the fiov's data buffer */
+static void
+fiov_clear(struct fuse_iov *fiov)
+{
+	bzero(fiov->base, fiov->len);
+}
+
+/* Zero the bytes in use, then reset len to 0 (may reallocate) */
 void
 fiov_refresh(struct fuse_iov *fiov)
 {
 	bzero(fiov->base, fiov->len);
 	fiov_adjust(fiov, 0);
 }
 
 static int
 fticket_ctor(void *mem, int size, void *arg, int flags)
 {
 	struct fuse_ticket *ftick = mem;
 	struct fuse_data *data = arg;
 
 	FUSE_ASSERT_MS_DONE(ftick);
 	FUSE_ASSERT_AW_DONE(ftick);
 
 	ftick->tk_data = data;
 
 	if (ftick->tk_unique != 0)
 		fticket_refresh(ftick);
 
 	/* May be truncated to 32 bits */
 	ftick->tk_unique = atomic_fetchadd_long(&data->ticketer, 1);
 	if (ftick->tk_unique == 0)
 		ftick->tk_unique = atomic_fetchadd_long(&data->ticketer, 1);
 
 	refcount_init(&ftick->tk_refcount, 1);
 	atomic_add_acq_int(&fuse_ticket_count, 1);
 
 	return 0;
 }
 
 static void
 fticket_dtor(void *mem, int size, void *arg)
 {
 	struct fuse_ticket *ftick = mem;
 
 	FUSE_ASSERT_MS_DONE(ftick);
 	FUSE_ASSERT_AW_DONE(ftick);
 
 	atomic_subtract_acq_int(&fuse_ticket_count, 1);
 }
 
 static int
 fticket_init(void *mem, int size, int flags)
 {
 	struct fuse_ticket *ftick = mem;
 
 	bzero(ftick, sizeof(struct fuse_ticket));
 
 	fiov_init(&ftick->tk_ms_fiov, sizeof(struct fuse_in_header));
 	ftick->tk_ms_type = FT_M_FIOV;
 
 	mtx_init(&ftick->tk_aw_mtx, "fuse answer delivery mutex", NULL, MTX_DEF);
 	fiov_init(&ftick->tk_aw_fiov, 0);
 	ftick->tk_aw_type = FT_A_FIOV;
 
 	return 0;
 }
 
 static void
 fticket_fini(void *mem, int size)
 {
 	struct fuse_ticket *ftick = mem;
 
 	fiov_teardown(&ftick->tk_ms_fiov);
 	fiov_teardown(&ftick->tk_aw_fiov);
 	mtx_destroy(&ftick->tk_aw_mtx);
 }
 
 static inline struct fuse_ticket *
 fticket_alloc(struct fuse_data *data)
 {
 	return uma_zalloc_arg(ticket_zone, data, M_WAITOK);
 }
 
 static inline void
 fticket_destroy(struct fuse_ticket *ftick)
 {
 	return uma_zfree(ticket_zone, ftick);
 }
 
-static	inline
+static inline
 void
 fticket_refresh(struct fuse_ticket *ftick)
 {
 	FUSE_ASSERT_MS_DONE(ftick);
 	FUSE_ASSERT_AW_DONE(ftick);
 
 	fiov_refresh(&ftick->tk_ms_fiov);
 	ftick->tk_ms_bufdata = NULL;
 	ftick->tk_ms_bufsize = 0;
 	ftick->tk_ms_type = FT_M_FIOV;
 
 	bzero(&ftick->tk_aw_ohead, sizeof(struct fuse_out_header));
 
 	fiov_refresh(&ftick->tk_aw_fiov);
 	ftick->tk_aw_errno = 0;
 	ftick->tk_aw_bufdata = NULL;
 	ftick->tk_aw_bufsize = 0;
 	ftick->tk_aw_type = FT_A_FIOV;
 
 	ftick->tk_flag = 0;
 }
 
+/* Prepare the ticket to be reused, but don't clear its data buffers */
+static inline void
+fticket_reset(struct fuse_ticket *ftick)
+{
+	FUSE_ASSERT_MS_DONE(ftick);
+	FUSE_ASSERT_AW_DONE(ftick);
+
+	ftick->tk_ms_bufdata = NULL;
+	ftick->tk_ms_bufsize = 0;
+	ftick->tk_ms_type = FT_M_FIOV;
+
+	bzero(&ftick->tk_aw_ohead, sizeof(struct fuse_out_header));
+
+	ftick->tk_aw_errno = 0;
+	ftick->tk_aw_bufdata = NULL;
+	ftick->tk_aw_bufsize = 0;
+	ftick->tk_aw_type = FT_A_FIOV;
+
+	ftick->tk_flag = 0;
+}
+
 static int
 fticket_wait_answer(struct fuse_ticket *ftick)
 {
 	sigset_t tset;
 	int err = 0;
 	struct fuse_data *data;
 
 	fuse_lck_mtx_lock(ftick->tk_aw_mtx);
 
 	if (fticket_answered(ftick)) {
 		goto out;
 	}
 	data = ftick->tk_data;
 
 	if (fdata_get_dead(data)) {
 		err = ENOTCONN;
 		fticket_set_answered(ftick);
 		goto out;
 	}
 	fuse_block_sigs(&tset);
 	err = msleep(ftick, &ftick->tk_aw_mtx, PCATCH, "fu_ans",
 	    data->daemon_timeout * hz);
 	fuse_restore_sigs(&tset);
 	if (err == EAGAIN) {		/* same as EWOULDBLOCK */
 #ifdef XXXIP				/* die conditionally */
 		if (!fdata_get_dead(data)) {
 			fdata_set_dead(data);
 		}
 #endif
 		err = ETIMEDOUT;
 		fticket_set_answered(ftick);
 	}
 out:
 	if (!(err || fticket_answered(ftick))) {
 		SDT_PROBE2(fuse, , ipc, trace, 1,
 			"FUSE: requester was woken up but still no answer");
 		err = ENXIO;
 	}
 	fuse_lck_mtx_unlock(ftick->tk_aw_mtx);
 
 	return err;
 }
 
 static	inline
 int
 fticket_aw_pull_uio(struct fuse_ticket *ftick, struct uio *uio)
 {
 	int err = 0;
 	size_t len = uio_resid(uio);
 
 	if (len) {
 		switch (ftick->tk_aw_type) {
 		case FT_A_FIOV:
 			fiov_adjust(fticket_resp(ftick), len);
 			err = uiomove(fticket_resp(ftick)->base, len, uio);
 			break;
 
 		case FT_A_BUF:
 			ftick->tk_aw_bufsize = len;
 			err = uiomove(ftick->tk_aw_bufdata, len, uio);
 			break;
 
 		default:
 			panic("FUSE: unknown answer type for ticket %p", ftick);
 		}
 	}
 	return err;
 }
 
 int
 fticket_pull(struct fuse_ticket *ftick, struct uio *uio)
 {
 	int err = 0;
 
 	if (ftick->tk_aw_ohead.error) {
 		return 0;
 	}
 	err = fuse_body_audit(ftick, uio_resid(uio));
 	if (!err) {
 		err = fticket_aw_pull_uio(ftick, uio);
 	}
 	return err;
 }
 
 struct fuse_data *
 fdata_alloc(struct cdev *fdev, struct ucred *cred)
 {
 	struct fuse_data *data;
 
 	data = malloc(sizeof(struct fuse_data), M_FUSEMSG, M_WAITOK | M_ZERO);
 
 	data->fdev = fdev;
 	mtx_init(&data->ms_mtx, "fuse message list mutex", NULL, MTX_DEF);
 	STAILQ_INIT(&data->ms_head);
 	mtx_init(&data->aw_mtx, "fuse answer list mutex", NULL, MTX_DEF);
 	TAILQ_INIT(&data->aw_head);
 	data->daemoncred = crhold(cred);
 	data->daemon_timeout = FUSE_DEFAULT_DAEMON_TIMEOUT;
 	sx_init(&data->rename_lock, "fuse rename lock");
 	data->ref = 1;
 
 	return data;
 }
 
 void
 fdata_trydestroy(struct fuse_data *data)
 {
 	data->ref--;
 	MPASS(data->ref >= 0);
 	if (data->ref != 0)
 		return;
 
 	/* Driving off stage all that stuff thrown at device... */
 	mtx_destroy(&data->ms_mtx);
 	mtx_destroy(&data->aw_mtx);
 	sx_destroy(&data->rename_lock);
 
 	crfree(data->daemoncred);
 
 	free(data, M_FUSEMSG);
 }
 
 void
 fdata_set_dead(struct fuse_data *data)
 {
 	FUSE_LOCK();
 	if (fdata_get_dead(data)) {
 		FUSE_UNLOCK();
 		return;
 	}
 	fuse_lck_mtx_lock(data->ms_mtx);
 	data->dataflags |= FSESS_DEAD;
 	wakeup_one(data);
 	selwakeuppri(&data->ks_rsel, PZERO + 1);
 	wakeup(&data->ticketer);
 	fuse_lck_mtx_unlock(data->ms_mtx);
 	FUSE_UNLOCK();
 }
 
 struct fuse_ticket *
 fuse_ticket_fetch(struct fuse_data *data)
 {
 	int err = 0;
 	struct fuse_ticket *ftick;
 
 	ftick = fticket_alloc(data);
 
 	if (!(data->dataflags & FSESS_INITED)) {
 		/* Sleep until get answer for INIT messsage */
 		FUSE_LOCK();
 		if (!(data->dataflags & FSESS_INITED) && data->ticketer > 2) {
 			err = msleep(&data->ticketer, &fuse_mtx, PCATCH | PDROP,
 			    "fu_ini", 0);
 			if (err)
 				fdata_set_dead(data);
 		} else
 			FUSE_UNLOCK();
 	}
 	return ftick;
 }
 
 int
 fuse_ticket_drop(struct fuse_ticket *ftick)
 {
 	int die;
 
 	die = refcount_release(&ftick->tk_refcount);
 	if (die)
 		fticket_destroy(ftick);
 
 	return die;
 }
 
 void
 fuse_insert_callback(struct fuse_ticket *ftick, fuse_handler_t * handler)
 {
 	if (fdata_get_dead(ftick->tk_data)) {
 		return;
 	}
 	ftick->tk_aw_handler = handler;
 
 	fuse_lck_mtx_lock(ftick->tk_data->aw_mtx);
 	fuse_aw_push(ftick);
 	fuse_lck_mtx_unlock(ftick->tk_data->aw_mtx);
 }
 
 void
 fuse_insert_message(struct fuse_ticket *ftick)
 {
 	if (ftick->tk_flag & FT_DIRTY) {
 		panic("FUSE: ticket reused without being refreshed");
 	}
 	ftick->tk_flag |= FT_DIRTY;
 
 	if (fdata_get_dead(ftick->tk_data)) {
 		return;
 	}
 	fuse_lck_mtx_lock(ftick->tk_data->ms_mtx);
 	fuse_ms_push(ftick);
 	wakeup_one(ftick->tk_data);
 	selwakeuppri(&ftick->tk_data->ks_rsel, PZERO + 1);
 	fuse_lck_mtx_unlock(ftick->tk_data->ms_mtx);
 }
 
 static int
 fuse_body_audit(struct fuse_ticket *ftick, size_t blen)
 {
 	int err = 0;
 	enum fuse_opcode opcode;
 
 	opcode = fticket_opcode(ftick);
 
 	switch (opcode) {
 	case FUSE_LOOKUP:
 		err = (blen == sizeof(struct fuse_entry_out)) ? 0 : EINVAL;
 		break;
 
 	case FUSE_FORGET:
 		panic("FUSE: a handler has been intalled for FUSE_FORGET");
 		break;
 
 	case FUSE_GETATTR:
 		err = (blen == sizeof(struct fuse_attr_out)) ? 0 : EINVAL;
 		break;
 
 	case FUSE_SETATTR:
 		err = (blen == sizeof(struct fuse_attr_out)) ? 0 : EINVAL;
 		break;
 
 	case FUSE_READLINK:
 		err = (PAGE_SIZE >= blen) ? 0 : EINVAL;
 		break;
 
 	case FUSE_SYMLINK:
 		err = (blen == sizeof(struct fuse_entry_out)) ? 0 : EINVAL;
 		break;
 
 	case FUSE_MKNOD:
 		err = (blen == sizeof(struct fuse_entry_out)) ? 0 : EINVAL;
 		break;
 
 	case FUSE_MKDIR:
 		err = (blen == sizeof(struct fuse_entry_out)) ? 0 : EINVAL;
 		break;
 
 	case FUSE_UNLINK:
 		err = (blen == 0) ? 0 : EINVAL;
 		break;
 
 	case FUSE_RMDIR:
 		err = (blen == 0) ? 0 : EINVAL;
 		break;
 
 	case FUSE_RENAME:
 		err = (blen == 0) ? 0 : EINVAL;
 		break;
 
 	case FUSE_LINK:
 		err = (blen == sizeof(struct fuse_entry_out)) ? 0 : EINVAL;
 		break;
 
 	case FUSE_OPEN:
 		err = (blen == sizeof(struct fuse_open_out)) ? 0 : EINVAL;
 		break;
 
 	case FUSE_READ:
 		err = (((struct fuse_read_in *)(
 		    (char *)ftick->tk_ms_fiov.base +
 		    sizeof(struct fuse_in_header)
 		    ))->size >= blen) ? 0 : EINVAL;
 		break;
 
 	case FUSE_WRITE:
 		err = (blen == sizeof(struct fuse_write_out)) ? 0 : EINVAL;
 		break;
 
 	case FUSE_STATFS:
 		if (fuse_libabi_geq(ftick->tk_data, 7, 4)) {
 			err = (blen == sizeof(struct fuse_statfs_out)) ? 
 			  0 : EINVAL;
 		} else {
 			err = (blen == FUSE_COMPAT_STATFS_SIZE) ? 0 : EINVAL;
 		}
 		break;
 
 	case FUSE_RELEASE:
 		err = (blen == 0) ? 0 : EINVAL;
 		break;
 
 	case FUSE_FSYNC:
 		err = (blen == 0) ? 0 : EINVAL;
 		break;
 
 	case FUSE_SETXATTR:
 		err = (blen == 0) ? 0 : EINVAL;
 		break;
 
 	case FUSE_GETXATTR:
 	case FUSE_LISTXATTR:
 		/*
 		 * These can have varying response lengths, and 0 length
 		 * isn't necessarily invalid.
 		 */
 		err = 0;
 		break;
 
 	case FUSE_REMOVEXATTR:
 		err = (blen == 0) ? 0 : EINVAL;
 		break;
 
 	case FUSE_FLUSH:
 		err = (blen == 0) ? 0 : EINVAL;
 		break;
 
 	case FUSE_INIT:
 		if (blen == sizeof(struct fuse_init_out) || blen == 8) {
 			err = 0;
 		} else {
 			err = EINVAL;
 		}
 		break;
 
 	case FUSE_OPENDIR:
 		err = (blen == sizeof(struct fuse_open_out)) ? 0 : EINVAL;
 		break;
 
 	case FUSE_READDIR:
 		err = (((struct fuse_read_in *)(
 		    (char *)ftick->tk_ms_fiov.base +
 		    sizeof(struct fuse_in_header)
 		    ))->size >= blen) ? 0 : EINVAL;
 		break;
 
 	case FUSE_RELEASEDIR:
 		err = (blen == 0) ? 0 : EINVAL;
 		break;
 
 	case FUSE_FSYNCDIR:
 		err = (blen == 0) ? 0 : EINVAL;
 		break;
 
 	case FUSE_GETLK:
 		panic("FUSE: no response body format check for FUSE_GETLK");
 		break;
 
 	case FUSE_SETLK:
 		panic("FUSE: no response body format check for FUSE_SETLK");
 		break;
 
 	case FUSE_SETLKW:
 		panic("FUSE: no response body format check for FUSE_SETLKW");
 		break;
 
 	case FUSE_ACCESS:
 		err = (blen == 0) ? 0 : EINVAL;
 		break;
 
 	case FUSE_CREATE:
 		err = (blen == sizeof(struct fuse_entry_out) +
 		    sizeof(struct fuse_open_out)) ? 0 : EINVAL;
 		break;
 
 	case FUSE_DESTROY:
 		err = (blen == 0) ? 0 : EINVAL;
 		break;
 
 	default:
 		panic("FUSE: opcodes out of sync (%d)\n", opcode);
 	}
 
 	return err;
 }
 
 /*
  * Fill in the request header of an outgoing FUSE message.
  *
  * ihead: header to populate
  * ftick: ticket whose tk_unique becomes the request's unique id
  * nid:   node id the request is addressed to
  * op:    FUSE opcode of the request
  * blen:  length of the request body that follows the header
  * pid:   pid recorded as the requester
  * cred:  credentials supplying the uid (cr_uid) and gid (cr_rgid)
  */
 static inline void
 fuse_setup_ihead(struct fuse_in_header *ihead, struct fuse_ticket *ftick,
     uint64_t nid, enum fuse_opcode op, size_t blen, pid_t pid,
     struct ucred *cred)
 {
 	ihead->len = sizeof(*ihead) + blen;	/* header plus body */
 	ihead->unique = ftick->tk_unique;
 	ihead->nodeid = nid;
 	ihead->opcode = op;
 
 	ihead->pid = pid;
 	ihead->uid = cred->cr_uid;
 	ihead->gid = cred->cr_rgid;	/* note: cr_rgid, not the effective gid */
 }
 
 /*
  * Default answer handler for a ticket.
  *
  * Pulls the incoming data into the ticket with fticket_pull() and then, under
  * the ticket's answer mutex, marks the ticket answered, records the pull
  * status in tk_aw_errno and calls wakeup() on the ticket -- unless the ticket
  * was already marked answered, in which case it is left untouched.
  *
  * The data is not interpreted here; that is left to whoever waits on the
  * ticket.  Returns the result of fticket_pull().
  */
 
 static int
 fuse_standard_handler(struct fuse_ticket *ftick, struct uio *uio)
 {
 	int err = 0;
 
 	err = fticket_pull(ftick, uio);
 
 	fuse_lck_mtx_lock(ftick->tk_aw_mtx);
 
 	/* Only the first party to mark the ticket answered publishes a result. */
 	if (!fticket_answered(ftick)) {
 		fticket_set_answered(ftick);
 		ftick->tk_aw_errno = err;
 		wakeup(ftick);
 	}
 	fuse_lck_mtx_unlock(ftick->tk_aw_mtx);
 
 	return err;
 }
 
-void
+/*
+ * Reinitialize a dispatcher from a pid and node id, without resizing or
+ * clearing its data buffers
+ *
+ * The dispatcher must already own a ticket (checked with MPASS).  The ticket
+ * is reset with fticket_reset(), FUSE_DIMALLOC adjusts the message iov to hold
+ * a header plus fdip->iosize bytes and re-points fdip->finh and fdip->indata
+ * into it, and the request header is rewritten for op/nid/pid/cred.
+ *
+ * mp is not used by this function.
+ */
+static void
+fdisp_refresh_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
+    struct mount *mp, uint64_t nid, pid_t pid, struct ucred *cred)
+{
+	MPASS(fdip->tick);
+	fticket_reset(fdip->tick);
+
+	FUSE_DIMALLOC(&fdip->tick->tk_ms_fiov, fdip->finh,
+	    fdip->indata, fdip->iosize);
+
+	fuse_setup_ihead(fdip->finh, fdip->tick, nid, op, fdip->iosize, pid,
+		cred);
+}
+
+/*
+ * Initialize a dispatcher from a pid and node id
+ *
+ * If the dispatcher already holds a ticket it is recycled with
+ * fticket_refresh(); otherwise a new ticket is fetched for the mount's
+ * fuse_data.  FUSE_DIMALLOC then sizes the message iov for a header plus
+ * fdip->iosize bytes and points fdip->finh / fdip->indata into it, and the
+ * request header is filled in for op/nid/pid/cred.
+ */
+static void
 fdisp_make_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
     struct mount *mp, uint64_t nid, pid_t pid, struct ucred *cred)
 {
 	struct fuse_data *data = fuse_get_mpdata(mp);
 
 	if (fdip->tick) {
 		fticket_refresh(fdip->tick);
 	} else {
 		fdip->tick = fuse_ticket_fetch(data);
 	}
 
 	FUSE_DIMALLOC(&fdip->tick->tk_ms_fiov, fdip->finh,
 	    fdip->indata, fdip->iosize);
 
 	fuse_setup_ihead(fdip->finh, fdip->tick, nid, op, fdip->iosize, pid, cred);
 }
 
 /*
  * Prepare a dispatcher for a request addressed to node nid on mount mp.
  *
  * The requester pid is taken from td's process.  RECTIFY_TDCR is applied to
  * td and cred first (presumably to substitute defaults for NULL arguments --
  * TODO confirm against its definition).
  */
 void
 fdisp_make(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct mount *mp,
     uint64_t nid, struct thread *td, struct ucred *cred)
 {
 	RECTIFY_TDCR(td, cred);
 
 	/* No "return <expr>" here: the function returns void. */
 	fdisp_make_pid(fdip, op, mp, nid, td->td_proc->p_pid, cred);
 }
 
 /*
  * Prepare a dispatcher for a request addressed to vnode vp.
  *
  * The mount and node id are derived from vp (vnode_mount() and VTOI()), and
  * the requester pid is taken from td's process.  RECTIFY_TDCR is applied to
  * td and cred first (presumably to substitute defaults for NULL arguments --
  * TODO confirm against its definition).
  */
 void
 fdisp_make_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op,
     struct vnode *vp, struct thread *td, struct ucred *cred)
 {
 	RECTIFY_TDCR(td, cred);
 	/* No "return <expr>" here: the function returns void. */
 	fdisp_make_pid(fdip, op, vnode_mount(vp), VTOI(vp),
 	    td->td_proc->p_pid, cred);
+}
+
+/*
+ * Refresh a fuse_dispatcher so it can be reused, but don't zero its data
+ *
+ * The dispatcher must already hold a ticket (fdisp_refresh_pid asserts it).
+ * The mount and node id are derived from vp, and the requester pid from td's
+ * process.  The call is made as a plain statement because this function
+ * returns void.
+ */
+void
+fdisp_refresh_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op,
+    struct vnode *vp, struct thread *td, struct ucred *cred)
+{
+	RECTIFY_TDCR(td, cred);
+	fdisp_refresh_pid(fdip, op, vnode_mount(vp), VTOI(vp),
+	    td->td_proc->p_pid, cred);
+}
+
+void
+fdisp_refresh(struct fuse_dispatcher *fdip)
+{	/* Refreshes the dispatcher's current ticket; no other field of fdip is touched. */
+	fticket_refresh(fdip->tick);	/* fdip->tick is passed through unchecked */
 }
 
 SDT_PROBE_DEFINE2(fuse, , ipc, fdisp_wait_answ_error, "char*", "int");
 
 /*
  * Send the request prepared in fdip and wait for its answer.
  *
  * Installs fuse_standard_handler as the ticket's answer callback, queues the
  * message, and calls fticket_wait_answer().
  *
  * Returns 0 on success, with fdip->answ and fdip->iosize set from the ticket's
  * response iov.  Otherwise returns:
  *  - the fticket_wait_answer() error, if the wait itself failed;
  *  - EIO, if the ticket's tk_aw_errno is set;
  *  - the error carried in the answer header, which is also stored in
  *    fdip->answ_stat so the caller can tell it apart from a messaging failure.
  */
 int
 fdisp_wait_answ(struct fuse_dispatcher *fdip)
 {
 	int err = 0;
 
 	fdip->answ_stat = 0;
 	fuse_insert_callback(fdip->tick, fuse_standard_handler);
 	fuse_insert_message(fdip->tick);
 
 	if ((err = fticket_wait_answer(fdip->tick))) {
 		fuse_lck_mtx_lock(fdip->tick->tk_aw_mtx);
 
 		if (fticket_answered(fdip->tick)) {
 			/*
 			 * Just between noticing the interrupt and getting
 			 * here, the standard handler has completed its job.
 			 * So we drop the ticket and exit as usual.
 			 */
 			SDT_PROBE2(fuse, , ipc, fdisp_wait_answ_error,
 				"IPC: interrupted, already answered", err);
 			fuse_lck_mtx_unlock(fdip->tick->tk_aw_mtx);
 			goto out;
 		} else {
 			/*
 			 * So we were faster than the standard handler.
 			 * Then by setting the answered flag we get the
 			 * handler to drop the ticket.
 			 */
 			SDT_PROBE2(fuse, , ipc, fdisp_wait_answ_error,
 				"IPC: interrupted, setting to answered", err);
 			fticket_set_answered(fdip->tick);
 			fuse_lck_mtx_unlock(fdip->tick->tk_aw_mtx);
 			return err;
 		}
 	}
 
 	/* The handler failed to pull the answer: report a generic I/O error. */
 	if (fdip->tick->tk_aw_errno) {
 		SDT_PROBE2(fuse, , ipc, fdisp_wait_answ_error,
 			"IPC: explicit EIO-ing", fdip->tick->tk_aw_errno);
 		err = EIO;
 		goto out;
 	}
 	if ((err = fdip->tick->tk_aw_ohead.error)) {
 		SDT_PROBE2(fuse, , ipc, fdisp_wait_answ_error,
 			"IPC: setting status", fdip->tick->tk_aw_ohead.error);
 		/*
 		 * This means a "proper" fuse syscall error.
 		 * We record this value so the caller will
 		 * be able to know it's not a boring messaging
 		 * failure, if they wish so (and if not, they can
 		 * just simply propagate the return value of this routine).
 		 * [XXX Maybe a bitflag would do the job too,
 		 * if other flags needed, this will be converted thusly.]
 		 */
 		fdip->answ_stat = err;
 		goto out;
 	}
 	/* Success: expose the response buffer and its length to the caller. */
 	fdip->answ = fticket_resp(fdip->tick)->base;
 	fdip->iosize = fticket_resp(fdip->tick)->len;
 
 	return 0;
 
 out:
 	return err;
 }
 
 /*
  * Create the UMA zone "fuse_ticket", sized for struct fuse_ticket, with the
  * fticket_ctor/dtor/init/fini hooks, and store it in ticket_zone.
  */
 void
 fuse_ipc_init(void)
 {
 	ticket_zone = uma_zcreate("fuse_ticket", sizeof(struct fuse_ticket),
 	    fticket_ctor, fticket_dtor, fticket_init, fticket_fini,
 	    UMA_ALIGN_PTR, 0);
 }
 
 /* Destroy the ticket UMA zone created by fuse_ipc_init(). */
 void
 fuse_ipc_destroy(void)
 {
 	uma_zdestroy(ticket_zone);
 }
Index: projects/fuse2/sys/fs/fuse/fuse_ipc.h
===================================================================
--- projects/fuse2/sys/fs/fuse/fuse_ipc.h	(revision 345875)
+++ projects/fuse2/sys/fs/fuse/fuse_ipc.h	(revision 345876)
@@ -1,396 +1,398 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2007-2009 Google Inc. and Amit Singh
  * All rights reserved.
  * 
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
  * 
  * * Redistributions of source code must retain the above copyright
  *   notice, this list of conditions and the following disclaimer.
  * * Redistributions in binary form must reproduce the above
  *   copyright notice, this list of conditions and the following disclaimer
  *   in the documentation and/or other materials provided with the
  *   distribution.
  * * Neither the name of Google Inc. nor the names of its
  *   contributors may be used to endorse or promote products derived from
  *   this software without specific prior written permission.
  * 
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  * 
  * Copyright (C) 2005 Csaba Henk.
  * All rights reserved.
  * 
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 
  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _FUSE_IPC_H_
 #define _FUSE_IPC_H_
 
 #include <sys/param.h>
 #include <sys/refcount.h>
 
 /*
  * Buffer descriptor used for the message and answer data of a ticket.
  * Managed with fiov_init(), fiov_teardown(), fiov_refresh() and fiov_adjust().
  */
 struct fuse_iov {
 	void   *base;			/* start of the buffer */
 	size_t  len;			/* length currently in use */
 	size_t  allocated_size;		/* size of the allocation behind base */
 	int     credit;			/* NOTE(review): meaning not visible here */
 };
 
 void fiov_init(struct fuse_iov *fiov, size_t size);
 void fiov_teardown(struct fuse_iov *fiov);
 void fiov_refresh(struct fuse_iov *fiov);
 void fiov_adjust(struct fuse_iov *fiov, size_t size);
 
 /*
  * Lay out a "header + body" message in fiov.
  *
  * Adjusts fiov (via fiov_adjust()) to sizeof(*spc1) + amnt bytes, then sets
  * spc1 to the start of the buffer and spc2 to the first byte after the
  * header.  spc1 and spc2 are assigned to, so they must be lvalues; fiov and
  * spc1 are evaluated more than once.
  */
 #define FUSE_DIMALLOC(fiov, spc1, spc2, amnt) do {		\
 	fiov_adjust(fiov, (sizeof(*(spc1)) + (amnt)));		\
 	(spc1) = (fiov)->base;					\
 	(spc2) = (char *)(fiov)->base + (sizeof(*(spc1)));	\
 } while (0)
 
 /* Round siz up to a floor of 160.  NOTE(review): origin of 160 not shown here. */
 #define FU_AT_LEAST(siz) max((siz), 160)
 
 /*
  * Assert that ftick is not linked on an answer list: both tk_aw_link pointers
  * must be NULL (fuse_aw_remove() clears them under INVARIANTS).
  */
 #define FUSE_ASSERT_AW_DONE(ftick)					\
 	KASSERT((ftick)->tk_aw_link.tqe_next == NULL &&			\
 	    (ftick)->tk_aw_link.tqe_prev == NULL,			\
 	    ("FUSE: ticket still on answer delivery list %p", (ftick)))
 
 /*
  * Assert that ftick has no successor on a message list: tk_ms_link.stqe_next
  * must be NULL (fuse_ms_pop() clears it under INVARIANTS).
  */
 #define FUSE_ASSERT_MS_DONE(ftick)				\
 	KASSERT((ftick)->tk_ms_link.stqe_next == NULL,		\
 	    ("FUSE: ticket still on message list %p", (ftick)))
 
 struct fuse_ticket;
 struct fuse_data;
 
 typedef int fuse_handler_t(struct fuse_ticket *ftick, struct uio *uio);
 
 /*
  * A ticket tracks one request to the FUSE daemon together with its answer.
  */
 struct fuse_ticket {
 	/* fields giving the identity of the ticket */
 	uint64_t			tk_unique;	/* copied into the request header */
 	struct fuse_data		*tk_data;	/* owning session */
 	int				tk_flag;	/* FT_* bits */
 	u_int				tk_refcount;	/* bumped when queued (ms/aw push) */
 
 	/* fields for initiating an upgoing message */
 	struct fuse_iov			tk_ms_fiov;	/* request header + body */
 	void				*tk_ms_bufdata;
 	size_t				tk_ms_bufsize;
 	enum { FT_M_FIOV, FT_M_BUF }	tk_ms_type;
 	STAILQ_ENTRY(fuse_ticket)	tk_ms_link;	/* linkage on fuse_data.ms_head */
 
 	/* fields for handling answers coming from userspace */
 	struct fuse_iov			tk_aw_fiov;	/* response, see fticket_resp() */
 	void				*tk_aw_bufdata;
 	size_t				tk_aw_bufsize;
 	enum { FT_A_FIOV, FT_A_BUF }	tk_aw_type;
 
 	struct fuse_out_header		tk_aw_ohead;	/* header of the answer */
 	int				tk_aw_errno;	/* status recorded by the answer handler */
 	struct mtx			tk_aw_mtx;	/* asserted held around FT_ANSW accesses */
 	fuse_handler_t			*tk_aw_handler;
 	TAILQ_ENTRY(fuse_ticket)	tk_aw_link;	/* linkage on fuse_data.aw_head */
 };
 
 /* Bits for fuse_ticket.tk_flag. */
 #define FT_ANSW  0x01  /* request of ticket has already been answered */
 #define FT_DIRTY 0x04  /* ticket has been used */
 
 /* Return the iov that holds the answer (response) data of ftick. */
 static inline struct fuse_iov *
 fticket_resp(struct fuse_ticket *ftick)
 {
 	return (&ftick->tk_aw_fiov);
 }
 
 /*
  * Report whether ftick has been marked answered (FT_ANSW).
  * The ticket's tk_aw_mtx must be held by the caller.
  */
 static inline bool
 fticket_answered(struct fuse_ticket *ftick)
 {
 	mtx_assert(&ftick->tk_aw_mtx, MA_OWNED);
 	return (ftick->tk_flag & FT_ANSW);
 }
 
 /*
  * Mark ftick as answered by setting FT_ANSW in tk_flag.
  * The ticket's tk_aw_mtx must be held by the caller.
  */
 static inline void
 fticket_set_answered(struct fuse_ticket *ftick)
 {
 	mtx_assert(&ftick->tk_aw_mtx, MA_OWNED);
 	ftick->tk_flag |= FT_ANSW;
 }
 
 /*
  * Return the opcode of the request held by ftick, read from the
  * fuse_in_header at the start of the message iov.
  */
 static inline enum fuse_opcode
 fticket_opcode(struct fuse_ticket *ftick)
 {
 	return (((struct fuse_in_header *)(ftick->tk_ms_fiov.base))->opcode);
 }
 
 int fticket_pull(struct fuse_ticket *ftick, struct uio *uio);
 
 enum mountpri { FM_NOMOUNTED, FM_PRIMARY, FM_SECONDARY };
 
 /*
  * The data representing a FUSE session.
  */
 struct fuse_data {
 	struct cdev			*fdev;
 	struct mount			*mp;
 	struct vnode			*vroot;
 	struct ucred			*daemoncred;
 	int				dataflags;	/* FSESS_* bits */
 	int				ref;
 
 	/* queue of outgoing messages; ms_mtx is asserted held on push/pop */
 	struct mtx			ms_mtx;
 	STAILQ_HEAD(, fuse_ticket)	ms_head;
 
 	/* queue of tickets awaiting answers; aw_mtx is asserted held on access */
 	struct mtx			aw_mtx;
 	TAILQ_HEAD(, fuse_ticket)	aw_head;
 
 	u_long				ticketer;
 
 	struct sx			rename_lock;
 
 	/* protocol version, compared by fuse_libabi_geq() */
 	uint32_t			fuse_libabi_major;
 	uint32_t			fuse_libabi_minor;
 
 	uint32_t			max_write;
 	uint32_t			max_read;
 	uint32_t			subtype;
 	char				volname[MAXPATHLEN];
 
 	struct selinfo			ks_rsel;
 
 	int				daemon_timeout;
 	/* bit N set: opcode N is marked not implemented (fsess_set_notimpl()) */
 	uint64_t			notimpl;
 };
 
 /* Bits for fuse_data.dataflags. */
 #define FSESS_DEAD                0x0001 /* session is to be closed */
 #define FSESS_UNUSED0             0x0002 /* unused */
 #define FSESS_INITED              0x0004 /* session has been inited */
 #define FSESS_DAEMON_CAN_SPY      0x0010 /* let non-owners access this fs */
                                          /* (and being observed by the daemon) */
 #define FSESS_PUSH_SYMLINKS_IN    0x0020 /* prefix absolute symlinks with mp */
 #define FSESS_DEFAULT_PERMISSIONS 0x0040 /* kernel does permission checking */
 #define FSESS_NO_ATTRCACHE        0x0080 /* no attribute caching */
 #define FSESS_NO_READAHEAD        0x0100 /* no readaheads */
 #define FSESS_NO_DATACACHE        0x0200 /* disable buffer cache */
 #define FSESS_NO_NAMECACHE        0x0400 /* disable name cache */
 #define FSESS_NO_MMAP             0x0800 /* disable mmap */
 #define FSESS_BROKENIO            0x1000 /* fix broken io */
 
 enum fuse_data_cache_mode {
 	FUSE_CACHE_UC,
 	FUSE_CACHE_WT,
 	FUSE_CACHE_WB,
 };
 
 extern int fuse_data_cache_mode;
 extern int fuse_data_cache_invalidate;
 extern int fuse_mmap_enable;
 extern int fuse_sync_resize;
 extern int fuse_fix_broken_io;
 
 /* Return the fuse_data attached to mount mp (its mnt_data pointer). */
 static inline struct fuse_data *
 fuse_get_mpdata(struct mount *mp)
 {
 	return mp->mnt_data;
 }
 
 /*
  * Return true unless opcode has been marked "not implemented" for the
  * session mounted at mp.  opcode indexes a bit of the 64-bit notimpl mask, so
  * it must be in the range 0..63.
  */
 static inline bool
 fsess_isimpl(struct mount *mp, int opcode)
 {
 	struct fuse_data *data = fuse_get_mpdata(mp);
 
 	return ((data->notimpl & (1ULL << opcode)) == 0);
 
 }
 /*
  * Mark opcode as "not implemented" for the session mounted at mp, so that
  * fsess_isimpl() subsequently returns false for it.  opcode must be in the
  * range 0..63 (it indexes a bit of the 64-bit notimpl mask).
  */
 static inline void
 fsess_set_notimpl(struct mount *mp, int opcode)
 {
 	struct fuse_data *data = fuse_get_mpdata(mp);
 
 	data->notimpl |= (1ULL << opcode);
 }
 
 /*
  * Return true if data caching applies to mp: the global cache mode is not
  * FUSE_CACHE_UC and the session does not have FSESS_NO_DATACACHE set.
  */
 static inline bool
 fsess_opt_datacache(struct mount *mp)
 {
 	struct fuse_data *data = fuse_get_mpdata(mp);
 
 	return (fuse_data_cache_mode != FUSE_CACHE_UC &&
 	    (data->dataflags & FSESS_NO_DATACACHE) == 0);
 }
 
 /*
  * Return true if mmap applies to mp: fuse_mmap_enable is non-zero, the global
  * cache mode is not FUSE_CACHE_UC, and the session has neither
  * FSESS_NO_DATACACHE nor FSESS_NO_MMAP set.
  */
 static inline bool
 fsess_opt_mmap(struct mount *mp)
 {
 	struct fuse_data *data = fuse_get_mpdata(mp);
 
 	if (!fuse_mmap_enable || fuse_data_cache_mode == FUSE_CACHE_UC)
 		return (false);
 	return ((data->dataflags & (FSESS_NO_DATACACHE | FSESS_NO_MMAP)) == 0);
 }
 
 /*
  * Return true if the "broken I/O" workaround applies to mp: either the global
  * fuse_fix_broken_io is non-zero or the session has FSESS_BROKENIO set.
  */
 static inline bool
 fsess_opt_brokenio(struct mount *mp)
 {
 	struct fuse_data *data = fuse_get_mpdata(mp);
 
 	return (fuse_fix_broken_io || (data->dataflags & FSESS_BROKENIO));
 }
 
 /*
  * Append ftick to its session's message queue, taking a reference on the
  * ticket.  The session's ms_mtx must be held by the caller.
  */
 static inline void
 fuse_ms_push(struct fuse_ticket *ftick)
 {
 	mtx_assert(&ftick->tk_data->ms_mtx, MA_OWNED);
 	refcount_acquire(&ftick->tk_refcount);
 	STAILQ_INSERT_TAIL(&ftick->tk_data->ms_head, ftick, tk_ms_link);
 }
 
 /*
  * Remove and return the first ticket of the session's message queue, or NULL
  * if the queue is empty.  The session's ms_mtx must be held by the caller.
  * The reference taken by fuse_ms_push() is not released here.
  */
 static inline struct fuse_ticket *
 fuse_ms_pop(struct fuse_data *data)
 {
 	struct fuse_ticket *ftick = NULL;
 
 	mtx_assert(&data->ms_mtx, MA_OWNED);
 
 	if ((ftick = STAILQ_FIRST(&data->ms_head))) {
 		STAILQ_REMOVE_HEAD(&data->ms_head, tk_ms_link);
 #ifdef INVARIANTS
 		/* Clear the link so FUSE_ASSERT_MS_DONE() holds afterwards. */
 		ftick->tk_ms_link.stqe_next = NULL;
 #endif
 	}
 
 	return (ftick);
 }
 
 /*
  * Append ftick to its session's answer queue, taking a reference on the
  * ticket.  The session's aw_mtx must be held by the caller.
  */
 static inline void
 fuse_aw_push(struct fuse_ticket *ftick)
 {
 	mtx_assert(&ftick->tk_data->aw_mtx, MA_OWNED);
 	refcount_acquire(&ftick->tk_refcount);
 	TAILQ_INSERT_TAIL(&ftick->tk_data->aw_head, ftick, tk_aw_link);
 }
 
 /*
  * Unlink ftick from its session's answer queue.  The session's aw_mtx must be
  * held by the caller.  The reference taken by fuse_aw_push() is not released
  * here.
  */
 static inline void
 fuse_aw_remove(struct fuse_ticket *ftick)
 {
 	mtx_assert(&ftick->tk_data->aw_mtx, MA_OWNED);
 	TAILQ_REMOVE(&ftick->tk_data->aw_head, ftick, tk_aw_link);
 #ifdef INVARIANTS
 	/* Clear the links so FUSE_ASSERT_AW_DONE() holds afterwards. */
 	ftick->tk_aw_link.tqe_next = NULL;
 	ftick->tk_aw_link.tqe_prev = NULL;
 #endif
 }
 
 /*
  * Remove and return the first ticket of the session's answer queue, or NULL
  * if the queue is empty.  The session's aw_mtx must be held by the caller.
  */
 static inline struct fuse_ticket *
 fuse_aw_pop(struct fuse_data *data)
 {
 	struct fuse_ticket *ftick;
 
 	mtx_assert(&data->aw_mtx, MA_OWNED);
 
 	if ((ftick = TAILQ_FIRST(&data->aw_head)) != NULL)
 		fuse_aw_remove(ftick);
 
 	return (ftick);
 }
 
 struct fuse_ticket *fuse_ticket_fetch(struct fuse_data *data);
 int fuse_ticket_drop(struct fuse_ticket *ftick);
 void fuse_insert_callback(struct fuse_ticket *ftick, fuse_handler_t *handler);
 void fuse_insert_message(struct fuse_ticket *ftick);
 
 /*
  * Return true if the session's protocol version is at least
  * abi_maj.abi_min: a larger major always qualifies, an equal major qualifies
  * when the minor is greater than or equal to abi_min.
  */
 static inline bool
 fuse_libabi_geq(struct fuse_data *data, uint32_t abi_maj, uint32_t abi_min)
 {
 	return (data->fuse_libabi_major > abi_maj ||
 	    (data->fuse_libabi_major == abi_maj &&
 	     data->fuse_libabi_minor >= abi_min));
 }
 
 struct fuse_data *fdata_alloc(struct cdev *dev, struct ucred *cred);
 void fdata_trydestroy(struct fuse_data *data);
 void fdata_set_dead(struct fuse_data *data);
 
 /* Return true if the session has FSESS_DEAD set in dataflags. */
 static inline bool
 fdata_get_dead(struct fuse_data *data)
 {
 	return (data->dataflags & FSESS_DEAD);
 }
 
 /*
  * Caller-side handle for one request/answer exchange.  Set up with
  * fdisp_init() + fdisp_make*(), sent with fdisp_wait_answ(), and released
  * with fdisp_destroy().
  */
 struct fuse_dispatcher {
 	struct fuse_ticket    *tick;	/* ticket in use, NULL after fdisp_init() */
 	struct fuse_in_header *finh;	/* request header inside the ticket's message iov */
 
 	void    *indata;	/* request body, directly after the header */
 	size_t   iosize;	/* body size on input; response length after a successful wait */
 	uint64_t nodeid;
 	int      answ_stat;	/* error from the answer header, 0 otherwise */
 	void    *answ;		/* response data after a successful wait */
 };
 
 /*
  * Start using a dispatcher for a request whose body is iosize bytes long.
  *
  * This unconditionally sets fdisp->tick to NULL; a ticket still held by the
  * dispatcher is not dropped here, so the caller must fdisp_destroy() first
  * when re-initializing a dispatcher that has already been used.
  */
 static inline void
 fdisp_init(struct fuse_dispatcher *fdisp, size_t iosize)
 {
 	fdisp->iosize = iosize;
 	fdisp->tick = NULL;
 }
 
 /*
  * Release the dispatcher's ticket with fuse_ticket_drop().  Under INVARIANTS
  * the tick pointer is also cleared.
  */
 static inline void
 fdisp_destroy(struct fuse_dispatcher *fdisp)
 {
 	fuse_ticket_drop(fdisp->tick);
 #ifdef INVARIANTS
 	fdisp->tick = NULL;
 #endif
 }
 
+void fdisp_refresh(struct fuse_dispatcher *fdip);
+
 void fdisp_make(struct fuse_dispatcher *fdip, enum fuse_opcode op,
     struct mount *mp, uint64_t nid, struct thread *td, struct ucred *cred);
 
-void fdisp_make_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
-    struct mount *mp, uint64_t nid, pid_t pid, struct ucred *cred);
-
 void fdisp_make_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op,
+    struct vnode *vp, struct thread *td, struct ucred *cred);
+
+void fdisp_refresh_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op,
     struct vnode *vp, struct thread *td, struct ucred *cred);
 
 int fdisp_wait_answ(struct fuse_dispatcher *fdip);
 
 /*
  * Convenience wrapper: prepare a request for vnode vp with fdisp_make_vp()
  * and immediately send it and wait for the answer.  Returns the result of
  * fdisp_wait_answ().  The caller is expected to have run fdisp_init() and to
  * fill in nothing beyond the header.
  */
 static inline int
 fdisp_simple_putget_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op,
     struct vnode *vp, struct thread *td, struct ucred *cred)
 {
 	fdisp_make_vp(fdip, op, vp, td, cred);
 	return (fdisp_wait_answ(fdip));
 }
 
 #endif /* _FUSE_IPC_H_ */
Index: projects/fuse2/sys/fs/fuse/fuse_vnops.c
===================================================================
--- projects/fuse2/sys/fs/fuse/fuse_vnops.c	(revision 345875)
+++ projects/fuse2/sys/fs/fuse/fuse_vnops.c	(revision 345876)
@@ -1,2421 +1,2422 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2007-2009 Google Inc. and Amit Singh
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
  *
  * * Redistributions of source code must retain the above copyright
  *   notice, this list of conditions and the following disclaimer.
  * * Redistributions in binary form must reproduce the above
  *   copyright notice, this list of conditions and the following disclaimer
  *   in the documentation and/or other materials provided with the
  *   distribution.
  * * Neither the name of Google Inc. nor the names of its
  *   contributors may be used to endorse or promote products derived from
  *   this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Copyright (C) 2005 Csaba Henk.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/module.h>
 #include <sys/systm.h>
 #include <sys/errno.h>
 #include <sys/kernel.h>
 #include <sys/conf.h>
 #include <sys/uio.h>
 #include <sys/malloc.h>
 #include <sys/queue.h>
 #include <sys/lock.h>
 #include <sys/rwlock.h>
 #include <sys/sx.h>
 #include <sys/proc.h>
 #include <sys/mount.h>
 #include <sys/vnode.h>
 #include <sys/namei.h>
 #include <sys/extattr.h>
 #include <sys/stat.h>
 #include <sys/unistd.h>
 #include <sys/filedesc.h>
 #include <sys/file.h>
 #include <sys/fcntl.h>
 #include <sys/dirent.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/sysctl.h>
 #include <sys/vmmeter.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_page.h>
 #include <vm/vm_param.h>
 #include <vm/vm_object.h>
 #include <vm/vm_pager.h>
 #include <vm/vnode_pager.h>
 #include <vm/vm_object.h>
 
 #include "fuse.h"
 #include "fuse_file.h"
 #include "fuse_internal.h"
 #include "fuse_ipc.h"
 #include "fuse_node.h"
 #include "fuse_io.h"
 
 #include <sys/priv.h>
 
 /* Maximum number of hardlinks to a single FUSE file */
 #define FUSE_LINK_MAX                      UINT32_MAX
 
 SDT_PROVIDER_DECLARE(fuse);
 /* 
  * Fuse trace probe:
  * arg0: verbosity.  Higher numbers give more verbose messages
  * arg1: Textual message
  */
 SDT_PROBE_DEFINE2(fuse, , vnops, trace, "int", "char*");
 
 /* vnode ops */
 static vop_access_t fuse_vnop_access;
 static vop_close_t fuse_vnop_close;
 static vop_create_t fuse_vnop_create;
 static vop_deleteextattr_t fuse_vnop_deleteextattr;
 static vop_fdatasync_t fuse_vnop_fdatasync;
 static vop_fsync_t fuse_vnop_fsync;
 static vop_getattr_t fuse_vnop_getattr;
 static vop_getextattr_t fuse_vnop_getextattr;
 static vop_inactive_t fuse_vnop_inactive;
 static vop_link_t fuse_vnop_link;
 static vop_listextattr_t fuse_vnop_listextattr;
 static vop_lookup_t fuse_vnop_lookup;
 static vop_mkdir_t fuse_vnop_mkdir;
 static vop_mknod_t fuse_vnop_mknod;
 static vop_open_t fuse_vnop_open;
 static vop_pathconf_t fuse_vnop_pathconf;
 static vop_read_t fuse_vnop_read;
 static vop_readdir_t fuse_vnop_readdir;
 static vop_readlink_t fuse_vnop_readlink;
 static vop_reclaim_t fuse_vnop_reclaim;
 static vop_remove_t fuse_vnop_remove;
 static vop_rename_t fuse_vnop_rename;
 static vop_rmdir_t fuse_vnop_rmdir;
 static vop_setattr_t fuse_vnop_setattr;
 static vop_setextattr_t fuse_vnop_setextattr;
 static vop_strategy_t fuse_vnop_strategy;
 static vop_symlink_t fuse_vnop_symlink;
 static vop_write_t fuse_vnop_write;
 static vop_getpages_t fuse_vnop_getpages;
 static vop_putpages_t fuse_vnop_putpages;
 static vop_print_t fuse_vnop_print;
 
 /*
  * Vnode operations vector for FUSE vnodes.  Operations not listed here fall
  * through to default_vnodeops.
  */
 struct vop_vector fuse_vnops = {
 	.vop_default = &default_vnodeops,
 	.vop_access = fuse_vnop_access,
 	.vop_close = fuse_vnop_close,
 	.vop_create = fuse_vnop_create,
 	.vop_deleteextattr = fuse_vnop_deleteextattr,
 	.vop_fsync = fuse_vnop_fsync,
 	.vop_fdatasync = fuse_vnop_fdatasync,
 	.vop_getattr = fuse_vnop_getattr,
 	.vop_getextattr = fuse_vnop_getextattr,
 	.vop_inactive = fuse_vnop_inactive,
 	.vop_link = fuse_vnop_link,
 	.vop_listextattr = fuse_vnop_listextattr,
 	.vop_lookup = fuse_vnop_lookup,
 	.vop_mkdir = fuse_vnop_mkdir,
 	.vop_mknod = fuse_vnop_mknod,
 	.vop_open = fuse_vnop_open,
 	.vop_pathconf = fuse_vnop_pathconf,
 	.vop_read = fuse_vnop_read,
 	.vop_readdir = fuse_vnop_readdir,
 	.vop_readlink = fuse_vnop_readlink,
 	.vop_reclaim = fuse_vnop_reclaim,
 	.vop_remove = fuse_vnop_remove,
 	.vop_rename = fuse_vnop_rename,
 	.vop_rmdir = fuse_vnop_rmdir,
 	.vop_setattr = fuse_vnop_setattr,
 	.vop_setextattr = fuse_vnop_setextattr,
 	.vop_strategy = fuse_vnop_strategy,
 	.vop_symlink = fuse_vnop_symlink,
 	.vop_write = fuse_vnop_write,
 	.vop_getpages = fuse_vnop_getpages,
 	.vop_putpages = fuse_vnop_putpages,
 	.vop_print = fuse_vnop_print,
 };
 
 static u_long fuse_lookup_cache_hits = 0;
 
 SYSCTL_ULONG(_vfs_fusefs, OID_AUTO, lookup_cache_hits, CTLFLAG_RD,
     &fuse_lookup_cache_hits, 0, "number of positive cache hits in lookup");
 
 static u_long fuse_lookup_cache_misses = 0;
 
 SYSCTL_ULONG(_vfs_fusefs, OID_AUTO, lookup_cache_misses, CTLFLAG_RD,
     &fuse_lookup_cache_misses, 0, "number of cache misses in lookup");
 
 int	fuse_lookup_cache_enable = 1;
 
 SYSCTL_INT(_vfs_fusefs, OID_AUTO, lookup_cache_enable, CTLFLAG_RW,
     &fuse_lookup_cache_enable, 0, "if non-zero, enable lookup cache");
 
 /*
  * XXX: This feature is highly experimental and can bring to instabilities,
  * needs revisiting before to be enabled by default.
  */
 static int fuse_reclaim_revoked = 0;
 
 SYSCTL_INT(_vfs_fusefs, OID_AUTO, reclaim_revoked, CTLFLAG_RW,
     &fuse_reclaim_revoked, 0, "");
 
 uma_zone_t fuse_pbuf_zone;
 
 /*
  * Thin wrappers around the VM page lock; the queue-lock variants are no-ops.
  * NOTE(review): the first two expansions end in ';', so "fuse_vm_page_lock(m);"
  * yields an extra empty statement and would break an unbraced if/else.
  */
 #define fuse_vm_page_lock(m)		vm_page_lock((m));
 #define fuse_vm_page_unlock(m)		vm_page_unlock((m));
 #define fuse_vm_page_lock_queues()	((void)0)
 #define fuse_vm_page_unlock_queues()	((void)0)
 
 /*
  * Get a filehandle for a directory.
  *
  * Tries a handle opened with FREAD first and falls back to one opened with
  * FEXEC.  Returns 0 with *fufhp set on success, otherwise the error from the
  * FEXEC lookup.
  */
 static int
 fuse_filehandle_get_dir(struct vnode *vp, struct fuse_filehandle **fufhp,
 	struct ucred *cred, pid_t pid)
 {
 	if (fuse_filehandle_get(vp, FREAD, fufhp, cred, pid) == 0)
 		return 0;
 	return fuse_filehandle_get(vp, FEXEC, fufhp, cred, pid);
 }
 
 /*
  * Send FUSE_FLUSH for this vnode.
  *
  * Does nothing and returns 0 if the session has FUSE_FLUSH marked as not
  * implemented.  Otherwise looks up the file handle matching fflag/cred/pid
  * and sends a FUSE_FLUSH carrying its id.  An ENOSYS answer marks FUSE_FLUSH
  * as not implemented for the session and is reported as success.
  *
  * Returns 0 on success, the fuse_filehandle_get() error if no handle was
  * found, or the error from the exchange with the daemon.
  */
 static int
 fuse_flush(struct vnode *vp, struct ucred *cred, pid_t pid, int fflag)
 {
 	struct fuse_flush_in *ffi;
 	struct fuse_filehandle *fufh;
 	struct fuse_dispatcher fdi;
 	struct thread *td = curthread;
 	struct mount *mp = vnode_mount(vp);
 	int err;
 
 	if (!fsess_isimpl(vnode_mount(vp), FUSE_FLUSH))
 		return 0;
 
 	err = fuse_filehandle_get(vp, fflag, &fufh, cred, pid);
 	if (err)
 		return err;
 
 	fdisp_init(&fdi, sizeof(*ffi));
 	fdisp_make_vp(&fdi, FUSE_FLUSH, vp, td, cred);
 	ffi = fdi.indata;
 	ffi->fh = fufh->fh_id;
 
 	err = fdisp_wait_answ(&fdi);
 	if (err == ENOSYS) {
 		/* Remember the daemon lacks FUSE_FLUSH so it isn't asked again. */
 		fsess_set_notimpl(mp, FUSE_FLUSH);
 		err = 0;
 	}
 	fdisp_destroy(&fdi);
 	return err;
 }
 
 /*
     struct vnop_access_args {
 	struct vnode *a_vp;
 #if VOP_ACCESS_TAKES_ACCMODE_T
 	accmode_t a_accmode;
 #else
 	int a_mode;
 #endif
 	struct ucred *a_cred;
 	struct thread *a_td;
     };
 */
 /*
  * VOP_ACCESS for FUSE vnodes.
  *
  * - On a dead filesystem: 0 for the root vnode, ENXIO otherwise.
  * - Before the session is initialized: 0 for the root vnode when the
  *   credential test below passes, EBADF otherwise.
  * - Symbolic links are always accessible.
  * - Everything else is decided by fuse_internal_access().
  */
 static int
 fuse_vnop_access(struct vop_access_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	int accmode = ap->a_accmode;
 	struct ucred *cred = ap->a_cred;
 
 	struct fuse_access_param facp;
 	struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp));
 
 	int err;
 
 	if (fuse_isdeadfs(vp)) {
 		if (vnode_isvroot(vp)) {
 			return 0;
 		}
 		return ENXIO;
 	}
 	if (!(data->dataflags & FSESS_INITED)) {
 		if (vnode_isvroot(vp)) {
 			/*
 			 * NOTE(review): priv_check_cred() conventionally
 			 * returns 0 when the privilege is granted, so this
 			 * test is true when it is NOT granted -- confirm the
 			 * intended sense.
 			 */
 			if (priv_check_cred(cred, PRIV_VFS_ADMIN) ||
 			    (fuse_match_cred(data->daemoncred, cred) == 0)) {
 				return 0;
 			}
 		}
 		return EBADF;
 	}
 	if (vnode_islnk(vp)) {
 		return 0;
 	}
 	bzero(&facp, sizeof(facp));
 
 	err = fuse_internal_access(vp, accmode, &facp, ap->a_td, ap->a_cred);
 	return err;
 }
 
 /*
     struct vop_close_args {
 	struct vnode *a_vp;
 	int  a_fflag;
 	struct ucred *a_cred;
 	struct thread *a_td;
     };
 */
 /*
  * VOP_CLOSE for FUSE vnodes.
  *
  * Returns 0 without doing anything for a dead filesystem, for directories,
  * and when IO_NDELAY is set in a_fflag.  Otherwise sends FUSE_FLUSH and, if
  * the node has FN_SIZECHANGE set, saves its size with fuse_vnode_savesize().
  * The return value is the result of the flush.
  */
 static int
 fuse_vnop_close(struct vop_close_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct ucred *cred = ap->a_cred;
 	int fflag = ap->a_fflag;
 	struct thread *td = ap->a_td;
 	pid_t pid = td->td_proc->p_pid;
 	int err = 0;
 
 	if (fuse_isdeadfs(vp))
 		return 0;
 	if (vnode_isdir(vp))
 		return 0;
 	if (fflag & IO_NDELAY)
 		return 0;
 
 	err = fuse_flush(vp, cred, pid, fflag);
 	/* TODO: close the file handle, if we're sure it's no longer used */
 	if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0) {
 		fuse_vnode_savesize(vp, cred, td->td_proc->p_pid);
 	}
 	return err;
 }
 
 /*
  * Prepare fdip for a FUSE_MKNOD request that creates cnp's name under the
  * directory with node id parentnid.  Used when FUSE_CREATE is unavailable.
  *
  * The request body is a fuse_mknod_in (mode as given, rdev 0) followed by the
  * NUL-terminated name.  *op is set to FUSE_MKNOD.
  *
  * fdip is re-initialized with fdisp_init(), which clears fdip->tick without
  * dropping it; a caller that reuses a dispatcher must release its previous
  * ticket first.
  */
 static void
 fdisp_make_mknod_for_fallback(
 	struct fuse_dispatcher *fdip,
 	struct componentname *cnp,
 	struct vnode *dvp,
 	uint64_t parentnid,
 	struct thread *td,
 	struct ucred *cred,
 	mode_t mode,
 	enum fuse_opcode *op)
 {
 	struct fuse_mknod_in *fmni;
 
 	/* body = fuse_mknod_in + name + terminating NUL */
 	fdisp_init(fdip, sizeof(*fmni) + cnp->cn_namelen + 1);
 	*op = FUSE_MKNOD;
 	fdisp_make(fdip, *op, vnode_mount(dvp), parentnid, td, cred);
 	fmni = fdip->indata;
 	fmni->mode = mode;
 	fmni->rdev = 0;
 	memcpy((char *)fdip->indata + sizeof(*fmni), cnp->cn_nameptr,
 	    cnp->cn_namelen);
 	((char *)fdip->indata)[sizeof(*fmni) + cnp->cn_namelen] = '\0';
 }
 /*
     struct vnop_create_args {
 	struct vnode *a_dvp;
 	struct vnode **a_vpp;
 	struct componentname *a_cnp;
 	struct vattr *a_vap;
     };
 */
 /*
  * VOP_CREATE for FUSE: create and open a regular file.
  *
  * Uses FUSE_CREATE when the daemon implements it.  Otherwise, or when the
  * daemon answers FUSE_CREATE with ENOSYS, falls back to FUSE_MKNOD followed
  * by a separate FUSE_OPEN.  On success *a_vpp holds the new vnode with an
  * initialized file handle.  If the vnode cannot be obtained after the daemon
  * created and opened the file, a FUSE_RELEASE is queued for the handle.
  *
  * Returns 0 on success, ENXIO on a dead filesystem, EINVAL for anything but
  * VREG, or the error from the daemon / fuse_vnode_get().
  *
  * fdisp_init() clears the dispatcher's ticket pointer without dropping the
  * ticket, so every re-initialization of an already used dispatcher below is
  * preceded by fdisp_destroy() to avoid leaking the previous ticket.
  */
 static int
 fuse_vnop_create(struct vop_create_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode **vpp = ap->a_vpp;
 	struct componentname *cnp = ap->a_cnp;
 	struct vattr *vap = ap->a_vap;
 	struct thread *td = cnp->cn_thread;
 	struct ucred *cred = cnp->cn_cred;
 
 	struct fuse_open_in *foi;
 	struct fuse_entry_out *feo;
 	struct fuse_open_out *foo;
 	struct fuse_dispatcher fdi, fdi2;
 	struct fuse_dispatcher *fdip = &fdi;
 	struct fuse_dispatcher *fdip2 = NULL;
 
 	int err;
 
 	struct mount *mp = vnode_mount(dvp);
 	uint64_t parentnid = VTOFUD(dvp)->nid;
 	mode_t mode = MAKEIMODE(vap->va_type, vap->va_mode);
 	enum fuse_opcode op;
 	int flags;
 
 	/* 
 	 * VOP_CREATE doesn't tell us the open(2) flags, so we guess.  Only a
 	 * writable mode makes sense, and we might as well include readability
 	 * too.
 	 */
 	flags = O_RDWR;
 
 	if (fuse_isdeadfs(dvp)) {
 		return ENXIO;
 	}
 	bzero(&fdi, sizeof(fdi));
 
 	if ((vap->va_type != VREG))
 		return (EINVAL);
 
 	if (!fsess_isimpl(mp, FUSE_CREATE)) {
 		/* Fallback to FUSE_MKNOD/FUSE_OPEN */
 		fdisp_make_mknod_for_fallback(fdip, cnp, dvp, parentnid, td,
 			cred, mode, &op);
 	} else {
 		/* Use FUSE_CREATE */
 		op = FUSE_CREATE;
 		fdisp_init(fdip, sizeof(*foi) + cnp->cn_namelen + 1);
 		fdisp_make(fdip, op, vnode_mount(dvp), parentnid, td, cred);
 		foi = fdip->indata;
 		foi->mode = mode;
 		foi->flags = O_CREAT | flags;
 		memcpy((char *)fdip->indata + sizeof(*foi), cnp->cn_nameptr,
 		    cnp->cn_namelen);
 		((char *)fdip->indata)[sizeof(*foi) + cnp->cn_namelen] = '\0';
 	}
 
 	err = fdisp_wait_answ(fdip);
 
 	if (err) {
 		if (err == ENOSYS && op == FUSE_CREATE) {
 			fsess_set_notimpl(mp, FUSE_CREATE);
 			/*
 			 * The fallback re-runs fdisp_init() on fdip; drop the
 			 * FUSE_CREATE ticket first so it is not leaked.
 			 */
 			fdisp_destroy(fdip);
 			fdisp_make_mknod_for_fallback(fdip, cnp, dvp,
 				parentnid, td, cred, mode, &op);
 			err = fdisp_wait_answ(fdip);
 		}
 		if (err)
 			goto out;
 	}
 
 	feo = fdip->answ;
 
 	if ((err = fuse_internal_checkentry(feo, VREG))) {
 		goto out;
 	}
 
 	if (op == FUSE_CREATE) {
 		/* The open reply directly follows the entry reply. */
 		foo = (struct fuse_open_out*)(feo + 1);
 	} else {
 		/* Issue a separate FUSE_OPEN */
 		fdip2 = &fdi2;
 		fdisp_init(fdip2, sizeof(*foi));
 		fdisp_make(fdip2, FUSE_OPEN, vnode_mount(dvp), feo->nodeid, td,
 			cred);
 		foi = fdip2->indata;
 		foi->mode = mode;
 		foi->flags = flags;
 		err = fdisp_wait_answ(fdip2);
 		if (err)
 			goto out;
 		foo = fdip2->answ;
 	}
 	err = fuse_vnode_get(mp, feo, feo->nodeid, dvp, vpp, cnp, VREG);
 	if (err) {
 		struct fuse_release_in *fri;
 		uint64_t nodeid = feo->nodeid;
 		uint64_t fh_id = foo->fh;
 
 		/*
 		 * Everything needed from the reply has been copied out above,
 		 * so the old ticket can be dropped before fdip is reused for
 		 * the FUSE_RELEASE; otherwise fdisp_init() would leak it.
 		 */
 		fdisp_destroy(fdip);
 		fdisp_init(fdip, sizeof(*fri));
 		fdisp_make(fdip, FUSE_RELEASE, mp, nodeid, td, cred);
 		fri = fdip->indata;
 		fri->fh = fh_id;
 		fri->flags = flags;
 		/* Fire and forget: nobody waits for the RELEASE answer. */
 		fuse_insert_callback(fdip->tick, fuse_internal_forget_callback);
 		fuse_insert_message(fdip->tick);
 		goto out;
 	}
 	ASSERT_VOP_ELOCKED(*vpp, "fuse_vnop_create");
 
 	fuse_filehandle_init(*vpp, FUFH_RDWR, NULL, td->td_proc->p_pid, cred,
 		foo);
 	fuse_vnode_open(*vpp, foo->open_flags, td);
 	cache_purge_negative(dvp);
 
 out:
 	if (fdip2)
 		fdisp_destroy(fdip2);
 	fdisp_destroy(fdip);
 	return err;
 }
 
 /*
     struct vnop_fdatasync_args {
 	struct vop_generic_args a_gen;
 	struct vnode * a_vp;
 	struct thread * a_td;
     };
 */
 static int
 fuse_vnop_fdatasync(struct vop_fdatasync_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct thread *td = ap->a_td;
 	int waitfor = MNT_WAIT;
 
 	int err = 0;
 
 	if (fuse_isdeadfs(vp)) {
 		return 0;
 	}
 	if ((err = vop_stdfdatasync_buf(ap)))
 		return err;
 
 	return fuse_internal_fsync(vp, td, waitfor, true);
 }
 
 /*
     struct vnop_fsync_args {
 	struct vop_generic_args a_gen;
 	struct vnode * a_vp;
 	int  a_waitfor;
 	struct thread * a_td;
     };
 */
 static int
 fuse_vnop_fsync(struct vop_fsync_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct thread *td = ap->a_td;
 	int waitfor = ap->a_waitfor;
 	int err = 0;
 
 	if (fuse_isdeadfs(vp)) {
 		return 0;
 	}
 	if ((err = vop_stdfsync(ap)))
 		return err;
 
 	return fuse_internal_fsync(vp, td, waitfor, false);
 }
 
 /*
     struct vnop_getattr_args {
 	struct vnode *a_vp;
 	struct vattr *a_vap;
 	struct ucred *a_cred;
 	struct thread *a_td;
     };
 */
 static int
 fuse_vnop_getattr(struct vop_getattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vattr *vap = ap->a_vap;
 	struct ucred *cred = ap->a_cred;
 	struct thread *td = curthread;
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 	struct fuse_attr_out *fao;
 
 	int err = 0;
 	int dataflags;
 	struct fuse_dispatcher fdi;
 
 	dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags;
 
 	/* Note that we are not bailing out on a dead file system just yet. */
 
 	if (!(dataflags & FSESS_INITED)) {
 		if (!vnode_isvroot(vp)) {
 			fdata_set_dead(fuse_get_mpdata(vnode_mount(vp)));
 			err = ENOTCONN;
 			return err;
 		} else {
 			goto fake;
 		}
 	}
 	fdisp_init(&fdi, 0);
 	if ((err = fdisp_simple_putget_vp(&fdi, FUSE_GETATTR, vp, td, cred))) {
 		if ((err == ENOTCONN) && vnode_isvroot(vp)) {
 			/* see comment in fuse_vfsop_statfs() */
 			fdisp_destroy(&fdi);
 			goto fake;
 		}
 		if (err == ENOENT) {
 			fuse_internal_vnode_disappear(vp);
 		}
 		goto out;
 	}
 
 	fao = (struct fuse_attr_out *)fdi.answ;
 	fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid,
 		fao->attr_valid_nsec, vap);
 	if (vap->va_type != vnode_vtype(vp)) {
 		fuse_internal_vnode_disappear(vp);
 		err = ENOENT;
 		goto out;
 	}
 	if ((fvdat->flag & FN_SIZECHANGE) != 0)
 		vap->va_size = fvdat->filesize;
 
 	if (vnode_isreg(vp) && (fvdat->flag & FN_SIZECHANGE) == 0) {
 		/*
 	         * This is for those cases when the file size changed without us
 	         * knowing, and we want to catch up.
 	         */
 		off_t new_filesize = ((struct fuse_attr_out *)
 				      fdi.answ)->attr.size;
 
 		if (fvdat->filesize != new_filesize) {
 			fuse_vnode_setsize(vp, cred, new_filesize);
 			fvdat->flag &= ~FN_SIZECHANGE;
 		}
 	}
 
 out:
 	fdisp_destroy(&fdi);
 	return err;
 
 fake:
 	bzero(vap, sizeof(*vap));
 	vap->va_type = vnode_vtype(vp);
 
 	return 0;
 }
 
 /*
     struct vnop_inactive_args {
 	struct vnode *a_vp;
 	struct thread *a_td;
     };
 */
 static int
 fuse_vnop_inactive(struct vop_inactive_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct thread *td = ap->a_td;
 
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 	struct fuse_filehandle *fufh, *fufh_tmp;
 
 	int need_flush = 1;
 
 	LIST_FOREACH_SAFE(fufh, &fvdat->handles, next, fufh_tmp) {
 		if (need_flush && vp->v_type == VREG) {
 			if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0) {
 				fuse_vnode_savesize(vp, NULL, 0);
 			}
 			if (fuse_data_cache_invalidate ||
 			    (fvdat->flag & FN_REVOKED) != 0)
 				fuse_io_invalbuf(vp, td);
 			else
 				fuse_io_flushbuf(vp, MNT_WAIT, td);
 			need_flush = 0;
 		}
 		fuse_filehandle_close(vp, fufh, td, NULL);
 	}
 
 	if ((fvdat->flag & FN_REVOKED) != 0 && fuse_reclaim_revoked) {
 		vrecycle(vp);
 	}
 	return 0;
 }
 
 /*
     struct vnop_link_args {
 	struct vnode *a_tdvp;
 	struct vnode *a_vp;
 	struct componentname *a_cnp;
     };
 */
 static int
 fuse_vnop_link(struct vop_link_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vnode *tdvp = ap->a_tdvp;
 	struct componentname *cnp = ap->a_cnp;
 
 	struct vattr *vap = VTOVA(vp);
 
 	struct fuse_dispatcher fdi;
 	struct fuse_entry_out *feo;
 	struct fuse_link_in fli;
 
 	int err;
 
 	if (fuse_isdeadfs(vp)) {
 		return ENXIO;
 	}
 	if (vnode_mount(tdvp) != vnode_mount(vp)) {
 		return EXDEV;
 	}
 
 	/*
 	 * This is a seatbelt check to protect naive userspace filesystems from
 	 * themselves and the limitations of the FUSE IPC protocol.  If a
 	 * filesystem does not allow attribute caching, assume it is capable of
 	 * validating that nlink does not overflow.
 	 */
 	if (vap != NULL && vap->va_nlink >= FUSE_LINK_MAX)
 		return EMLINK;
 	fli.oldnodeid = VTOI(vp);
 
 	fdisp_init(&fdi, 0);
 	fuse_internal_newentry_makerequest(vnode_mount(tdvp), VTOI(tdvp), cnp,
 	    FUSE_LINK, &fli, sizeof(fli), &fdi);
 	if ((err = fdisp_wait_answ(&fdi))) {
 		goto out;
 	}
 	feo = fdi.answ;
 
 	err = fuse_internal_checkentry(feo, vnode_vtype(vp));
 out:
 	fdisp_destroy(&fdi);
 	return err;
 }
 
 /*
     struct vnop_lookup_args {
 	struct vnodeop_desc *a_desc;
 	struct vnode *a_dvp;
 	struct vnode **a_vpp;
 	struct componentname *a_cnp;
     };
 */
 /*
  * Look up the component described by ap->a_cnp in directory ap->a_dvp and
  * return the resulting vnode in *ap->a_vpp.
  *
  * ".." and "." are resolved with FUSE_GETATTR on the parent's or the
  * directory's own node id.  Any other name first consults the name cache
  * (when fuse_lookup_cache_enable is set) and otherwise sends FUSE_LOOKUP.
  *
  * Returns 0 on success; ENXIO on a dead file system; ENOTDIR if dvp is not a
  * directory; EROFS for a modifying operation on the last component of a
  * read-only mount; EJUSTRETURN when the last component does not exist and
  * the operation is CREATE or RENAME; ENOENT when the name does not exist
  * otherwise; or the error of the failing access check or request.
  */
 int
 fuse_vnop_lookup(struct vop_lookup_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode **vpp = ap->a_vpp;
 	struct componentname *cnp = ap->a_cnp;
 	struct thread *td = cnp->cn_thread;
 	struct ucred *cred = cnp->cn_cred;
 
 	int nameiop = cnp->cn_nameiop;
 	int flags = cnp->cn_flags;
 	int wantparent = flags & (LOCKPARENT | WANTPARENT);
 	int islastcn = flags & ISLASTCN;
 	struct mount *mp = vnode_mount(dvp);
 
 	int err = 0;		/* value returned to the caller */
 	int lookup_err = 0;	/* result of the request sent to the daemon */
 	struct vnode *vp = NULL;
 
 	struct fuse_dispatcher fdi;
 	enum fuse_opcode op;
 
 	uint64_t nid;
 	struct fuse_access_param facp;
 
 	if (fuse_isdeadfs(dvp)) {
 		*vpp = NULL;
 		return ENXIO;
 	}
 	if (!vnode_isdir(dvp)) {
 		return ENOTDIR;
 	}
 	if (islastcn && vfs_isrdonly(mp) && (nameiop != LOOKUP)) {
 		return EROFS;
 	}
 	/*
 	 * We do access check prior to doing anything else only in the case
 	 * when we are at fs root (we'd like to say, "we are at the first
 	 * component", but that's not exactly the same... nevermind).
 	 * See further comments at further access checks.
 	 */
 
 	bzero(&facp, sizeof(facp));
 	if (vnode_isvroot(dvp)) {	/* early permission check hack */
 		if ((err = fuse_internal_access(dvp, VEXEC, &facp, td, cred))) {
 			return err;
 		}
 	}
 	if (flags & ISDOTDOT) {
 		/* ".." is resolved through the recorded parent node id. */
 		nid = VTOFUD(dvp)->parent_nid;
 		if (nid == 0) {
 			return ENOENT;
 		}
 		fdisp_init(&fdi, 0);
 		op = FUSE_GETATTR;
 		goto calldaemon;
 	} else if (cnp->cn_namelen == 1 && *(cnp->cn_nameptr) == '.') {
 		/* "." is the directory itself. */
 		nid = VTOI(dvp);
 		fdisp_init(&fdi, 0);
 		op = FUSE_GETATTR;
 		goto calldaemon;
 	} else if (fuse_lookup_cache_enable) {
 		err = cache_lookup(dvp, vpp, cnp, NULL, NULL);
 		switch (err) {
 
 		case -1:		/* positive match */
 			atomic_add_acq_long(&fuse_lookup_cache_hits, 1);
 			return 0;
 
 		case 0:		/* no match in cache */
 			atomic_add_acq_long(&fuse_lookup_cache_misses, 1);
 			break;
 
 		case ENOENT:		/* negative match */
 			/* fall through */
 		default:
 			return err;
 		}
 	}
 	/* Ordinary name: ask the daemon, passing the NUL-terminated name. */
 	nid = VTOI(dvp);
 	fdisp_init(&fdi, cnp->cn_namelen + 1);
 	op = FUSE_LOOKUP;
 
 calldaemon:
 	fdisp_make(&fdi, op, mp, nid, td, cred);
 
 	if (op == FUSE_LOOKUP) {
 		memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen);
 		((char *)fdi.indata)[cnp->cn_namelen] = '\0';
 	}
 	lookup_err = fdisp_wait_answ(&fdi);
 
 	if ((op == FUSE_LOOKUP) && !lookup_err) {	/* lookup call succeeded */
 		nid = ((struct fuse_entry_out *)fdi.answ)->nodeid;
 		if (!nid) {
 			/*
 			 * A zero nodeid is the same as "not found", but it
 			 * is also cacheable (which we do not do as of
 			 * writing this).
 			 */
 			fdi.answ_stat = ENOENT;
 			lookup_err = ENOENT;
 		} else if (nid == FUSE_ROOT_ID) {
 			/* A name must not resolve to the root node id. */
 			lookup_err = EINVAL;
 		}
 	}
 	/*
 	 * Only an ENOENT answer to FUSE_LOOKUP (with answ_stat set) is
 	 * processed further; every other failure is returned right away.
 	 */
 	if (lookup_err &&
 	    (!fdi.answ_stat || lookup_err != ENOENT || op != FUSE_LOOKUP)) {
 		fdisp_destroy(&fdi);
 		return lookup_err;
 	}
 	/* lookup_err, if non-zero, must be ENOENT at this point */
 
 	if (lookup_err) {
 
 		if ((nameiop == CREATE || nameiop == RENAME) && islastcn
 		     /* && directory dvp has not been removed */ ) {
 
 			if (vfs_isrdonly(mp)) {
 				err = EROFS;
 				goto out;
 			}
 #if 0 /* THINK_ABOUT_THIS */
 			if ((err = fuse_internal_access(dvp, VWRITE, cred, td, &facp))) {
 				goto out;
 			}
 #endif
 
 			/*
 			 * Possibly record the position of a slot in the
 			 * directory large enough for the new component name.
 			 * This can be recorded in the vnode private data for
 			 * dvp. Set the SAVENAME flag to hold onto the
 			 * pathname for use later in VOP_CREATE or VOP_RENAME.
 			 */
 			cnp->cn_flags |= SAVENAME;
 
 			err = EJUSTRETURN;
 			goto out;
 		}
 		/* Consider inserting name into cache. */
 
 		/*
 		 * No we can't use negative caching, as the fs
 		 * changes are out of our control.
 		 * False positives' falseness turns out just as things
 		 * go by, but false negatives' falseness doesn't.
 		 * (and aiding the caching mechanism with extra control
 		 * mechanisms comes quite close to beating the whole purpose
 		 * caching...)
 		 */
 #if 0
 		if ((cnp->cn_flags & MAKEENTRY) != 0) {
 			SDT_PROBE2(fuse, , vnops, trace, 1,
 				"inserting NULL into cache");
 			cache_enter(dvp, NULL, cnp);
 		}
 #endif
 		err = ENOENT;
 		goto out;
 
 	} else {
 
 		/* !lookup_err */
 
 		struct fuse_entry_out *feo = NULL;
 		struct fuse_attr *fattr = NULL;
 
 		/*
 		 * The attributes live at different places in the two reply
 		 * types; feo stays NULL for a FUSE_GETATTR reply.
 		 */
 		if (op == FUSE_GETATTR) {
 			fattr = &((struct fuse_attr_out *)fdi.answ)->attr;
 		} else {
 			feo = (struct fuse_entry_out *)fdi.answ;
 			fattr = &(feo->attr);
 		}
 
 		/*
 		 * If deleting, and at end of pathname, return parameters
 		 * which can be used to remove file.  If the wantparent flag
 		 * isn't set, we return only the directory, otherwise we go on
 		 * and lock the inode, being careful with ".".
 		 */
 		if (nameiop == DELETE && islastcn) {
 			/*
 			 * Check for write access on directory.
 			 */
 			facp.xuid = fattr->uid;
 			facp.facc_flags |= FACCESS_STICKY;
 			err = fuse_internal_access(dvp, VWRITE, &facp, td, cred);
 			facp.facc_flags &= ~FACCESS_XQUERIES;
 
 			if (err) {
 				goto out;
 			}
 			if (nid == VTOI(dvp)) {
 				vref(dvp);
 				*vpp = dvp;
 			} else {
 				err = fuse_vnode_get(dvp->v_mount, feo, nid,
 				    dvp, &vp, cnp, IFTOVT(fattr->mode));
 				if (err)
 					goto out;
 				*vpp = vp;
 			}
 
 			/*
 			 * Save the name for use in VOP_RMDIR and VOP_REMOVE
 			 * later.
 			 */
 			cnp->cn_flags |= SAVENAME;
 			goto out;
 
 		}
 		/*
 		 * If rewriting (RENAME), return the inode and the
 		 * information required to rewrite the present directory
 		 * Must get inode of directory entry to verify it's a
 		 * regular file, or empty directory.
 		 */
 		if (nameiop == RENAME && wantparent && islastcn) {
 
 #if 0 /* THINK_ABOUT_THIS */
 			if ((err = fuse_internal_access(dvp, VWRITE, cred, td, &facp))) {
 				goto out;
 			}
 #endif
 
 			/*
 			 * Check for "."
 			 */
 			if (nid == VTOI(dvp)) {
 				err = EISDIR;
 				goto out;
 			}
 			err = fuse_vnode_get(vnode_mount(dvp), feo, nid, dvp,
 			    &vp, cnp, IFTOVT(fattr->mode));
 			if (err) {
 				goto out;
 			}
 			*vpp = vp;
 			/*
 			 * Save the name for use in VOP_RENAME later.
 			 */
 			cnp->cn_flags |= SAVENAME;
 
 			goto out;
 		}
 		if (flags & ISDOTDOT) {
 			/* NOTE: this mp shadows the function-level mp. */
 			struct mount *mp;
 			int ltype;
 
 			/*
 			 * Expanded copy of vn_vget_ino() so that
 			 * fuse_vnode_get() can be used.
 			 */
 			mp = dvp->v_mount;
 			ltype = VOP_ISLOCKED(dvp);
 			err = vfs_busy(mp, MBF_NOWAIT);
 			if (err != 0) {
 				/*
 				 * Could not busy the mount without sleeping:
 				 * hold a reference, drop the directory lock
 				 * while waiting, then relock and make sure
 				 * the directory was not doomed meanwhile.
 				 */
 				vfs_ref(mp);
 				VOP_UNLOCK(dvp, 0);
 				err = vfs_busy(mp, 0);
 				vn_lock(dvp, ltype | LK_RETRY);
 				vfs_rel(mp);
 				if (err)
 					goto out;
 				if ((dvp->v_iflag & VI_DOOMED) != 0) {
 					err = ENOENT;
 					vfs_unbusy(mp);
 					goto out;
 				}
 			}
 			/* dvp is unlocked while the parent vnode is obtained. */
 			VOP_UNLOCK(dvp, 0);
 			err = fuse_vnode_get(vnode_mount(dvp), feo, nid, NULL,
 			    &vp, cnp, IFTOVT(fattr->mode));
 			vfs_unbusy(mp);
 			vn_lock(dvp, ltype | LK_RETRY);
 			if ((dvp->v_iflag & VI_DOOMED) != 0) {
 				if (err == 0)
 					vput(vp);
 				err = ENOENT;
 			}
 			if (err)
 				goto out;
 			*vpp = vp;
 		} else if (nid == VTOI(dvp)) {
 			vref(dvp);
 			*vpp = dvp;
 		} else {
 			struct fuse_vnode_data *fvdat;
 
 			err = fuse_vnode_get(vnode_mount(dvp), feo, nid, dvp,
 			    &vp, cnp, IFTOVT(fattr->mode));
 			if (err) {
 				goto out;
 			}
 			fuse_vnode_setparent(vp, dvp);
 
 			/*
 			 * In the case where we are looking up a FUSE node
 			 * represented by an existing cached vnode, and the
 			 * true size reported by FUSE_LOOKUP doesn't match
 			 * the vnode's cached size, fix the vnode cache to
 			 * match the real object size.
 			 *
 			 * This can occur via FUSE distributed filesystems,
 			 * irregular files, etc.
 			 */
 			fvdat = VTOFUD(vp);
 			if (vnode_isreg(vp) &&
 			    fattr->size != fvdat->filesize) {
 				/*
 				 * The FN_SIZECHANGE flag reflects a dirty
 				 * append.  If userspace lets us know our cache
 				 * is invalid, that write was lost.  (Dirty
 				 * writes that do not cause append are also
 				 * lost, but we don't detect them here.)
 				 *
 				 * XXX: Maybe disable WB caching on this mount.
 				 */
 				if (fvdat->flag & FN_SIZECHANGE)
 					printf("%s: WB cache incoherent on "
 					    "%s!\n", __func__,
 					    vnode_mount(vp)->mnt_stat.f_mntonname);
 
 				(void)fuse_vnode_setsize(vp, cred, fattr->size);
 				fvdat->flag &= ~FN_SIZECHANGE;
 			}
 			*vpp = vp;
 		}
 
 		/* Cache the attributes that came with the reply on *vpp. */
 		if (op == FUSE_GETATTR) {
 			struct fuse_attr_out *fao =
 				(struct fuse_attr_out*)fdi.answ;
 			fuse_internal_cache_attrs(*vpp,
 				&fao->attr, fao->attr_valid,
 				fao->attr_valid_nsec, NULL);
 		} else {
 			/* NOTE: this feo shadows the one declared above. */
 			struct fuse_entry_out *feo =
 				(struct fuse_entry_out*)fdi.answ;
 			fuse_internal_cache_attrs(*vpp,
 				&feo->attr, feo->attr_valid,
 				feo->attr_valid_nsec, NULL);
 		}
 
 		/* Insert name into cache if appropriate. */
 
 		/*
 		 * Nooo, caching is evil. With caching, we can't avoid stale
 		 * information taking over the playground (cached info is not
 		 * just positive/negative, it does have qualitative aspects,
 		 * too). And a (VOP/FUSE)_GETATTR is always thrown anyway, when
 		 * walking down along cached path components, and that's not
 		 * any cheaper than FUSE_LOOKUP. This might change with
 		 * implementing kernel side attr caching, but... In Linux,
 		 * lookup results are not cached, and the daemon is bombarded
 		 * with FUSE_LOOKUPS on and on. This shows that by design, the
 		 * daemon is expected to handle frequent lookup queries
 		 * efficiently, do its caching in userspace, and so on.
 		 *
 		 * So just leave the name cache alone.
 		 */
 
 		/*
 		 * Well, now I know, Linux caches lookups, but with a
 		 * timeout... So it's the same thing as attribute caching:
 		 * we can deal with it when we implement timeouts.
 		 */
 #if 0
 		if (cnp->cn_flags & MAKEENTRY) {
 			cache_enter(dvp, *vpp, cnp);
 		}
 #endif
 	}
 out:
 	if (!lookup_err) {
 
 		/* No lookup error; need to clean up. */
 
 		if (err) {		/* Found inode; exit with no vnode. */
 			/*
 			 * The daemon answered FUSE_LOOKUP successfully but
 			 * no vnode is returned, so send a forget for that
 			 * one lookup.
 			 */
 			if (op == FUSE_LOOKUP) {
 				fuse_internal_forget_send(vnode_mount(dvp), td, cred,
 				    nid, 1);
 			}
 			fdisp_destroy(&fdi);
 			return err;
 		} else {
 #ifndef NO_EARLY_PERM_CHECK_HACK
 			if (!islastcn) {
 				/*
 				 * We have the attributes of the next item
 				 * *now*, and it's a fact, and we do not
 				 * have to do extra work for it (ie, beg the
 				 * daemon), and it neither depends on such
 				 * accidental things like attr caching. So
 				 * the big idea: check credentials *now*,
 				 * not at the beginning of the next call to
 				 * lookup.
 				 * 
 				 * The first item of the lookup chain (fs root)
 				 * won't be checked then here, of course, as
 				 * its never "the next". But go and see that
 				 * the root is taken care about at the very
 				 * beginning of this function.
 				 * 
 				 * Now, given we want to do the access check
 				 * this way, one might ask: so then why not
 				 * do the access check just after fetching
 				 * the inode and its attributes from the
 				 * daemon? Why bother with producing the
 				 * corresponding vnode at all if something
 				 * is not OK? We know what's the deal as
 				 * soon as we get those attrs... There is
 				 * one bit of info though not given us by
 				 * the daemon: whether his response is
 				 * authoritative or not... His response should
 				 * be ignored if something is mounted over
 				 * the dir in question. But that can be
 				 * known only by having the vnode...
 				 */
 				int tmpvtype = vnode_vtype(*vpp);
 
 				bzero(&facp, sizeof(facp));
 				/*the early perm check hack */
 				    facp.facc_flags |= FACCESS_VA_VALID;
 
 				/* An intermediate component must be a directory or a symlink. */
 				if ((tmpvtype != VDIR) && (tmpvtype != VLNK)) {
 					err = ENOTDIR;
 				}
 				if (!err && !vnode_mountedhere(*vpp)) {
 					err = fuse_internal_access(*vpp, VEXEC, &facp, td, cred);
 				}
 				if (err) {
 					if (tmpvtype == VLNK)
 						SDT_PROBE2(fuse, , vnops, trace,
 						    1, "weird, permission "
 						    "error with a symlink?");
 					vput(*vpp);
 					*vpp = NULL;
 				}
 			}
 #endif
 		}
 	}
 	fdisp_destroy(&fdi);
 
 	return err;
 }
 
 /*
     struct vnop_mkdir_args {
 	struct vnode *a_dvp;
 	struct vnode **a_vpp;
 	struct componentname *a_cnp;
 	struct vattr *a_vap;
     };
 */
 /*
  * Create the directory named by ap->a_cnp in ap->a_dvp with a FUSE_MKDIR
  * request and return the new vnode in *ap->a_vpp.
  *
  * Returns ENXIO on a dead file system, otherwise the result of
  * fuse_internal_newentry().
  */
 static int
 fuse_vnop_mkdir(struct vop_mkdir_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode **vpp = ap->a_vpp;
 	struct componentname *cnp = ap->a_cnp;
 	struct vattr *vap = ap->a_vap;
 
 	struct fuse_mkdir_in fmdi;
 
 	if (fuse_isdeadfs(dvp)) {
 		return ENXIO;
 	}
 	/*
 	 * sizeof(fmdi) bytes are handed over for the request but only the
 	 * mode is filled in here.  Clear the structure first so that any
 	 * other member or padding it may have does not carry uninitialized
 	 * kernel stack contents to the daemon.
 	 */
 	bzero(&fmdi, sizeof(fmdi));
 	fmdi.mode = MAKEIMODE(vap->va_type, vap->va_mode);
 
 	return (fuse_internal_newentry(dvp, vpp, cnp, FUSE_MKDIR, &fmdi,
 	    sizeof(fmdi), VDIR));
 }
 
 /*
     struct vnop_mknod_args {
 	struct vnode *a_dvp;
 	struct vnode **a_vpp;
 	struct componentname *a_cnp;
 	struct vattr *a_vap;
     };
 */
 static int
 fuse_vnop_mknod(struct vop_mknod_args *ap)
 {
 
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode **vpp = ap->a_vpp;
 	struct componentname *cnp = ap->a_cnp;
 	struct vattr *vap = ap->a_vap;
 	struct fuse_mknod_in fmni;
 
 	if (fuse_isdeadfs(dvp))
 		return ENXIO;
 
 	fmni.mode = MAKEIMODE(vap->va_type, vap->va_mode);
 	fmni.rdev = vap->va_rdev;
 	return (fuse_internal_newentry(dvp, vpp, cnp, FUSE_MKNOD, &fmni,
 	    sizeof(fmni), vap->va_type));
 }
 
 /*
     struct vnop_open_args {
 	struct vnode *a_vp;
 	int  a_mode;
 	struct ucred *a_cred;
 	struct thread *a_td;
 	int a_fdidx; / struct file *a_fp;
     };
 */
 /*
  * Open ap->a_vp with the access mode ap->a_mode.
  *
  * If a file handle valid for this mode, credential and pid already exists it
  * is reused and only fuse_vnode_open() is called; otherwise a new handle is
  * opened through fuse_filehandle_open().
  *
  * Returns ENXIO on a dead file system, EOPNOTSUPP for character, block and
  * fifo vnodes, EINVAL if none of FREAD, FWRITE and FEXEC is requested,
  * otherwise 0 or the error from fuse_filehandle_open().
  */
 static int
 fuse_vnop_open(struct vop_open_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	int a_mode = ap->a_mode;
 	struct thread *td = ap->a_td;
 	struct ucred *cred = ap->a_cred;
 	pid_t pid = td->td_proc->p_pid;
 
 	if (fuse_isdeadfs(vp))
 		return ENXIO;
 	if (vp->v_type == VCHR || vp->v_type == VBLK || vp->v_type == VFIFO)
 		return (EOPNOTSUPP);
 	if ((a_mode & (FREAD | FWRITE | FEXEC)) == 0)
 		return EINVAL;
 
 	if (fuse_filehandle_validrw(vp, a_mode, cred, pid)) {
 		/* A suitable handle is already open; no request is needed. */
 		fuse_vnode_open(vp, 0, td);
 		return 0;
 	}
 
 	return fuse_filehandle_open(vp, a_mode, NULL, td, cred);
 }
 
 static int
 fuse_vnop_pathconf(struct vop_pathconf_args *ap)
 {
 
 	switch (ap->a_name) {
 	case _PC_FILESIZEBITS:
 		*ap->a_retval = 64;
 		return (0);
 	case _PC_NAME_MAX:
 		*ap->a_retval = NAME_MAX;
 		return (0);
 	case _PC_LINK_MAX:
 		*ap->a_retval = MIN(LONG_MAX, FUSE_LINK_MAX);
 		return (0);
 	case _PC_SYMLINK_MAX:
 		*ap->a_retval = MAXPATHLEN;
 		return (0);
 	case _PC_NO_TRUNC:
 		*ap->a_retval = 1;
 		return (0);
 	default:
 		return (vop_stdpathconf(ap));
 	}
 }
 
 /*
     struct vnop_read_args {
 	struct vnode *a_vp;
 	struct uio *a_uio;
 	int  a_ioflag;
 	struct ucred *a_cred;
     };
 */
 /*
  * Read from ap->a_vp into ap->a_uio through fuse_io_dispatch().  IO_DIRECT
  * is added to the I/O flags when the vnode carries FN_DIRECTIO.
  *
  * Returns ENXIO on a dead file system, otherwise the dispatch result.
  */
 static int
 fuse_vnop_read(struct vop_read_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	pid_t pid = curthread->td_proc->p_pid;
 	int ioflag;
 
 	if (fuse_isdeadfs(vp))
 		return (ENXIO);
 
 	ioflag = ap->a_ioflag;
 	if ((VTOFUD(vp)->flag & FN_DIRECTIO) != 0)
 		ioflag |= IO_DIRECT;
 
 	return (fuse_io_dispatch(vp, ap->a_uio, ioflag, ap->a_cred, pid));
 }
 
 /*
     struct vnop_readdir_args {
 	struct vnode *a_vp;
 	struct uio *a_uio;
 	struct ucred *a_cred;
 	int *a_eofflag;
 	int *ncookies;
 	u_long **a_cookies;
     };
 */
 static int
 fuse_vnop_readdir(struct vop_readdir_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct uio *uio = ap->a_uio;
 	struct ucred *cred = ap->a_cred;
 	struct fuse_filehandle *fufh = NULL;
 	struct fuse_iov cookediov;
 	int err = 0;
 	pid_t pid = curthread->td_proc->p_pid;
 
 	if (fuse_isdeadfs(vp)) {
 		return ENXIO;
 	}
 	if (				/* XXXIP ((uio_iovcnt(uio) > 1)) || */
 	    (uio_resid(uio) < sizeof(struct dirent))) {
 		return EINVAL;
 	}
 
 	err = fuse_filehandle_get_dir(vp, &fufh, cred, pid);
 	if (err)
 		return (err);
 #define DIRCOOKEDSIZE FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + MAXNAMLEN + 1)
 	fiov_init(&cookediov, DIRCOOKEDSIZE);
 
 	err = fuse_internal_readdir(vp, uio, fufh, &cookediov);
 
 	fiov_teardown(&cookediov);
 
 	return err;
 }
 
 /*
     struct vnop_readlink_args {
 	struct vnode *a_vp;
 	struct uio *a_uio;
 	struct ucred *a_cred;
     };
 */
 static int
 fuse_vnop_readlink(struct vop_readlink_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct uio *uio = ap->a_uio;
 	struct ucred *cred = ap->a_cred;
 
 	struct fuse_dispatcher fdi;
 	int err;
 
 	if (fuse_isdeadfs(vp)) {
 		return ENXIO;
 	}
 	if (!vnode_islnk(vp)) {
 		return EINVAL;
 	}
 	fdisp_init(&fdi, 0);
 	err = fdisp_simple_putget_vp(&fdi, FUSE_READLINK, vp, curthread, cred);
 	if (err) {
 		goto out;
 	}
 	if (((char *)fdi.answ)[0] == '/' &&
 	    fuse_get_mpdata(vnode_mount(vp))->dataflags & FSESS_PUSH_SYMLINKS_IN) {
 		char *mpth = vnode_mount(vp)->mnt_stat.f_mntonname;
 
 		err = uiomove(mpth, strlen(mpth), uio);
 	}
 	if (!err) {
 		err = uiomove(fdi.answ, fdi.iosize, uio);
 	}
 out:
 	fdisp_destroy(&fdi);
 	return err;
 }
 
 /*
     struct vnop_reclaim_args {
 	struct vnode *a_vp;
 	struct thread *a_td;
     };
 */
 /*
  * Release everything the fuse layer holds for ap->a_vp.
  *
  * Any file handle that is still open is reported and closed.  Unless the
  * file system is dead, the lookup count accumulated on the vnode is given
  * back to the daemon with a forget message.  Finally the vnode is detached
  * from its parent, the name cache and the vfs hash, and its VM object and
  * private data are destroyed.  Panics if the vnode has no private data.
  * Always returns 0.
  */
 static int
 fuse_vnop_reclaim(struct vop_reclaim_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct thread *td = ap->a_td;
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 	struct fuse_filehandle *fufh, *fufh_tmp;
 
 	if (!fvdat) {
 		panic("FUSE: no vnode data during recycling");
 	}
 	LIST_FOREACH_SAFE(fufh, &fvdat->handles, next, fufh_tmp) {
 		/* Terminate the line so console messages do not run together. */
 		printf("FUSE: vnode being reclaimed with open fufh "
 			"(type=%#x)\n", fufh->fufh_type);
 		fuse_filehandle_close(vp, fufh, td, NULL);
 	}
 
 	if ((!fuse_isdeadfs(vp)) && (fvdat->nlookup)) {
 		fuse_internal_forget_send(vnode_mount(vp), td, NULL, VTOI(vp),
 		    fvdat->nlookup);
 	}
 	fuse_vnode_setparent(vp, NULL);
 	cache_purge(vp);
 	vfs_hash_remove(vp);
 	vnode_destroy_vobject(vp);
 	fuse_vnode_destroy(vp);
 
 	return 0;
 }
 
 /*
     struct vnop_remove_args {
 	struct vnode *a_dvp;
 	struct vnode *a_vp;
 	struct componentname *a_cnp;
     };
 */
 static int
 fuse_vnop_remove(struct vop_remove_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode *vp = ap->a_vp;
 	struct componentname *cnp = ap->a_cnp;
 
 	int err;
 
 	if (fuse_isdeadfs(vp)) {
 		return ENXIO;
 	}
 	if (vnode_isdir(vp)) {
 		return EPERM;
 	}
 	cache_purge(vp);
 
 	err = fuse_internal_remove(dvp, vp, cnp, FUSE_UNLINK);
 
 	if (err == 0)
 		fuse_internal_vnode_disappear(vp);
 	return err;
 }
 
 /*
     struct vnop_rename_args {
 	struct vnode *a_fdvp;
 	struct vnode *a_fvp;
 	struct componentname *a_fcnp;
 	struct vnode *a_tdvp;
 	struct vnode *a_tvp;
 	struct componentname *a_tcnp;
     };
 */
 /*
  * Rename ap->a_fvp (named ap->a_fcnp in ap->a_fdvp) to the name ap->a_tcnp
  * in ap->a_tdvp, replacing ap->a_tvp if it is not NULL.
  *
  * The request itself is issued by fuse_internal_rename() while the
  * per-mount rename lock is held exclusively.  On success the recorded
  * parent of the moved vnode is updated and that of a replaced target is
  * cleared.
  *
  * Every return path releases the four vnodes handed in by the caller:
  * tdvp and tvp are vput() (tdvp is only vrele()d when it is the same vnode
  * as tvp), fdvp and fvp are vrele()d.
  *
  * Returns 0 on success, ENXIO on a dead file system, EXDEV if the vnodes do
  * not all belong to the same mount, otherwise the error of the request.
  */
 static int
 fuse_vnop_rename(struct vop_rename_args *ap)
 {
 	struct vnode *fdvp = ap->a_fdvp;
 	struct vnode *fvp = ap->a_fvp;
 	struct componentname *fcnp = ap->a_fcnp;
 	struct vnode *tdvp = ap->a_tdvp;
 	struct vnode *tvp = ap->a_tvp;
 	struct componentname *tcnp = ap->a_tcnp;
 	struct fuse_data *data;
 
 	int err = 0;
 
 	if (fuse_isdeadfs(fdvp)) {
 		/*
 		 * Do not return directly: the vnodes still have to be
 		 * released, exactly as on the other error paths.
 		 */
 		err = ENXIO;
 		goto out;
 	}
 	if (fvp->v_mount != tdvp->v_mount ||
 	    (tvp && fvp->v_mount != tvp->v_mount)) {
 		SDT_PROBE2(fuse, , vnops, trace, 1, "cross-device rename");
 		err = EXDEV;
 		goto out;
 	}
 	cache_purge(fvp);
 
 	/*
 	 * FUSE library is expected to check if target directory is not
 	 * under the source directory in the file system tree.
 	 * Linux performs this check at VFS level.
 	 */
 	data = fuse_get_mpdata(vnode_mount(tdvp));
 	sx_xlock(&data->rename_lock);
 	err = fuse_internal_rename(fdvp, fcnp, tdvp, tcnp);
 	if (err == 0) {
 		if (tdvp != fdvp)
 			fuse_vnode_setparent(fvp, tdvp);
 		if (tvp != NULL)
 			fuse_vnode_setparent(tvp, NULL);
 	}
 	sx_unlock(&data->rename_lock);
 
 	/* Drop name cache entries of everything the rename touched. */
 	if (tvp != NULL && tvp != fvp) {
 		cache_purge(tvp);
 	}
 	if (vnode_isdir(fvp)) {
 		if ((tvp != NULL) && vnode_isdir(tvp)) {
 			cache_purge(tdvp);
 		}
 		cache_purge(fdvp);
 	}
 out:
 	if (tdvp == tvp) {
 		vrele(tdvp);
 	} else {
 		vput(tdvp);
 	}
 	if (tvp != NULL) {
 		vput(tvp);
 	}
 	vrele(fdvp);
 	vrele(fvp);
 
 	return err;
 }
 
 /*
     struct vnop_rmdir_args {
 	    struct vnode *a_dvp;
 	    struct vnode *a_vp;
 	    struct componentname *a_cnp;
     } *ap;
 */
 static int
 fuse_vnop_rmdir(struct vop_rmdir_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode *vp = ap->a_vp;
 
 	int err;
 
 	if (fuse_isdeadfs(vp)) {
 		return ENXIO;
 	}
 	if (VTOFUD(vp) == VTOFUD(dvp)) {
 		return EINVAL;
 	}
 	err = fuse_internal_remove(dvp, vp, ap->a_cnp, FUSE_RMDIR);
 
 	if (err == 0)
 		fuse_internal_vnode_disappear(vp);
 	return err;
 }
 
 /*
     struct vnop_setattr_args {
 	struct vnode *a_vp;
 	struct vattr *a_vap;
 	struct ucred *a_cred;
 	struct thread *a_td;
     };
 */
 /*
  * Change attributes of ap->a_vp according to ap->a_vap with FUSE_SETATTR.
  *
  * Every field of the vattr that is not VNOVAL is copied into the request
  * and flagged in fsai->valid (uid, gid, size, atime, mtime, mode).  For a
  * size change the id of a writable file handle, if one is found, is passed
  * along as well.  If nothing is to be changed no request is sent.
  *
  * After a successful request the returned attributes are cached and, for a
  * size change, the vnode's size is updated.
  *
  * Returns 0 on success; ENXIO on a dead file system; EISDIR when a size
  * change is requested on a directory; EROFS on a read-only mount (see the
  * condition below); EAGAIN if the reply shows the object changed its type;
  * otherwise the error of the request.
  */
 static int
 fuse_vnop_setattr(struct vop_setattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vattr *vap = ap->a_vap;
 	struct ucred *cred = ap->a_cred;
 	struct thread *td = curthread;
 	struct fuse_dispatcher fdi;
 	struct fuse_setattr_in *fsai;
 	struct fuse_access_param facp;
 	pid_t pid = td->td_proc->p_pid;
 
 	int err = 0;
 	enum vtype vtyp;
 	int sizechanged = 0;
 	uint64_t newsize = 0;
 
 	if (fuse_isdeadfs(vp)) {
 		return ENXIO;
 	}
 	/* The request is built up front; "out" destroys it on every path. */
 	fdisp_init(&fdi, sizeof(*fsai));
 	fdisp_make_vp(&fdi, FUSE_SETATTR, vp, td, cred);
 	fsai = fdi.indata;
 	fsai->valid = 0;
 
 	/*
 	 * NOTE(review): facp is filled in below but not passed to anything
 	 * within this function.
 	 */
 	bzero(&facp, sizeof(facp));
 
 	facp.xuid = vap->va_uid;
 	facp.xgid = vap->va_gid;
 
 	if (vap->va_uid != (uid_t)VNOVAL) {
 		facp.facc_flags |= FACCESS_CHOWN;
 		fsai->uid = vap->va_uid;
 		fsai->valid |= FATTR_UID;
 	}
 	if (vap->va_gid != (gid_t)VNOVAL) {
 		facp.facc_flags |= FACCESS_CHOWN;
 		fsai->gid = vap->va_gid;
 		fsai->valid |= FATTR_GID;
 	}
 	if (vap->va_size != VNOVAL) {
 
 		struct fuse_filehandle *fufh = NULL;
 
 		/* Truncate to a new value. */
 		    fsai->size = vap->va_size;
 		sizechanged = 1;
 		newsize = vap->va_size;
 		fsai->valid |= FATTR_SIZE;
 
 		/* Pass a writable file handle along if there is one. */
 		fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid);
 		if (fufh) {
 			fsai->fh = fufh->fh_id;
 			fsai->valid |= FATTR_FH;
 		}
 	}
 	if (vap->va_atime.tv_sec != VNOVAL) {
 		fsai->atime = vap->va_atime.tv_sec;
 		fsai->atimensec = vap->va_atime.tv_nsec;
 		fsai->valid |= FATTR_ATIME;
 	}
 	if (vap->va_mtime.tv_sec != VNOVAL) {
 		fsai->mtime = vap->va_mtime.tv_sec;
 		fsai->mtimensec = vap->va_mtime.tv_nsec;
 		fsai->valid |= FATTR_MTIME;
 	}
 	if (vap->va_mode != (mode_t)VNOVAL) {
 		fsai->mode = vap->va_mode & ALLPERMS;
 		fsai->valid |= FATTR_MODE;
 	}
 	/* Nothing to change: succeed without contacting the daemon. */
 	if (!fsai->valid) {
 		goto out;
 	}
 	vtyp = vnode_vtype(vp);
 
 	if (fsai->valid & FATTR_SIZE && vtyp == VDIR) {
 		err = EISDIR;
 		goto out;
 	}
 	/*
 	 * On a read-only mount, refuse the change if anything besides the
 	 * size is being set or the vnode is a regular file.
 	 */
 	if (vfs_isrdonly(vnode_mount(vp)) && (fsai->valid & ~FATTR_SIZE || vtyp == VREG)) {
 		err = EROFS;
 		goto out;
 	}
 
 	if ((err = fdisp_wait_answ(&fdi)))
 		goto out;
 	/* From here on vtyp is the type reported back by the daemon. */
 	vtyp = IFTOVT(((struct fuse_attr_out *)fdi.answ)->attr.mode);
 
 	if (vnode_vtype(vp) != vtyp) {
 		if (vnode_vtype(vp) == VNON && vtyp != VNON) {
 			SDT_PROBE2(fuse, , vnops, trace, 1, "FUSE: Dang! "
 				"vnode_vtype is VNON and vtype isn't.");
 		} else {
 			/*
 			 * STALE vnode, ditch
 			 *
 			 * The vnode has changed its type "behind our back".
 			 * There's nothing really we can do, so let us just
 			 * force an internal revocation and tell the caller to
 			 * try again, if interested.
 			 */
 			fuse_internal_vnode_disappear(vp);
 			err = EAGAIN;
 		}
 	}
 	if (err == 0) {
 		struct fuse_attr_out *fao = (struct fuse_attr_out*)fdi.answ;
 		fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid,
 			fao->attr_valid_nsec, NULL);
 	}
 
 out:
 	fdisp_destroy(&fdi);
 	/* Apply a successful size change to the vnode. */
 	if (!err && sizechanged) {
 		fuse_vnode_setsize(vp, cred, newsize);
 		VTOFUD(vp)->flag &= ~FN_SIZECHANGE;
 	}
 	return err;
 }
 
 /*
     struct vnop_strategy_args {
 	struct vnode *a_vp;
 	struct buf *a_bp;
     };
 */
 static int
 fuse_vnop_strategy(struct vop_strategy_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct buf *bp = ap->a_bp;
 
 	if (!vp || fuse_isdeadfs(vp)) {
 		bp->b_ioflags |= BIO_ERROR;
 		bp->b_error = ENXIO;
 		bufdone(bp);
 		return 0;
 	}
 	if (bp->b_iocmd == BIO_WRITE) {
 		int err;
 
 		err = fuse_vnode_refreshsize(vp, NOCRED);
 		if (err) {
 			bp->b_ioflags |= BIO_ERROR;
 			bp->b_error = err;
 			bufdone(bp);
 			return 0;
 		}
 	}
 
 	/*
 	 * VOP_STRATEGY always returns zero and signals error via bp->b_ioflags.
 	 * fuse_io_strategy sets bp's error fields
 	 */
 	(void)fuse_io_strategy(vp, bp);
 
 	return 0;
 }
 
 
 /*
  * Create a symbolic link by sending FUSE_SYMLINK for the directory a_dvp.
  *
  * Returns ENXIO if the filesystem is dead, otherwise whatever
  * fuse_internal_newentry_core() returns.
  *
  *  struct vop_symlink_args {
  *	struct vnode *a_dvp;
  *	struct vnode **a_vpp;
  *	struct componentname *a_cnp;
  *	struct vattr *a_vap;
  *	char *a_target;
  *  };
  */
 static int
 fuse_vnop_symlink(struct vop_symlink_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct componentname *cnp = ap->a_cnp;
 	const char *target = ap->a_target;
 	struct fuse_dispatcher fdi;
 	char *payload;
 	size_t namesz, targetsz;
 	int err;
 
 	if (fuse_isdeadfs(dvp))
 		return ENXIO;
 
 	/*
 	 * Unlike the other creator type calls, the message carries the name
 	 * of the new entry first and the data describing it (the link target)
 	 * second.  So fuse_internal_newentry() cannot be used; the message is
 	 * assembled by hand and only the core part is called.
 	 */
 	namesz = cnp->cn_namelen + 1;		/* name plus its NUL */
 	targetsz = strlen(target) + 1;		/* target plus its NUL */
 
 	fdisp_init(&fdi, targetsz + namesz);
 	fdisp_make_vp(&fdi, FUSE_SYMLINK, dvp, curthread, NULL);
 
 	payload = fdi.indata;
 	memcpy(payload, cnp->cn_nameptr, cnp->cn_namelen);
 	payload[cnp->cn_namelen] = '\0';
 	memcpy(payload + namesz, target, targetsz);
 
 	err = fuse_internal_newentry_core(dvp, ap->a_vpp, cnp, VLNK, &fdi);
 	fdisp_destroy(&fdi);
 	return err;
 }
 
 /*
  * Write to a file: refresh the vnode size, then dispatch the I/O.
  *
  * IO_DIRECT is added to the caller's ioflag when the vnode carries
  * FN_DIRECTIO.  Returns ENXIO on a dead filesystem, the error from
  * fuse_vnode_refreshsize() if that fails, otherwise the result of
  * fuse_io_dispatch().
  *
  *  struct vop_write_args {
  *	struct vnode *a_vp;
  *	struct uio *a_uio;
  *	int  a_ioflag;
  *	struct ucred *a_cred;
  *  };
  */
 static int
 fuse_vnop_write(struct vop_write_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct ucred *cred = ap->a_cred;
 	int ioflag = ap->a_ioflag;
 	int err;
 
 	if (fuse_isdeadfs(vp))
 		return ENXIO;
 
 	err = fuse_vnode_refreshsize(vp, cred);
 	if (err != 0)
 		return err;
 
 	if ((VTOFUD(vp)->flag & FN_DIRECTIO) != 0)
 		ioflag |= IO_DIRECT;
 
 	return fuse_io_dispatch(vp, ap->a_uio, ioflag, cred,
 	    curthread->td_proc->p_pid);
 }
 
 SDT_PROBE_DEFINE1(fuse, , vnops, vnop_getpages_error, "int");
 /*
  * Page-in handler: fill the pages in a_m by reading from the file with
  * fuse_io_dispatch().
  *
  * Fields of struct vop_getpages_args used below:
  *	a_vp		vnode to read from (must have a VM object)
  *	a_m		array of pages to fill
  *	a_count		number of entries in a_m
  *	a_rbehind	if non-NULL, set to 0 on success
  *	a_rahead	if non-NULL, set to 0 on success
  *
  * Returns VM_PAGER_OK, or VM_PAGER_ERROR when the mount does not have the
  * mmap option or when the read failed without transferring any data.
  */
 static int
 fuse_vnop_getpages(struct vop_getpages_args *ap)
 {
 	int i, error, nextoff, size, toff, count, npages;
 	struct uio uio;
 	struct iovec iov;
 	vm_offset_t kva;
 	struct buf *bp;
 	struct vnode *vp;
 	struct thread *td;
 	struct ucred *cred;
 	vm_page_t *pages;
 	pid_t pid = curthread->td_proc->p_pid;
 
 	vp = ap->a_vp;
 	KASSERT(vp->v_object, ("objectless vp passed to getpages"));
 	td = curthread;			/* XXX */
 	cred = curthread->td_ucred;	/* XXX */
 	pages = ap->a_m;
 	npages = ap->a_count;
 
 	if (!fsess_opt_mmap(vnode_mount(vp))) {
 		SDT_PROBE2(fuse, , vnops, trace, 1,
 			"called on non-cacheable vnode??\n");
 		return (VM_PAGER_ERROR);
 	}
 
 	/*
 	 * If the last page is partially valid, just return it and allow
 	 * the pager to zero-out the blanks.  Partially valid pages can
 	 * only occur at the file EOF.
 	 *
 	 * XXXGL: is that true for FUSE, which is a local filesystem,
 	 * but still somewhat disconnected from the kernel?
 	 */
 	VM_OBJECT_WLOCK(vp->v_object);
 	/*
 	 * Dropping the last page may leave nothing to read; in that case
 	 * jump to "out" with the object lock still held (it is released
 	 * there).
 	 */
 	if (pages[npages - 1]->valid != 0 && --npages == 0)
 		goto out;
 	VM_OBJECT_WUNLOCK(vp->v_object);
 
 	/*
 	 * We use only the kva address for the buffer, but this is extremely
 	 * convenient and fast.
 	 */
 	bp = uma_zalloc(fuse_pbuf_zone, M_WAITOK);
 
 	kva = (vm_offset_t)bp->b_data;
 	pmap_qenter(kva, pages, npages);
 	VM_CNT_INC(v_vnodein);
 	VM_CNT_ADD(v_vnodepgsin, npages);
 
 	/* Describe one kernel-space read covering all remaining pages. */
 	count = npages << PAGE_SHIFT;
 	iov.iov_base = (caddr_t)kva;
 	iov.iov_len = count;
 	uio.uio_iov = &iov;
 	uio.uio_iovcnt = 1;
 	uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
 	uio.uio_resid = count;
 	uio.uio_segflg = UIO_SYSSPACE;
 	uio.uio_rw = UIO_READ;
 	uio.uio_td = td;
 
 	error = fuse_io_dispatch(vp, &uio, IO_DIRECT, cred, pid);
 	pmap_qremove(kva, npages);
 
 	uma_zfree(fuse_pbuf_zone, bp);
 
 	/* Only a failure that transferred nothing at all is reported. */
 	if (error && (uio.uio_resid == count)) {
 		SDT_PROBE1(fuse, , vnops, vnop_getpages_error, error);
 		return VM_PAGER_ERROR;
 	}
 	/*
 	 * Calculate the number of bytes read and validate only that number
 	 * of bytes.  Note that due to pending writes, size may be 0.  This
 	 * does not mean that the remaining data is invalid!
 	 */
 
 	size = count - uio.uio_resid;
 	VM_OBJECT_WLOCK(vp->v_object);
 	fuse_vm_page_lock_queues();
 	/* toff/nextoff are the byte offsets of page i within the transfer. */
 	for (i = 0, toff = 0; i < npages; i++, toff = nextoff) {
 		vm_page_t m;
 
 		nextoff = toff + PAGE_SIZE;
 		m = pages[i];
 
 		if (nextoff <= size) {
 			/*
 			 * Read operation filled an entire page
 			 */
 			m->valid = VM_PAGE_BITS_ALL;
 			KASSERT(m->dirty == 0,
 			    ("fuse_getpages: page %p is dirty", m));
 		} else if (size > toff) {
 			/*
 			 * Read operation filled a partial page.
 			 */
 			m->valid = 0;
 			vm_page_set_valid_range(m, 0, size - toff);
 			KASSERT(m->dirty == 0,
 			    ("fuse_getpages: page %p is dirty", m));
 		} else {
 			/*
 			 * Read operation was short.  If no error occurred
 			 * we may have hit a zero-fill section.   We simply
 			 * leave valid set to 0.
 			 */
 			;
 		}
 	}
 	fuse_vm_page_unlock_queues();
 out:
 	VM_OBJECT_WUNLOCK(vp->v_object);
 	/* No read-behind or read-ahead is performed. */
 	if (ap->a_rbehind)
 		*ap->a_rbehind = 0;
 	if (ap->a_rahead)
 		*ap->a_rahead = 0;
 	return (VM_PAGER_OK);
 }
 
 /*
  * Page-out handler: write the pages in a_m back to the file with
  * fuse_io_dispatch().
  *
  * Every entry of a_rtvals starts as VM_PAGER_AGAIN; after a successful write
  * the entries for the pages that were written become VM_PAGER_OK and those
  * pages are marked clean.  The function returns a_rtvals[0].
  *
  *  struct vop_putpages_args {
  *	struct vnode *a_vp;
  *	vm_page_t *a_m;
  *	int a_count;		(byte count; converted to pages with btoc())
  *	int a_sync;
  *	int *a_rtvals;
  *	vm_ooffset_t a_offset;
  *  };
  */
 static int
 fuse_vnop_putpages(struct vop_putpages_args *ap)
 {
 	struct uio uio;
 	struct iovec iov;
 	vm_offset_t kva;
 	struct buf *bp;
 	int i, error, npages, count;
 	off_t offset;
 	int *rtvals;
 	struct vnode *vp;
 	struct thread *td;
 	struct ucred *cred;
 	vm_page_t *pages;
 	vm_ooffset_t fsize;
 	pid_t pid = curthread->td_proc->p_pid;
 
 	vp = ap->a_vp;
 	KASSERT(vp->v_object, ("objectless vp passed to putpages"));
 	fsize = vp->v_object->un_pager.vnp.vnp_size;
 	td = curthread;			/* XXX */
 	cred = curthread->td_ucred;	/* XXX */
 	pages = ap->a_m;
 	count = ap->a_count;
 	rtvals = ap->a_rtvals;
 	npages = btoc(count);
 	offset = IDX_TO_OFF(pages[0]->pindex);
 
 	/* Only traced: unlike getpages, the write still proceeds. */
 	if (!fsess_opt_mmap(vnode_mount(vp))) {
 		SDT_PROBE2(fuse, , vnops, trace, 1,
 			"called on non-cacheable vnode??\n");
 	}
 	for (i = 0; i < npages; i++)
 		rtvals[i] = VM_PAGER_AGAIN;
 
 	/*
 	 * When putting pages, do not extend file past EOF.
 	 */
 
 	if (offset + count > fsize) {
 		count = fsize - offset;
 		if (count < 0)
 			count = 0;
 	}
 	/*
 	 * We use only the kva address for the buffer, but this is extremely
 	 * convenient and fast.
 	 */
 	bp = uma_zalloc(fuse_pbuf_zone, M_WAITOK);
 
 	kva = (vm_offset_t)bp->b_data;
 	pmap_qenter(kva, pages, npages);
 	VM_CNT_INC(v_vnodeout);
 	/*
 	 * NOTE(review): count is a byte count here, while the counter name
 	 * suggests pages -- confirm whether npages was intended.
 	 */
 	VM_CNT_ADD(v_vnodepgsout, count);
 
 	/* Describe one kernel-space write of the (possibly clamped) range. */
 	iov.iov_base = (caddr_t)kva;
 	iov.iov_len = count;
 	uio.uio_iov = &iov;
 	uio.uio_iovcnt = 1;
 	uio.uio_offset = offset;
 	uio.uio_resid = count;
 	uio.uio_segflg = UIO_SYSSPACE;
 	uio.uio_rw = UIO_WRITE;
 	uio.uio_td = td;
 
 	error = fuse_io_dispatch(vp, &uio, IO_DIRECT, cred, pid);
 
 	pmap_qremove(kva, npages);
 	uma_zfree(fuse_pbuf_zone, bp);
 
 	if (!error) {
 		/* Number of pages touched by the bytes written, rounded up. */
 		int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE;
 
 		for (i = 0; i < nwritten; i++) {
 			rtvals[i] = VM_PAGER_OK;
 			VM_OBJECT_WLOCK(pages[i]->object);
 			vm_page_undirty(pages[i]);
 			VM_OBJECT_WUNLOCK(pages[i]->object);
 		}
 	}
 	return rtvals[0];
 }
 
 /*
  * Character placed between the namespace prefix and the attribute name when
  * building or parsing FUSE extended attribute names (e.g. "user.name").
  */
 static const char extattr_namespace_separator = '.';
 
 /*
  * Fetch an extended attribute (or its size) with FUSE_GETXATTR.
  *
  * The attribute is requested as "<namespace prefix>.<a_name>".  When a_uio
  * is NULL the request asks only for the size; otherwise it asks for up to
  * uio_resid bytes and the reply is copied out through a_uio.  If the daemon
  * answers ENOSYS the operation is recorded as not implemented for the mount.
  *
  *  struct vop_getextattr_args {
  *	struct vop_generic_args a_gen;
  *	struct vnode *a_vp;
  *	int a_attrnamespace;
  *	const char *a_name;
  *	struct uio *a_uio;
  *	size_t *a_size;
  *	struct ucred *a_cred;
  *	struct thread *a_td;
  *  };
  */
 static int
 fuse_vnop_getextattr(struct vop_getextattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct uio *uio = ap->a_uio;
 	struct mount *mp = vnode_mount(vp);
 	struct fuse_dispatcher fdi;
 	struct fuse_getxattr_in *get_xattr_in;
 	struct fuse_getxattr_out *get_xattr_out;
 	char *prefix;
 	char *attr_str;
 	size_t len;
 	int err;
 
 	if (fuse_isdeadfs(vp))
 		return (ENXIO);
 
 	/* Anything other than the system namespace is treated as user. */
 	prefix = (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) ?
 	    EXTATTR_NAMESPACE_SYSTEM_STRING : EXTATTR_NAMESPACE_USER_STRING;
 
 	/* prefix + separator + name + terminating NUL */
 	len = strlen(prefix) + sizeof(extattr_namespace_separator) +
 	    strlen(ap->a_name) + 1;
 
 	fdisp_init(&fdi, len + sizeof(*get_xattr_in));
 	fdisp_make_vp(&fdi, FUSE_GETXATTR, vp, ap->a_td, ap->a_cred);
 
 	/*
 	 * A size of 0 queries the available size and yields a struct
 	 * fuse_getxattr_out.  A non-zero size yields that much data, without
 	 * the struct fuse_getxattr_out header.
 	 */
 	get_xattr_in = fdi.indata;
 	get_xattr_in->size = (uio != NULL) ? uio->uio_resid : 0;
 
 	attr_str = (char *)fdi.indata + sizeof(*get_xattr_in);
 	snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator,
 	    ap->a_name);
 
 	err = fdisp_wait_answ(&fdi);
 	if (err == 0) {
 		get_xattr_out = fdi.answ;
 
 		if (ap->a_size != NULL)
 			*ap->a_size = get_xattr_out->size;
 
 		if (uio != NULL)
 			err = uiomove(fdi.answ, fdi.iosize, uio);
 	} else if (err == ENOSYS) {
 		fsess_set_notimpl(mp, FUSE_GETXATTR);
 	}
 
 	fdisp_destroy(&fdi);
 	return (err);
 }
 
 /*
  * Set an extended attribute with FUSE_SETXATTR.
  *
  * The request body is struct fuse_setxattr_in, followed by the
  * NUL-terminated name "<namespace prefix>.<a_name>", followed by the value
  * copied in from a_uio.  If the daemon answers ENOSYS the operation is
  * recorded as not implemented for the mount.
  *
  *  struct vop_setextattr_args {
  *	struct vop_generic_args a_gen;
  *	struct vnode *a_vp;
  *	int a_attrnamespace;
  *	const char *a_name;
  *	struct uio *a_uio;
  *	struct ucred *a_cred;
  *	struct thread *a_td;
  *  };
  */
 static int
 fuse_vnop_setextattr(struct vop_setextattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct uio *uio = ap->a_uio;
 	struct mount *mp = vnode_mount(vp);
 	struct fuse_dispatcher fdi;
 	struct fuse_setxattr_in *set_xattr_in;
 	char *prefix;
 	char *attr_str;
 	char *value;
 	size_t len;
 	int err;
 
 	if (fuse_isdeadfs(vp))
 		return (ENXIO);
 
 	/* Anything other than the system namespace is treated as user. */
 	prefix = (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) ?
 	    EXTATTR_NAMESPACE_SYSTEM_STRING : EXTATTR_NAMESPACE_USER_STRING;
 
 	/* prefix + separator + name + terminating NUL */
 	len = strlen(prefix) + sizeof(extattr_namespace_separator) +
 	    strlen(ap->a_name) + 1;
 
 	fdisp_init(&fdi, len + sizeof(*set_xattr_in) + uio->uio_resid);
 	fdisp_make_vp(&fdi, FUSE_SETXATTR, vp, ap->a_td, ap->a_cred);
 
 	set_xattr_in = fdi.indata;
 	set_xattr_in->size = uio->uio_resid;
 
 	attr_str = (char *)fdi.indata + sizeof(*set_xattr_in);
 	snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator,
 	    ap->a_name);
 
 	/* The value goes directly after the name's NUL. */
 	value = attr_str + len;
 	err = uiomove(value, uio->uio_resid, uio);
 	if (err == 0) {
 		err = fdisp_wait_answ(&fdi);
 		if (err == ENOSYS)
 			fsess_set_notimpl(mp, FUSE_SETXATTR);
 	}
 
 	fdisp_destroy(&fdi);
 	return (err);
 }
 
 /*
  * The Linux / FUSE extended attribute list is simply a collection of
  * NUL-terminated strings.  The FreeBSD extended attribute list is a single
  * byte length followed by a non-NUL terminated string.  So, this allows
  * conversion of the Linux / FUSE format to the FreeBSD format.
  * Linux attribute names are reported with the namespace as a prefix (e.g.
  * "user.attribute_name"), but in FreeBSD they are reported without the
  * namespace prefix (e.g. "attribute_name").  So, we're going from:
  *
  * user.attr_name1\0user.attr_name2\0
  *
  * to:
  *
  * <num>attr_name1<num>attr_name2
  *
  * Where "<num>" is a single byte number of characters in the attribute name.
  * Entries that do not start with "<prefix>." are skipped.
  *
  * The input is never read beyond list_len bytes: if the final entry lacks
  * its terminating NUL it is treated as ending at list_len.
  *
  * Args:
  * prefix - exattr namespace prefix string
  * list, list_len - input list with namespace prefixes
  * bsd_list, bsd_list_len - output list compatible with bsd vfs; bsd_list
  *     must have room for at least list_len bytes
  *
  * Returns 0 on success, or ENAMETOOLONG if a converted name is
  * EXTATTR_MAXNAMELEN characters or longer.
  */
 static int
 fuse_xattrlist_convert(char *prefix, const char *list, int list_len,
     char *bsd_list, int *bsd_list_len)
 {
 	int len, pos, name_len, dist_to_next, prefix_len;
 
 	pos = 0;
 	*bsd_list_len = 0;
 	prefix_len = strlen(prefix);
 
 	while (pos < list_len && list[pos] != '\0') {
 		/*
 		 * Bound the scan by the bytes that remain so that a list
 		 * without a trailing NUL cannot make us read past its end.
 		 */
 		name_len = strnlen(&list[pos], list_len - pos);
 		dist_to_next = name_len + 1;
 
 		/*
 		 * Only names longer than the prefix can hold "<prefix>.";
 		 * checking that first keeps bcmp() and the separator lookup
 		 * inside the current entry.
 		 */
 		if (name_len > prefix_len &&
 		    bcmp(&list[pos], prefix, prefix_len) == 0 &&
 		    list[pos + prefix_len] == extattr_namespace_separator) {
 			len = name_len -
 			    (prefix_len + sizeof(extattr_namespace_separator));
 			if (len >= EXTATTR_MAXNAMELEN)
 				return (ENAMETOOLONG);
 
 			/* One length byte, then the name without a NUL. */
 			bsd_list[*bsd_list_len] = len;
 			memcpy(&bsd_list[*bsd_list_len + 1],
 			    &list[pos + prefix_len +
 			    sizeof(extattr_namespace_separator)], len);
 
 			*bsd_list_len += len + 1;
 		}
 
 		pos += dist_to_next;
 	}
 
 	return (0);
 }
 
 /*
  * List the extended attributes of a vnode in one namespace.
  *
  * Two FUSE_LISTXATTR requests are issued: the first, with size 0, asks the
  * daemon how large the list is; the second retrieves the list itself.  The
  * result is converted with fuse_xattrlist_convert() and then reported
  * through a_size and/or copied out through a_uio, whichever are non-NULL.
  *
  *  struct vop_listextattr_args {
  *	struct vop_generic_args a_gen;
  *	struct vnode *a_vp;
  *	int a_attrnamespace;
  *	struct uio *a_uio;
  *	size_t *a_size;
  *	struct ucred *a_cred;
  *	struct thread *a_td;
  *  };
  */
 static int
 fuse_vnop_listextattr(struct vop_listextattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct uio *uio = ap->a_uio;
 	struct fuse_dispatcher fdi;
 	struct fuse_listxattr_in *list_xattr_in;
 	struct fuse_listxattr_out *list_xattr_out;
 	struct mount *mp = vnode_mount(vp);
 	struct thread *td = ap->a_td;
 	struct ucred *cred = ap->a_cred;
 	size_t len;
 	char *prefix;
 	char *attr_str;
 	char *bsd_list = NULL;
 	char *linux_list;
 	int bsd_list_len;
 	int linux_list_len;
 	int err;
 
 	if (fuse_isdeadfs(vp))
 		return (ENXIO);
 
 	/*
 	 * Add space for a NUL and the period separator if enabled.
 	 * Default to looking for user attributes.
 	 */
 	if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM)
 		prefix = EXTATTR_NAMESPACE_SYSTEM_STRING;
 	else
 		prefix = EXTATTR_NAMESPACE_USER_STRING;
 
 	len = strlen(prefix) + sizeof(extattr_namespace_separator) + 1;
 
 	fdisp_init(&fdi, sizeof(*list_xattr_in) + len);
 	fdisp_make_vp(&fdi, FUSE_LISTXATTR, vp, td, cred);
 
 	/*
 	 * Retrieve Linux / FUSE compatible list size.
 	 */
 	list_xattr_in = fdi.indata;
 	list_xattr_in->size = 0;
 	attr_str = (char *)fdi.indata + sizeof(*list_xattr_in);
 	snprintf(attr_str, len, "%s%c", prefix, extattr_namespace_separator);
 
 	err = fdisp_wait_answ(&fdi);
 	if (err != 0) {
 		if (err == ENOSYS)
 			fsess_set_notimpl(mp, FUSE_LISTXATTR);
 		goto out;
 	}
 
 	list_xattr_out = fdi.answ;
 	linux_list_len = list_xattr_out->size;
 	/* An empty list needs no second request. */
 	if (linux_list_len == 0) {
 		if (ap->a_size != NULL)
 			*ap->a_size = linux_list_len;
 		goto out;
 	}
 
 	/*
 	 * Retrieve Linux / FUSE compatible list values.
 	 */
-	fdisp_make_vp(&fdi, FUSE_LISTXATTR, vp, td, cred);
+	fdisp_refresh_vp(&fdi, FUSE_LISTXATTR, vp, td, cred);
 	list_xattr_in = fdi.indata;
 	list_xattr_in->size = linux_list_len + sizeof(*list_xattr_out);
+	list_xattr_in->flags = 0;
 	attr_str = (char *)fdi.indata + sizeof(*list_xattr_in);
 	snprintf(attr_str, len, "%s%c", prefix, extattr_namespace_separator);
 
 	err = fdisp_wait_answ(&fdi);
 	if (err != 0)
 		goto out;
 
 	/* From here on the length is that of the reply actually received. */
 	linux_list = fdi.answ;
 	linux_list_len = fdi.iosize;
 
 	/*
 	 * Retrieve the BSD compatible list values.
 	 * The Linux / FUSE attribute list format isn't the same
 	 * as FreeBSD's format. So we need to transform it into
 	 * FreeBSD's format before giving it to the user.
 	 */
 	bsd_list = malloc(linux_list_len, M_TEMP, M_WAITOK);
 	err = fuse_xattrlist_convert(prefix, linux_list, linux_list_len,
 	    bsd_list, &bsd_list_len);
 	if (err != 0)
 		goto out;
 
 	if (ap->a_size != NULL)
 		*ap->a_size = bsd_list_len;
 
 	if (uio != NULL)
 		err = uiomove(bsd_list, bsd_list_len, uio);
 
 out:
 	/* bsd_list is still NULL on the early exits above. */
 	free(bsd_list, M_TEMP);
 	fdisp_destroy(&fdi);
 	return (err);
 }
 
 /*
  * Remove an extended attribute with FUSE_REMOVEXATTR.
  *
  * The request body is just the NUL-terminated name
  * "<namespace prefix>.<a_name>".  If the daemon answers ENOSYS the operation
  * is recorded as not implemented for the mount.
  *
  *  struct vop_deleteextattr_args {
  *	struct vop_generic_args a_gen;
  *	struct vnode *a_vp;
  *	int a_attrnamespace;
  *	const char *a_name;
  *	struct ucred *a_cred;
  *	struct thread *a_td;
  *  };
  */
 static int
 fuse_vnop_deleteextattr(struct vop_deleteextattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct mount *mp = vnode_mount(vp);
 	struct fuse_dispatcher fdi;
 	char *prefix;
 	char *attr_str;
 	size_t len;
 	int err;
 
 	if (fuse_isdeadfs(vp))
 		return (ENXIO);
 
 	/* Anything other than the system namespace is treated as user. */
 	prefix = (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) ?
 	    EXTATTR_NAMESPACE_SYSTEM_STRING : EXTATTR_NAMESPACE_USER_STRING;
 
 	/* prefix + separator + name + terminating NUL */
 	len = strlen(prefix) + sizeof(extattr_namespace_separator) +
 	    strlen(ap->a_name) + 1;
 
 	fdisp_init(&fdi, len);
 	fdisp_make_vp(&fdi, FUSE_REMOVEXATTR, vp, ap->a_td, ap->a_cred);
 
 	attr_str = fdi.indata;
 	snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator,
 	    ap->a_name);
 
 	err = fdisp_wait_answ(&fdi);
 	if (err == ENOSYS)
 		fsess_set_notimpl(mp, FUSE_REMOVEXATTR);
 
 	fdisp_destroy(&fdi);
 	return (err);
 }
 
 /*
  * Print the FUSE-specific state of a vnode: its node id, its parent's node
  * id, its lookup count and its flag word.  Always returns 0.
  *
  *  struct vop_print_args {
  *	struct vnode *a_vp;
  *  };
  */
 static int
 fuse_vnop_print(struct vop_print_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 
 	printf("nodeid: %ju, parent nodeid: %ju, nlookup: %ju, flag: %#x\n",
 	    (uintmax_t)VTOILLU(vp),
 	    (uintmax_t)fvdat->parent_nid,
 	    (uintmax_t)fvdat->nlookup,
 	    fvdat->flag);
 
 	return 0;
 }
Index: projects/fuse2/tests/sys/fs/fusefs/mockfs.cc
===================================================================
--- projects/fuse2/tests/sys/fs/fusefs/mockfs.cc	(revision 345875)
+++ projects/fuse2/tests/sys/fs/fusefs/mockfs.cc	(revision 345876)
@@ -1,466 +1,467 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2019 The FreeBSD Foundation
  *
  * This software was developed by BFF Storage Systems, LLC under sponsorship
  * from the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 extern "C" {
 #include <sys/param.h>
 
 #include <sys/mount.h>
 #include <sys/stat.h>
 #include <sys/uio.h>
 #include <sys/user.h>
 
 #include <fcntl.h>
 #include <libutil.h>
 #include <pthread.h>
 #include <signal.h>
 #include <stdlib.h>
 #include <unistd.h>
 
 #include "mntopts.h"	// for build_iovec
 }
 
 #include <gtest/gtest.h>
 
 #include "mockfs.hh"
 
 using namespace testing;
 
 /*
  * Debug output level: above 0 the daemon loop prints each request via
  * debug_fuseop(); above 1 that output includes more header fields.
  */
 int verbosity = 0;
 /* Set by sigint_handler(); tells the daemon loop to stop. */
 static sig_atomic_t quit = 0;
 
 /*
  * Translate a FUSE opcode into a printable name.  Opcodes with no entry in
  * the table map to an "Unknown" string.
  */
 const char* opcode2opname(uint32_t opcode)
 {
 	/* Indexed by opcode; gaps carry an explicit "Unknown" entry. */
 	static const char* const names[] = {
 		"Unknown (opcode 0)",
 		"LOOKUP",
 		"FORGET",
 		"GETATTR",
 		"SETATTR",
 		"READLINK",
 		"SYMLINK",
 		"Unknown (opcode 7)",
 		"MKNOD",
 		"MKDIR",
 		"UNLINK",
 		"RMDIR",
 		"RENAME",
 		"LINK",
 		"OPEN",
 		"READ",
 		"WRITE",
 		"STATFS",
 		"RELEASE",
 		"Unknown (opcode 19)",
 		"FSYNC",
 		"SETXATTR",
 		"GETXATTR",
 		"LISTXATTR",
 		"REMOVEXATTR",
 		"FLUSH",
 		"INIT",
 		"OPENDIR",
 		"READDIR",
 		"RELEASEDIR",
 		"FSYNCDIR",
 		"GETLK",
 		"SETLK",
 		"SETLKW",
 		"ACCESS",
 		"CREATE",
 		"INTERRUPT",
 		"BMAP",
 		"DESTROY"
 	};
 	const uint32_t count = sizeof(names) / sizeof(names[0]);
 
 	if (opcode < count)
 		return (names[opcode]);
 	return ("Unknown (opcode > max)");
 }
 
 /*
  * Build a handler that answers a request with a header-only reply whose
  * error field is the negated errno value.
  */
 ProcessMockerT
 ReturnErrno(int error)
 {
 	return([=](auto in, auto &out) {
 		auto reply = new mockfs_buf_out;
 
 		reply->header.len = sizeof(reply->header);
 		reply->header.error = -error;
 		reply->header.unique = in->header.unique;
 		out.push_back(reply);
 	});
 }
 
 /*
  * Helper function used for returning negative cache entries for LOOKUP.
  * The reply is an entry with nodeid 0 and the given validity period.
  */
 ProcessMockerT
 ReturnNegativeCache(const struct timespec *entry_valid)
 {
 	return([=](auto in, auto &out) {
 		auto reply = new mockfs_buf_out;
 
 		reply->header.unique = in->header.unique;
 		reply->header.error = 0;
 		/* nodeid 0 means ENOENT and cache it */
 		reply->body.entry.nodeid = 0;
 		reply->body.entry.entry_valid = entry_valid->tv_sec;
 		reply->body.entry.entry_valid_nsec = entry_valid->tv_nsec;
 		SET_OUT_HEADER_LEN(reply, entry);
 		out.push_back(reply);
 	});
 }
 
 /*
  * Build a handler that allocates one reply, copies the request's unique id
  * into it, and lets the callback f fill in the rest.
  */
 ProcessMockerT
 ReturnImmediate(std::function<void(const struct mockfs_buf_in *in,
 				   struct mockfs_buf_out *out)> f)
 {
 	return([=](auto in, auto &out) {
 		auto reply = new mockfs_buf_out;
 
 		reply->header.unique = in->header.unique;
 		f(in, reply);
 		out.push_back(reply);
 	});
 }
 
 /* Signal handler: request that the daemon loop stop. */
 void
 sigint_handler(int __unused sig)
 {
 	quit = 1;
 }
 
 /*
  * Print a one-line description of a FUSE request to stdout: the operation
  * name and inode, extra header fields when verbosity > 1, and a few
  * operation-specific fields for the opcodes handled in the switch below.
  */
 void debug_fuseop(const mockfs_buf_in *in)
 {
 	printf("%-11s ino=%2lu", opcode2opname(in->header.opcode),
 		in->header.nodeid);
 	if (verbosity > 1) {
 		printf(" uid=%5u gid=%5u pid=%5u unique=%lu len=%u",
 			in->header.uid, in->header.gid, in->header.pid,
 			in->header.unique, in->header.len);
 	}
 	switch (in->header.opcode) {
 		const char *name, *value;
 
 		case FUSE_CREATE:
 			/* The name follows the fuse_open_in structure. */
 			name = (const char*)in->body.bytes +
 				sizeof(fuse_open_in);
 			printf(" flags=%#x name=%s",
 				in->body.open.flags, name);
 			break;
 		case FUSE_FLUSH:
 			printf(" fh=%#lx lock_owner=%lu", in->body.flush.fh,
 				in->body.flush.lock_owner);
 			break;
 		case FUSE_FORGET:
 			printf(" nlookup=%lu", in->body.forget.nlookup);
 			break;
 		case FUSE_FSYNC:
 			printf(" flags=%#x", in->body.fsync.fsync_flags);
 			break;
 		case FUSE_FSYNCDIR:
 			printf(" flags=%#x", in->body.fsyncdir.fsync_flags);
 			break;
 		case FUSE_LOOKUP:
 			printf(" %s", in->body.lookup);
 			break;
 		case FUSE_MKNOD:
 			printf(" mode=%#o rdev=%x", in->body.mknod.mode,
 				in->body.mknod.rdev);
 			break;
 		case FUSE_OPEN:
 			printf(" flags=%#x mode=%#o",
 				in->body.open.flags, in->body.open.mode);
 			break;
 		case FUSE_OPENDIR:
 			printf(" flags=%#x mode=%#o",
 				in->body.opendir.flags, in->body.opendir.mode);
 			break;
 		case FUSE_READ:
 			printf(" offset=%lu size=%u", in->body.read.offset,
 				in->body.read.size);
 			break;
 		case FUSE_READDIR:
 			printf(" fh=%#lx offset=%lu size=%u",
 				in->body.readdir.fh, in->body.readdir.offset,
 				in->body.readdir.size);
 			break;
 		case FUSE_RELEASE:
 			printf(" fh=%#lx flags=%#x lock_owner=%lu",
 				in->body.release.fh,
 				in->body.release.flags,
 				in->body.release.lock_owner);
 			break;
 		case FUSE_SETATTR:
 			/* At low verbosity print only the raw valid mask. */
 			if (verbosity <= 1) {
 				printf(" valid=%#x", in->body.setattr.valid);
 				break;
 			}
 			/* Otherwise print each field the mask marks as set. */
 			if (in->body.setattr.valid & FATTR_MODE)
 				printf(" mode=%#o", in->body.setattr.mode);
 			if (in->body.setattr.valid & FATTR_UID)
 				printf(" uid=%u", in->body.setattr.uid);
 			if (in->body.setattr.valid & FATTR_GID)
 				printf(" gid=%u", in->body.setattr.gid);
 			if (in->body.setattr.valid & FATTR_SIZE)
 				printf(" size=%zu", in->body.setattr.size);
 			if (in->body.setattr.valid & FATTR_ATIME)
 				printf(" atime=%zu.%u",
 					in->body.setattr.atime,
 					in->body.setattr.atimensec);
 			if (in->body.setattr.valid & FATTR_MTIME)
 				printf(" mtime=%zu.%u",
 					in->body.setattr.mtime,
 					in->body.setattr.mtimensec);
 			if (in->body.setattr.valid & FATTR_FH)
 				printf(" fh=%zu", in->body.setattr.fh);
 			break;
 		case FUSE_SETXATTR:
 			/*
 			 * In theory neither the xattr name nor the value need
 			 * be ASCII, but in this test suite they always are.
 			 * The name follows fuse_setxattr_in; the value follows
 			 * the name's NUL.
 			 */
 			name = (const char*)in->body.bytes +
 				sizeof(fuse_setxattr_in);
 			value = name + strlen(name) + 1;
 			printf(" %s=%s", name, value);
 			break;
 		case FUSE_WRITE:
-			printf(" offset=%lu size=%u flags=%u",
+			printf(" fh=%#lx offset=%lu size=%u flags=%u",
+				in->body.write.fh,
 				in->body.write.offset, in->body.write.size,
 				in->body.write.write_flags);
 			break;
 		default:
 			break;
 	}
 	printf("\n");
 }
 
 /*
  * Mount a mock FUSE filesystem on ./mountpoint and start its daemon thread.
  *
  * Creates the mountpoint directory, opens /dev/fuse, mounts a "fusefs"
  * filesystem with the requested options, answers the initial request via
  * init(flags), installs sigint_handler for SIGUSR1 and starts the service
  * thread.
  *
  * Throws std::system_error if any of the mkdir, open, nmount or
  * pthread_create steps fails.
  */
 MockFS::MockFS(int max_readahead, bool allow_other, bool default_permissions,
 	bool push_symlinks_in, uint32_t flags)
 {
 	struct iovec *iov = NULL;
 	int iovlen = 0;
 	char fdstr[15];
 	const bool trueval = true;
 	int error;
 
 	m_daemon_id = NULL;
 	m_maxreadahead = max_readahead;
 	quit = 0;
 
 	/*
 	 * Kyua sets pwd to a testcase-unique tempdir; no need to use
 	 * mkdtemp
 	 */
 	/*
 	 * googletest doesn't allow ASSERT_ in constructors, so we must throw
 	 * instead.
 	 */
 	if (mkdir("mountpoint" , 0755) && errno != EEXIST)
 		throw(std::system_error(errno, std::system_category(),
 			"Couldn't make mountpoint directory"));
 
 	m_fuse_fd = open("/dev/fuse", O_CLOEXEC | O_RDWR);
 	if (m_fuse_fd < 0)
 		throw(std::system_error(errno, std::system_category(),
 			"Couldn't open /dev/fuse"));
 	/* Bounded formatting: fdstr can never be overrun. */
 	snprintf(fdstr, sizeof(fdstr), "%d", m_fuse_fd);
 
 	m_pid = getpid();
 	m_child_pid = -1;
 
 	build_iovec(&iov, &iovlen, "fstype", __DECONST(void *, "fusefs"), -1);
 	build_iovec(&iov, &iovlen, "fspath",
 		    __DECONST(void *, "mountpoint"), -1);
 	build_iovec(&iov, &iovlen, "from", __DECONST(void *, "/dev/fuse"), -1);
 	build_iovec(&iov, &iovlen, "fd", fdstr, -1);
 	if (allow_other) {
 		build_iovec(&iov, &iovlen, "allow_other",
 			__DECONST(void*, &trueval), sizeof(bool));
 	}
 	if (default_permissions) {
 		build_iovec(&iov, &iovlen, "default_permissions",
 			__DECONST(void*, &trueval), sizeof(bool));
 	}
 	if (push_symlinks_in) {
 		build_iovec(&iov, &iovlen, "push_symlinks_in",
 			__DECONST(void*, &trueval), sizeof(bool));
 	}
 	if (nmount(iov, iovlen, 0))
 		throw(std::system_error(errno, std::system_category(),
 			"Couldn't mount filesystem"));
 
 	// Setup default handler
 	ON_CALL(*this, process(_, _))
 		.WillByDefault(Invoke(this, &MockFS::process_default));
 
 	init(flags);
 	signal(SIGUSR1, sigint_handler);
 	/*
 	 * pthread_create() reports failure through its return value and
 	 * does not set errno, so use the returned code for the exception.
 	 */
 	error = pthread_create(&m_daemon_id, NULL, service, (void*)this);
 	if (error != 0)
 		throw(std::system_error(error, std::system_category(),
 			"Couldn't start fuse thread"));
 }
 
 /*
  * Tear the mock filesystem down: stop the daemon, force-unmount, wait for
  * the daemon thread if one was started, and remove the mountpoint.
  */
 MockFS::~MockFS()
 {
 	kill_daemon();
 	::unmount("mountpoint", MNT_FORCE);
 
 	/* Join only if a daemon thread was recorded. */
 	if (m_daemon_id != NULL) {
 		pthread_join(m_daemon_id, NULL);
 		m_daemon_id = NULL;
 	}
 
 	rmdir("mountpoint");
 }
 
 /*
  * Answer the first request on the FUSE device, which must be FUSE_INIT.
  *
  * The reply advertises this build's FUSE protocol version, the intersection
  * of the offered flags with `flags`, max_write (also stored in m_max_write)
  * and the configured max_readahead.
  */
 void MockFS::init(uint32_t flags) {
 	mockfs_buf_in *in;
 	mockfs_buf_out *out;
 
 	in = (mockfs_buf_in*) malloc(sizeof(*in));
 	ASSERT_TRUE(in != NULL);
 	out = (mockfs_buf_out*) malloc(sizeof(*out));
 	ASSERT_TRUE(out != NULL);
 
 	read_request(in);
 	ASSERT_EQ(FUSE_INIT, in->header.opcode);
 
 	memset(out, 0, sizeof(*out));
 	out->header.unique = in->header.unique;
 	out->header.error = 0;
 	out->body.init.major = FUSE_KERNEL_VERSION;
 	out->body.init.minor = FUSE_KERNEL_MINOR_VERSION;
 	out->body.init.flags = in->body.init.flags & flags;
 
 	/*
 	 * The default max_write is set to this formula in libfuse, though
 	 * individual filesystems can lower it.  The "- 4096" was added in
 	 * commit 154ffe2, with the commit message "fix".
 	 */
 	uint32_t default_max_write = 32 * getpagesize() + 0x1000 - 4096;
 	/* For testing purposes, it should be distinct from MAXPHYS */
 	m_max_write = MIN(default_max_write, MAXPHYS / 2);
 	out->body.init.max_write = m_max_write;
 
 	out->body.init.max_readahead = m_maxreadahead;
 	SET_OUT_HEADER_LEN(out, init);
 	write(m_fuse_fd, out, out->header.len);
 
 	/* Both buffers are scratch space for this exchange only. */
 	free(out);
 	free(in);
 }
 
 /*
  * Ask the daemon thread to stop: signal it with SIGUSR1 and close the FUSE
  * device.  Does nothing if no daemon thread was recorded.
  */
 void MockFS::kill_daemon()
 {
 	if (m_daemon_id == NULL)
 		return;
 
 	pthread_kill(m_daemon_id, SIGUSR1);
 	// Closing the /dev/fuse file descriptor first allows unmount
 	// to succeed even if the daemon doesn't correctly respond to
 	// commands during the unmount sequence.
 	close(m_fuse_fd);
 }
 
 /*
  * Daemon main loop: read requests from the FUSE device until `quit` is set,
  * hand each one to process() (or to process_default() when the sender's pid
  * is not accepted by pid_ok()), and write every queued reply back.
  */
 void MockFS::loop()
 {
 	std::vector<mockfs_buf_out*> out;
 	mockfs_buf_in *in;
 
 	in = (mockfs_buf_in*) malloc(sizeof(*in));
 	ASSERT_TRUE(in != NULL);
 
 	while (!quit) {
 		bzero(in, sizeof(*in));
 		read_request(in);
 		if (quit)
 			break;
 
 		if (verbosity > 0)
 			debug_fuseop(in);
 
 		/*
 		 * Reject any requests from unknown processes.  Because
 		 * we actually do mount a filesystem, plenty of
 		 * unrelated system daemons may try to access it.
 		 */
 		if (!pid_ok((pid_t)in->header.pid))
 			process_default(in, out);
 		else
 			process(in, out);
 
 		/* Send each reply, then release it. */
 		for (auto &it: out) {
 			ASSERT_TRUE(write(m_fuse_fd, it, it->header.len) > 0 ||
 				    errno == EAGAIN)
 				<< strerror(errno);
 			delete it;
 		}
 		out.clear();
 	}
 
 	free(in);
 }
 
 /*
  * Decide whether a request from process `pid` should be served.
  *
  * Accepted are the test process itself (m_pid), its recorded child
  * (m_child_pid), and any process whose command name starts with "aiod".
  * A pid that cannot be looked up is rejected.
  */
 bool MockFS::pid_ok(pid_t pid)
 {
 	struct kinfo_proc *ki;
 	bool is_aiod;
 
 	if (pid == m_pid || pid == m_child_pid)
 		return (true);
 
 	ki = kinfo_getproc(pid);
 	if (ki == NULL)
 		return (false);
 
 	/*
 	 * Allow access by the aio daemon processes so that our tests
 	 * can use aio functions
 	 */
 	is_aiod = (strncmp("aiod", ki->ki_comm, 4) == 0);
 	free(ki);
 	return (is_aiod);
 }
 
 void MockFS::process_default(const mockfs_buf_in *in,
 		std::vector<mockfs_buf_out*> &out)
 {
 	auto out0 = new mockfs_buf_out;
 	out0->header.unique = in->header.unique;
 	out0->header.error = -EOPNOTSUPP;
 	out0->header.len = sizeof(out0->header);
 	out.push_back(out0);
 }
 
 /*
  * Read one request from the FUSE device into *in.  Unless `quit` is set, a
  * failed read is reported with perror() and a read shorter than a request
  * header fails the assertion.
  */
 void MockFS::read_request(mockfs_buf_in *in)
 {
 	ssize_t res = read(m_fuse_fd, in, sizeof(*in));
 
 	if (!quit && res < 0)
 		perror("read");
 	ASSERT_TRUE(res >= (ssize_t)sizeof(in->header) || quit);
 }
 
 void* MockFS::service(void *pthr_data) {
 	MockFS *mock_fs = (MockFS*)pthr_data;
 
 	mock_fs->loop();
 
 	return (NULL);
 }
 
 /* Unmount ./mountpoint without forcing; the result is not checked. */
 void MockFS::unmount()
 {
 	::unmount("mountpoint", 0);
 }
Index: projects/fuse2/tests/sys/fs/fusefs/write.cc
===================================================================
--- projects/fuse2/tests/sys/fs/fusefs/write.cc	(revision 345875)
+++ projects/fuse2/tests/sys/fs/fusefs/write.cc	(revision 345876)
@@ -1,691 +1,716 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2019 The FreeBSD Foundation
  *
  * This software was developed by BFF Storage Systems, LLC under sponsorship
  * from the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 extern "C" {
 #include <sys/types.h>
 #include <sys/mman.h>
 #include <sys/stat.h>
 #include <sys/sysctl.h>
 #include <sys/uio.h>
 
 #include <aio.h>
 #include <fcntl.h>
 #include <unistd.h>
 }
 
 #include "mockfs.hh"
 #include "utils.hh"
 
 using namespace testing;
 
 /* Common fixture for the FUSE_WRITE tests in this file */
 class Write: public FuseTest {
 
 public:
 
 /*
  * Convenience wrapper around FuseTest::expect_lookup that always describes a
  * regular file with mode 0644 and the given size.
  */
 void expect_lookup(const char *relpath, uint64_t ino, uint64_t size)
 {
 	FuseTest::expect_lookup(relpath, ino, S_IFREG | 0644, size, 1);
 }
 
 /*
  * Expect any number of FUSE_RELEASE requests for inode `ino`, each of which
  * is answered by invoking `r`.
  */
 void expect_release(uint64_t ino, ProcessMockerT r)
 {
 	EXPECT_CALL(*m_mock, process(
 		ResultOf([=](auto in) {
 			return (in->header.opcode == FUSE_RELEASE &&
 				in->header.nodeid == ino);
 		}, Eq(true)),
 		_)
 	).WillRepeatedly(Invoke(r));
 }
 
 /*
  * Mark the current test as skipped unless the vfs.fusefs.sync_resize sysctl
  * reads 0.
  *
  * NOTE(review): GTEST_SKIP() and ASSERT_EQ() only return from this helper, not
  * from the calling test body -- callers that must stop should check
  * IsSkipped()/HasFatalFailure() afterwards.
  */
 void require_sync_resize_0() {
 	const char *sync_resize_node = "vfs.fusefs.sync_resize";
 	int val = 0;
 	size_t size = sizeof(val);
 
 	ASSERT_EQ(0, sysctlbyname(sync_resize_node, &val, &size, NULL, 0))
 		<< strerror(errno);
 	if (val != 0)
 		GTEST_SKIP() <<
 			"vfs.fusefs.sync_resize must be set to 0 for this test."
 			"  That sysctl will probably be removed soon.";
 }
 
 };
 
 /* Fixture for tests that write through the POSIX AIO interface */
 class AioWrite: public Write {
 /*
  * Run the base fixture's setup, then skip the test unless the
  * vfs.aio.enable_unsafe sysctl reads nonzero.
  */
 virtual void SetUp() {
 	const char *node = "vfs.aio.enable_unsafe";
 	int val = 0;
 	size_t size = sizeof(val);
 
 	FuseTest::SetUp();
 	/*
 	 * Like the other fixtures in this file, do nothing more if the base
 	 * setup has already skipped the test.
 	 */
 	if (IsSkipped())
 		return;
 
 	ASSERT_EQ(0, sysctlbyname(node, &val, &size, NULL, 0))
 		<< strerror(errno);
 	if (!val)
 		GTEST_SKIP() <<
 			"vfs.aio.enable_unsafe must be set for this test";
 }
 };
 
 /* Tests for the write-through cache mode */
 class WriteThrough: public Write {
 
 /*
  * Run the base fixture's setup, then skip the test unless the
  * vfs.fusefs.data_cache_mode sysctl reads 1.
  */
 virtual void SetUp() {
 	const char *cache_mode_node = "vfs.fusefs.data_cache_mode";
 	int val = 0;
 	size_t size = sizeof(val);
 
 	FuseTest::SetUp();
 	/* The base setup may already have skipped this test */
 	if (IsSkipped())
 		return;
 
 	ASSERT_EQ(0, sysctlbyname(cache_mode_node, &val, &size, NULL, 0))
 		<< strerror(errno);
 	if (val != 1)
 		GTEST_SKIP() << "vfs.fusefs.data_cache_mode must be set to 1 "
 			"(writethrough) for this test";
 }
 
 };
 
 /* Tests for the writeback cache mode */
 class WriteBack: public Write {
 
 /*
  * Run the base fixture's setup, then skip the test unless the
  * vfs.fusefs.data_cache_mode sysctl reads 2.
  */
 virtual void SetUp() {
 	const char *node = "vfs.fusefs.data_cache_mode";
 	int val = 0;
 	size_t size = sizeof(val);
 
 	FuseTest::SetUp();
 	/* The base setup may already have skipped this test */
 	if (IsSkipped())
 		return;
 
 	ASSERT_EQ(0, sysctlbyname(node, &val, &size, NULL, 0))
 		<< strerror(errno);
 	if (val != 2)
 		GTEST_SKIP() << "vfs.fusefs.data_cache_mode must be set to 2 "
 			"(writeback) for this test";
 }
 
 };
 
 /* AIO writes need to set the header's pid field correctly */
 /* https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=236379 */
 TEST_F(AioWrite, DISABLED_aio_write)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	const char *CONTENTS = "abcdefgh";
 	uint64_t ino = 42;
 	uint64_t offset = 4096;
 	int fd;
 	ssize_t bufsize = strlen(CONTENTS);
 	/*
 	 * Zero the whole control block so that the fields not assigned below
 	 * don't pass stack garbage to the kernel.
 	 */
 	struct aiocb iocb = {};
 	struct aiocb *piocb;
 
 	expect_lookup(RELPATH, ino, 0);
 	expect_open(ino, 0, 1);
 	expect_getattr(ino, 0);
 	expect_write(ino, offset, bufsize, bufsize, 0, CONTENTS);
 
 	fd = open(FULLPATH, O_WRONLY);
 	/* Fatal on failure: everything below needs a valid descriptor */
 	ASSERT_LE(0, fd) << strerror(errno);
 
 	iocb.aio_nbytes = bufsize;
 	iocb.aio_fildes = fd;
 	iocb.aio_buf = (void *)CONTENTS;
 	iocb.aio_offset = offset;
 	iocb.aio_sigevent.sigev_notify = SIGEV_NONE;
 	ASSERT_EQ(0, aio_write(&iocb)) << strerror(errno);
 	ASSERT_EQ(bufsize, aio_waitcomplete(&piocb, NULL)) << strerror(errno);
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 
 /*
  * When a file is opened with O_APPEND, we should forward that flag to
  * FUSE_OPEN but still attempt to calculate the offset internally.  That way
  * we'll work both with filesystems that understand O_APPEND (and ignore the
  * offset) and filesystems that don't (and simply use the offset).
  *
  * Note that verifying the O_APPEND flag in FUSE_OPEN is done in the
  * Open.o_append test.
  */
 TEST_F(Write, append)
 {
 	const ssize_t BUFSIZE = 9;
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	const char CONTENTS[BUFSIZE] = "abcdefgh";
 	uint64_t ino = 42;
 	/*
 	 * Set offset to a maxbcachebuf boundary so we don't need to RMW when
 	 * using writeback caching
 	 */
 	uint64_t initial_offset = m_maxbcachebuf;
 	int fd;
 
 	require_sync_resize_0();
 	/*
 	 * GTEST_SKIP() and ASSERT_*() inside the helper only return from the
 	 * helper itself, so stop here explicitly if it skipped or failed.
 	 */
 	if (IsSkipped() || HasFatalFailure())
 		return;
 
 	expect_lookup(RELPATH, ino, initial_offset);
 	expect_open(ino, 0, 1);
 	expect_getattr(ino, initial_offset);
 	expect_write(ino, initial_offset, BUFSIZE, BUFSIZE, 0, CONTENTS);
 
 	/* Must open O_RDWR or fuse(4) implicitly sets direct_io */
 	fd = open(FULLPATH, O_RDWR | O_APPEND);
 	/* Fatal on failure: the write below needs a valid descriptor */
 	ASSERT_LE(0, fd) << strerror(errno);
 
 	ASSERT_EQ(BUFSIZE, write(fd, CONTENTS, BUFSIZE)) << strerror(errno);
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 
 /*
  * Like Write.append, but the server answers FUSE_OPEN with FOPEN_DIRECT_IO.
  * The appended data should still be sent at the file's current size.
  */
 TEST_F(Write, append_direct_io)
 {
 	const ssize_t BUFSIZE = 9;
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	const char CONTENTS[BUFSIZE] = "abcdefgh";
 	uint64_t ino = 42;
 	uint64_t initial_offset = 4096;
 	int fd;
 
 	expect_lookup(RELPATH, ino, initial_offset);
 	expect_open(ino, FOPEN_DIRECT_IO, 1);
 	expect_getattr(ino, initial_offset);
 	expect_write(ino, initial_offset, BUFSIZE, BUFSIZE, 0, CONTENTS);
 
 	fd = open(FULLPATH, O_WRONLY | O_APPEND);
 	/* Fatal on failure: the write below needs a valid descriptor */
 	ASSERT_LE(0, fd) << strerror(errno);
 
 	ASSERT_EQ(BUFSIZE, write(fd, CONTENTS, BUFSIZE)) << strerror(errno);
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 
 /* A direct write should evict any overlapping cached data */
 /* https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=235774 */
 TEST_F(Write, DISABLED_direct_io_evicts_cache)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	const char CONTENTS0[] = "abcdefgh";
 	const char CONTENTS1[] = "ijklmnop";
 	uint64_t ino = 42;
 	int fd;
 	/* Include the terminating NUL so readbuf can be compared as a string */
 	ssize_t bufsize = strlen(CONTENTS0) + 1;
 	char readbuf[bufsize];
 
 	expect_lookup(RELPATH, ino, bufsize);
 	expect_open(ino, 0, 1);
 	expect_getattr(ino, bufsize);
 	expect_read(ino, 0, bufsize, bufsize, CONTENTS0);
 	expect_write(ino, 0, bufsize, bufsize, 0, CONTENTS1);
 
 	fd = open(FULLPATH, O_RDWR);
 	/* Fatal on failure: everything below needs a valid descriptor */
 	ASSERT_LE(0, fd) << strerror(errno);
 
 	// Prime cache
 	ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno);
 
 	// Write directly, evicting cache
 	ASSERT_EQ(0, fcntl(fd, F_SETFL, O_DIRECT)) << strerror(errno);
 	ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno);
 	ASSERT_EQ(bufsize, write(fd, CONTENTS1, bufsize)) << strerror(errno);
 
 	// Read again.  Cache should be bypassed
 	expect_read(ino, 0, bufsize, bufsize, CONTENTS1);
 	ASSERT_EQ(0, fcntl(fd, F_SETFL, 0)) << strerror(errno);
 	ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno);
 	ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno);
 	ASSERT_STREQ(readbuf, CONTENTS1);
 
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 
+/*
+ * If the server doesn't return FOPEN_DIRECT_IO during FUSE_OPEN, then it's not
+ * allowed to return a short write for that file handle.  However, if it does
+ * then we should still do our best to handle it by resending the unwritten
+ * portion, so that the caller still sees the full write succeed.
+ */
+TEST_F(Write, indirect_io_short_write)
+{
+	const char FULLPATH[] = "mountpoint/some_file.txt";
+	const char RELPATH[] = "some_file.txt";
+	const char *CONTENTS = "abcdefghijklmnop";
+	uint64_t ino = 42;
+	int fd;
+	ssize_t bufsize = strlen(CONTENTS);
+	ssize_t bufsize0 = 11;
+	ssize_t bufsize1 = strlen(CONTENTS) - bufsize0;
+	const char *contents1 = CONTENTS + bufsize0;
+
+	expect_lookup(RELPATH, ino, 0);
+	expect_open(ino, 0, 1);
+	expect_getattr(ino, 0);
+	expect_write(ino, 0, bufsize, bufsize0, 0, CONTENTS);
+	expect_write(ino, bufsize0, bufsize1, bufsize1, 0,
+		contents1);
+
+	fd = open(FULLPATH, O_WRONLY);
+	ASSERT_LE(0, fd) << strerror(errno);
+
+	ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);
+	/* Deliberately leak fd.  close(2) will be tested in release.cc */
+}
+
 /* 
  * When the direct_io option is used, filesystems are allowed to write less
- * data than requested
+ * data than requested.  We should return the short write to userland.
  */
-/* https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=236381 */
-TEST_F(Write, DISABLED_direct_io_short_write)
+TEST_F(Write, direct_io_short_write)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	const char *CONTENTS = "abcdefghijklmnop";
 	uint64_t ino = 42;
 	int fd;
 	ssize_t bufsize = strlen(CONTENTS);
 	ssize_t halfbufsize = bufsize / 2;
-	const char *halfcontents = CONTENTS + halfbufsize;
 
 	expect_lookup(RELPATH, ino, 0);
 	expect_open(ino, FOPEN_DIRECT_IO, 1);
 	expect_getattr(ino, 0);
 	/* The server is asked for bufsize bytes but reports only half of them */
 	expect_write(ino, 0, bufsize, halfbufsize, 0, CONTENTS);
-	expect_write(ino, halfbufsize, halfbufsize, halfbufsize, 0,
-		halfcontents);
 
 	fd = open(FULLPATH, O_WRONLY);
 	EXPECT_LE(0, fd) << strerror(errno);
 
-	ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);
+	ASSERT_EQ(halfbufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 
 /*
  * An insidious edge case: the filesystem returns a short write, and the
  * difference between what we requested and what it actually wrote crosses an
  * iov element boundary
  */
-/* https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=236381 */
-TEST_F(Write, DISABLED_direct_io_short_write_iov)
+TEST_F(Write, direct_io_short_write_iov)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	const char *CONTENTS0 = "abcdefgh";
 	const char *CONTENTS1 = "ijklmnop";
 	const char *EXPECTED0 = "abcdefghijklmnop";
-	const char *EXPECTED1 = "hijklmnop";
 	uint64_t ino = 42;
 	int fd;
 	/* size0 stops one byte short of the end of the first iov element */
 	ssize_t size0 = strlen(CONTENTS0) - 1;
 	ssize_t size1 = strlen(CONTENTS1) + 1;
 	ssize_t totalsize = size0 + size1;
 	struct iovec iov[2];
 
 	expect_lookup(RELPATH, ino, 0);
 	expect_open(ino, FOPEN_DIRECT_IO, 1);
 	expect_getattr(ino, 0);
 	expect_write(ino, 0, totalsize, size0, 0, EXPECTED0);
-	expect_write(ino, size0, size1, size1, 0, EXPECTED1);
 
 	fd = open(FULLPATH, O_WRONLY);
 	EXPECT_LE(0, fd) << strerror(errno);
 
 	iov[0].iov_base = (void*)CONTENTS0;
 	iov[0].iov_len = strlen(CONTENTS0);
 	iov[1].iov_base = (void*)CONTENTS1;
 	iov[1].iov_len = strlen(CONTENTS1);
-	ASSERT_EQ(totalsize, writev(fd, iov, 2)) << strerror(errno);
+	ASSERT_EQ(size0, writev(fd, iov, 2)) << strerror(errno);
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 
 /*
  * If the kernel cannot be sure which uid, gid, or pid was responsible for a
  * write, then it must set the FUSE_WRITE_CACHE bit
  */
 /* https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=236378 */
 // TODO: check vfs.fusefs.mmap_enable
 TEST_F(Write, DISABLED_mmap)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	const char *CONTENTS = "abcdefgh";
 	uint64_t ino = 42;
 	int fd;
 	ssize_t bufsize = strlen(CONTENTS);
 	void *p;
 	uint64_t offset = 10;
 	size_t len;
 	void *zeros, *expected;
 
 	len = getpagesize();
 
 	/*
 	 * `zeros` is the page the server returns for the initial read;
 	 * `expected` is that page with CONTENTS copied in at `offset`.
 	 * Compare against nullptr rather than NULL, which googletest's
 	 * comparison macros do not portably accept.
 	 */
 	zeros = calloc(1, len);
 	ASSERT_NE(nullptr, zeros);
 	expected = calloc(1, len);
 	ASSERT_NE(nullptr, expected);
 	memmove((uint8_t*)expected + offset, CONTENTS, bufsize);
 
 	expect_lookup(RELPATH, ino, len);
 	expect_open(ino, 0, 1);
 	expect_getattr(ino, len);
 	expect_read(ino, 0, len, len, zeros);
 	/*
 	 * Writes from the pager may or may not be associated with the correct
 	 * pid, so they must set FUSE_WRITE_CACHE
 	 */
 	expect_write(ino, 0, len, len, FUSE_WRITE_CACHE, expected);
 	expect_flush(ino, 1, ReturnErrno(0));
 	expect_release(ino, ReturnErrno(0));
 
 	fd = open(FULLPATH, O_RDWR);
 	/* Fatal on failure: mmap below needs a valid descriptor */
 	ASSERT_LE(0, fd) << strerror(errno);
 
 	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 	ASSERT_NE(MAP_FAILED, p) << strerror(errno);
 
 	memmove((uint8_t*)p + offset, CONTENTS, bufsize);
 
 	ASSERT_EQ(0, munmap(p, len)) << strerror(errno);
 	close(fd);	// Write mmap'd data on close
 
 	free(expected);
 	free(zeros);
 }
 
 /* pwrite(2) at a nonzero offset should produce a FUSE_WRITE at that offset */
 TEST_F(WriteThrough, pwrite)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	const char *CONTENTS = "abcdefgh";
 	uint64_t ino = 42;
 	uint64_t offset = 4096;
 	int fd;
 	ssize_t bufsize = strlen(CONTENTS);
 
 	expect_lookup(RELPATH, ino, 0);
 	expect_open(ino, 0, 1);
 	expect_getattr(ino, 0);
 	expect_write(ino, offset, bufsize, bufsize, 0, CONTENTS);
 
 	fd = open(FULLPATH, O_WRONLY);
 	/* Fatal on failure: the write below needs a valid descriptor */
 	ASSERT_LE(0, fd) << strerror(errno);
 
 	ASSERT_EQ(bufsize, pwrite(fd, CONTENTS, bufsize, offset))
 		<< strerror(errno);
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 
 /* The basic case: one write(2) at offset 0 produces one FUSE_WRITE */
 TEST_F(Write, write)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	const char *CONTENTS = "abcdefgh";
 	uint64_t ino = 42;
 	int fd;
 	ssize_t bufsize = strlen(CONTENTS);
 
 	expect_lookup(RELPATH, ino, 0);
 	expect_open(ino, 0, 1);
 	expect_getattr(ino, 0);
 	expect_write(ino, 0, bufsize, bufsize, 0, CONTENTS);
 
 	fd = open(FULLPATH, O_WRONLY);
 	/* Fatal on failure: the write below needs a valid descriptor */
 	ASSERT_LE(0, fd) << strerror(errno);
 
 	ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 
 /* fuse(4) should not issue writes of greater size than the daemon requests */
 TEST_F(Write, write_large)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	int *contents;
 	uint64_t ino = 42;
 	int fd;
 	ssize_t halfbufsize, bufsize;
 
 	/* Write twice m_max_write so the data must be split in two */
 	halfbufsize = m_mock->m_max_write;
 	bufsize = halfbufsize * 2;
 	contents = (int*)malloc(bufsize);
 	/*
 	 * Compare against nullptr rather than NULL, which googletest's
 	 * comparison macros do not portably accept.
 	 */
 	ASSERT_NE(nullptr, contents);
 	/* Fill with a counting pattern so the two halves are distinguishable */
 	for (int i = 0; i < (int)bufsize / (int)sizeof(i); i++) {
 		contents[i] = i;
 	}
 
 	expect_lookup(RELPATH, ino, 0);
 	expect_open(ino, 0, 1);
 	expect_getattr(ino, 0);
 	expect_write(ino, 0, halfbufsize, halfbufsize, 0, contents);
 	expect_write(ino, halfbufsize, halfbufsize, halfbufsize, 0,
 		&contents[halfbufsize / sizeof(int)]);
 
 	fd = open(FULLPATH, O_WRONLY);
 	/* Fatal on failure: the write below needs a valid descriptor */
 	ASSERT_LE(0, fd) << strerror(errno);
 
 	ASSERT_EQ(bufsize, write(fd, contents, bufsize)) << strerror(errno);
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 
 	free(contents);
 }
 
 /*
  * A zero-length write(2) should return 0.  No FUSE_WRITE expectation is
  * registered for it.
  */
 TEST_F(Write, write_nothing)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	const char *CONTENTS = "";
 	uint64_t ino = 42;
 	int fd;
 	ssize_t bufsize = 0;
 
 	expect_lookup(RELPATH, ino, 0);
 	expect_open(ino, 0, 1);
 	expect_getattr(ino, 0);
 
 	fd = open(FULLPATH, O_WRONLY);
 	/* Fatal on failure: the write below needs a valid descriptor */
 	ASSERT_LE(0, fd) << strerror(errno);
 
 	ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 
 /* In writeback mode, dirty data should be written on close */
 TEST_F(WriteBack, close)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	const char *CONTENTS = "abcdefgh";
 	uint64_t ino = 42;
 	int fd;
 	ssize_t bufsize = strlen(CONTENTS);
 
 	expect_lookup(RELPATH, ino, 0);
 	expect_open(ino, 0, 1);
 	expect_getattr(ino, 0);
 	expect_write(ino, 0, bufsize, bufsize, 0, CONTENTS);
 	/* Answer any FUSE_SETATTR with an attr reply for this inode */
 	EXPECT_CALL(*m_mock, process(
 		ResultOf([=](auto in) {
 			return (in->header.opcode == FUSE_SETATTR);
 		}, Eq(true)),
 		_)
 	).WillRepeatedly(Invoke(ReturnImmediate([=](auto i __unused, auto out) {
 		SET_OUT_HEADER_LEN(out, attr);
 		out->body.attr.attr.ino = ino;	// Must match nodeid
 	})));
 	expect_flush(ino, 1, ReturnErrno(0));
 	expect_release(ino, ReturnErrno(0));
 
 	fd = open(FULLPATH, O_RDWR);
 	ASSERT_LE(0, fd) << strerror(errno);
 
 	ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);
 	/* Unlike the other tests here, close the file: that is what's tested */
 	close(fd);
 }
 
 /*
  * In writeback mode, writes to an O_WRONLY file could trigger reads from the
  * server.  The FUSE protocol explicitly allows that.
  */
 TEST_F(WriteBack, rmw)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	const char *CONTENTS = "abcdefgh";
 	const char *INITIAL   = "XXXXXXXXXX";
 	uint64_t ino = 42;
 	uint64_t offset = 1;
 	off_t fsize = 10;
 	int fd;
 	ssize_t bufsize = strlen(CONTENTS);
 
 	expect_lookup(RELPATH, ino, 0);
 	expect_open(ino, 0, 1);
 	expect_getattr(ino, fsize);
 	/* The existing file contents are expected to be read in first */
 	expect_read(ino, 0, fsize, fsize, INITIAL);
 	expect_write(ino, offset, bufsize, bufsize, 0, CONTENTS);
 
 	fd = open(FULLPATH, O_WRONLY);
 	/* Fatal on failure: the write below needs a valid descriptor */
 	ASSERT_LE(0, fd) << strerror(errno);
 
 	ASSERT_EQ(bufsize, pwrite(fd, CONTENTS, bufsize, offset))
 		<< strerror(errno);
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 
 /*
  * Without direct_io, writes should be committed to cache
  */
 TEST_F(WriteBack, writeback)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	const char *CONTENTS = "abcdefgh";
 	uint64_t ino = 42;
 	int fd;
 	ssize_t bufsize = strlen(CONTENTS);
 	char readbuf[bufsize];
 
 	expect_lookup(RELPATH, ino, 0);
 	expect_open(ino, 0, 1);
 	expect_getattr(ino, 0);
 	expect_write(ino, 0, bufsize, bufsize, 0, CONTENTS);
 
 	fd = open(FULLPATH, O_RDWR);
 	/* Fatal on failure: everything below needs a valid descriptor */
 	ASSERT_LE(0, fd) << strerror(errno);
 
 	ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);
 	/*
 	 * A subsequent read should be serviced by cache, without querying the
 	 * filesystem daemon.  No FUSE_READ expectation is registered.
 	 */
 	ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno);
 	ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno);
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 
 /*
  * With O_DIRECT, writes should not be committed to cache.  Admittedly this is
  * an odd test, because it would be unusual to use O_DIRECT for writes but not
  * reads.
  */
 TEST_F(WriteBack, o_direct)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	const char *CONTENTS = "abcdefgh";
 	uint64_t ino = 42;
 	int fd;
 	ssize_t bufsize = strlen(CONTENTS);
 	char readbuf[bufsize];
 
 	expect_lookup(RELPATH, ino, 0);
 	expect_open(ino, 0, 1);
 	expect_getattr(ino, 0);
 	expect_write(ino, 0, bufsize, bufsize, 0, CONTENTS);
 	expect_read(ino, 0, bufsize, bufsize, CONTENTS);
 
 	fd = open(FULLPATH, O_RDWR | O_DIRECT);
 	/* Fatal on failure: everything below needs a valid descriptor */
 	ASSERT_LE(0, fd) << strerror(errno);
 
 	ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);
 	/* A subsequent read must query the daemon because cache is empty */
 	ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno);
 	/* Clear O_DIRECT so that the read itself is an ordinary one */
 	ASSERT_EQ(0, fcntl(fd, F_SETFL, 0)) << strerror(errno);
 	ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno);
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 
 /*
  * Without direct_io, writes should be committed to cache
  */
 /*
  * Disabled because we don't yet implement write-through caching.  No bugzilla
  * entry, because that's a feature request, not a bug.
  */
 TEST_F(WriteThrough, DISABLED_writethrough)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	const char *CONTENTS = "abcdefgh";
 	uint64_t ino = 42;
 	int fd;
 	ssize_t bufsize = strlen(CONTENTS);
 	char readbuf[bufsize];
 
 	expect_lookup(RELPATH, ino, 0);
 	expect_open(ino, 0, 1);
 	expect_getattr(ino, 0);
 	expect_write(ino, 0, bufsize, bufsize, 0, CONTENTS);
 
 	fd = open(FULLPATH, O_RDWR);
 	/* Fatal on failure: everything below needs a valid descriptor */
 	ASSERT_LE(0, fd) << strerror(errno);
 
 	ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);
 	/*
 	 * A subsequent read should be serviced by cache, without querying the
 	 * filesystem daemon.  The write left the file offset at bufsize, so
 	 * rewind first; otherwise the read would start past the data.
 	 */
 	ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno);
 	ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno);
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 
 /* With writethrough caching, writes update the cached file size */
 /* https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=235775 */
 TEST_F(WriteThrough, DISABLED_update_file_size)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	const char *CONTENTS = "abcdefgh";
 	struct stat sb;
 	uint64_t ino = 42;
 	int fd;
 	ssize_t bufsize = strlen(CONTENTS);
 
 	expect_lookup(RELPATH, ino, 0);
 	expect_open(ino, 0, 1);
 	/*
 	 * Exactly two FUSE_GETATTR requests are expected, and both report a
 	 * size of 0.  The size seen by fstat() below can therefore only equal
 	 * bufsize if the write itself updated the cached attributes.
 	 */
 	EXPECT_CALL(*m_mock, process(
 		ResultOf([=](auto in) {
 			return (in->header.opcode == FUSE_GETATTR &&
 				in->header.nodeid == ino);
 		}, Eq(true)),
 		_)
 	).Times(2)
 	.WillRepeatedly(Invoke(ReturnImmediate([=](auto in __unused, auto out) {
 		SET_OUT_HEADER_LEN(out, attr);
 		out->body.attr.attr.ino = ino;	// Must match nodeid
 		out->body.attr.attr.mode = S_IFREG | 0644;
 		out->body.attr.attr.size = 0;
 		out->body.attr.attr_valid = UINT64_MAX;
 	})));
 	expect_write(ino, 0, bufsize, bufsize, 0, CONTENTS);
 
 	fd = open(FULLPATH, O_RDWR);
 	/* Fatal on failure: everything below needs a valid descriptor */
 	ASSERT_LE(0, fd) << strerror(errno);
 
 	ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);
 	/* Get cached attributes */
 	ASSERT_EQ(0, fstat(fd, &sb)) << strerror(errno);
 	ASSERT_EQ(bufsize, sb.st_size);
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }