Index: projects/fuse2/sys/fs/fuse/fuse_internal.c
===================================================================
--- projects/fuse2/sys/fs/fuse/fuse_internal.c	(revision 349237)
+++ projects/fuse2/sys/fs/fuse/fuse_internal.c	(revision 349238)
@@ -1,1097 +1,1098 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2007-2009 Google Inc. and Amit Singh
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
  *
  * * Redistributions of source code must retain the above copyright
  *   notice, this list of conditions and the following disclaimer.
  * * Redistributions in binary form must reproduce the above
  *   copyright notice, this list of conditions and the following disclaimer
  *   in the documentation and/or other materials provided with the
  *   distribution.
  * * Neither the name of Google Inc. nor the names of its
  *   contributors may be used to endorse or promote products derived from
  *   this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Copyright (C) 2005 Csaba Henk.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/module.h>
 #include <sys/systm.h>
 #include <sys/errno.h>
 #include <sys/kernel.h>
 #include <sys/conf.h>
 #include <sys/uio.h>
 #include <sys/malloc.h>
 #include <sys/queue.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/sdt.h>
 #include <sys/sx.h>
 #include <sys/proc.h>
 #include <sys/mount.h>
 #include <sys/vnode.h>
 #include <sys/namei.h>
 #include <sys/stat.h>
 #include <sys/unistd.h>
 #include <sys/filedesc.h>
 #include <sys/file.h>
 #include <sys/fcntl.h>
 #include <sys/dirent.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/sysctl.h>
 #include <sys/priv.h>
 
 #include "fuse.h"
 #include "fuse_file.h"
 #include "fuse_internal.h"
 #include "fuse_io.h"
 #include "fuse_ipc.h"
 #include "fuse_node.h"
 #include "fuse_file.h"
 
 SDT_PROVIDER_DECLARE(fusefs);
 /* 
  * Fuse trace probe:
  * arg0: verbosity.  Higher numbers give more verbose messages
  * arg1: Textual message
  */
 SDT_PROBE_DEFINE2(fusefs, , internal, trace, "int", "char*");
 
 #ifdef ZERO_PAD_INCOMPLETE_BUFS
 static int isbzero(void *buf, size_t len);
 
 #endif
 
 /*
  * Look up the vnode for inode 'ino' on mount 'mp' in the VFS hash, locking it
  * according to 'flags'.
  *
  * On return *vpp is the vnode if one was found and its entry cache timeout
  * has not passed yet.  If no vnode was found, or its entry cache timeout has
  * passed (in which case its name cache entries are purged and the vnode is
  * released), *vpp is NULL.
  *
  * Returns 0 in all of those cases, or the error from vfs_hash_get().
  */
 int
 fuse_internal_get_cached_vnode(struct mount* mp, ino_t ino, int flags,
 	struct vnode **vpp)
 {
 	struct bintime uptime;
 	struct vnode *found;
 	uint64_t nodeid = ino;
 	int error;
 
 	*vpp = NULL;
 
 	error = vfs_hash_get(mp, fuse_vnode_hash(nodeid), flags, curthread,
 	    vpp, fuse_vnode_cmp, &nodeid);
 	if (error != 0)
 		return (error);
 
 	found = *vpp;
 	if (found == NULL)
 		return (0);
 
 	/*
 	 * Check the entry cache timeout.  We have to do this within fusefs
 	 * instead of by using cache_enter_time/cache_lookup because those
 	 * routines are only intended to work with pathnames, not inodes
 	 */
 	getbinuptime(&uptime);
 	if (bintime_cmp(&(VTOFUD(found)->entry_cache_timeout), &uptime, >)) {
 		atomic_add_acq_long(&fuse_lookup_cache_hits, 1);
 		return (0);
 	}
 
 	/* Entry cache timeout: drop the stale vnode and report a miss */
 	atomic_add_acq_long(&fuse_lookup_cache_misses, 1);
 	cache_purge(found);
 	vput(found);
 	*vpp = NULL;
 	return (0);
 }
 
 /*
  * Synchronously check whether 'cred' may access 'vp' with access mode 'mode'.
  *
  * The checks are applied in this order:
  *  - an empty mode is always allowed;
  *  - modifying a directory, symlink or regular file on a read-only mount
  *    fails with EROFS;
  *  - unless FSESS_DAEMON_CAN_SPY is set, a non-zero result from
  *    fuse_match_cred() against the daemon's credentials yields EPERM;
  *  - with FSESS_DEFAULT_PERMISSIONS the decision is made locally by vaccess()
  *    from the file's attributes;
  *  - otherwise a FUSE_ACCESS request is sent to the daemon, unless the daemon
  *    has already been marked as not implementing it.
  *
  * Returns 0 if access is granted, or an errno value.
  */
 int
 fuse_internal_access(struct vnode *vp,
     accmode_t mode,
     struct thread *td,
     struct ucred *cred)
 {
 	int err = 0;
 	uint32_t mask = F_OK;
 	int dataflags;
 	struct mount *mp;
 	struct fuse_dispatcher fdi;
 	struct fuse_access_in *fai;
 	struct fuse_data *data;
 
 	mp = vnode_mount(vp);
 
 	data = fuse_get_mpdata(mp);
 	dataflags = data->dataflags;
 
 	if (mode == 0)
 		return 0;
 
 	if (mode & VMODIFY_PERMS && vfs_isrdonly(mp)) {
 		switch (vp->v_type) {
 		case VDIR:
 			/* FALLTHROUGH */
 		case VLNK:
 			/* FALLTHROUGH */
 		case VREG:
 			return EROFS;
 		default:
 			break;
 		}
 	}
 
 	/* Unless explicitly permitted, deny everyone except the fs owner. */
 	if (!(dataflags & FSESS_DAEMON_CAN_SPY)) {
 		if (fuse_match_cred(data->daemoncred, cred))
 			return EPERM;
 	}
 
 	if (dataflags & FSESS_DEFAULT_PERMISSIONS) {
 		struct vattr va;
 
 		/*
 		 * If the attributes cannot be obtained, 'va' is not filled
 		 * in; report the failure instead of handing uninitialized
 		 * stack contents to vaccess().
 		 */
 		err = fuse_internal_getattr(vp, &va, cred, td);
 		if (err != 0)
 			return err;
 		return vaccess(vp->v_type, va.va_mode, va.va_uid,
 		    va.va_gid, mode, cred, NULL);
 	}
 
 	if (!fsess_isimpl(mp, FUSE_ACCESS))
 		return 0;
 
 	/* Translate the VFS access mode into the R_OK/W_OK/X_OK mask */
 	if ((mode & (VWRITE | VAPPEND | VADMIN)) != 0)
 		mask |= W_OK;
 	if ((mode & VREAD) != 0)
 		mask |= R_OK;
 	if ((mode & VEXEC) != 0)
 		mask |= X_OK;
 
 	fdisp_init(&fdi, sizeof(*fai));
 	fdisp_make_vp(&fdi, FUSE_ACCESS, vp, td, cred);
 
 	fai = fdi.indata;
 	fai->mask = mask;
 
 	err = fdisp_wait_answ(&fdi);
 	fdisp_destroy(&fdi);
 
 	if (err == ENOSYS) {
 		/* Remember that FUSE_ACCESS is unimplemented; allow access */
 		fsess_set_notimpl(mp, FUSE_ACCESS);
 		err = 0;
 	}
 	return err;
 }
 
 /*
  * Convert the FUSE attributes in 'attr' to a struct vattr.
  *
  * When the validity period (attr_valid, attr_valid_nsec) is non-zero the
  * result is stored in the attribute cache of 'vp', and additionally copied to
  * 'vap' if that is not NULL.  When the validity period is zero nothing is
  * stored in the vnode's cache; the result is written to 'vap' only, or
  * dropped if 'vap' is NULL.
  *
  * In every case the vnode's attribute cache timeout is updated and, for a
  * regular file whose size differs from the cached size, the vnode's size is
  * adjusted.  The vnode must be exclusively locked.
  */
 void
 fuse_internal_cache_attrs(struct vnode *vp, struct fuse_attr *attr,
 	uint64_t attr_valid, uint32_t attr_valid_nsec, struct vattr *vap)
 {
 	struct mount *mp = vnode_mount(vp);
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 	struct fuse_data *data = fuse_get_mpdata(mp);
 	struct vattr *dst;
 
 	ASSERT_VOP_ELOCKED(vp, "fuse_internal_cache_attrs");
 
 	fuse_validity_2_bintime(attr_valid, attr_valid_nsec,
 		&fvdat->attr_cache_timeout);
 
 	/* Fix our buffers if the filesize changed without us knowing */
 	if (vnode_isreg(vp) && attr->size != fvdat->cached_attrs.va_size) {
 		(void)fuse_vnode_setsize(vp, attr->size);
 		fvdat->cached_attrs.va_size = attr->size;
 	}
 
 	/* Pick the destination: the vnode's cache, or just the caller's copy */
 	if (attr_valid == 0 && attr_valid_nsec == 0) {
 		if (vap == NULL)
 			return;
 		dst = vap;
 	} else {
 		dst = &(fvdat->cached_attrs);
 	}
 
 	vattr_null(dst);
 	dst->va_type = IFTOVT(attr->mode);
 	dst->va_mode = attr->mode & ~S_IFMT;
 	dst->va_fsid = mp->mnt_stat.f_fsid.val[0];
 	dst->va_fileid = attr->ino;
 	dst->va_nlink = attr->nlink;
 	dst->va_uid = attr->uid;
 	dst->va_gid = attr->gid;
 	dst->va_rdev = attr->rdev;
 	dst->va_size = attr->size;
 	dst->va_bytes = attr->blocks * S_BLKSIZE;
 	dst->va_flags = 0;
 	if (fuse_libabi_geq(data, 7, 9) && attr->blksize > 0)
 		dst->va_blocksize = attr->blksize;
 	else
 		dst->va_blocksize = PAGE_SIZE;
 	/* XXX on i386, seconds are truncated to 32 bits */
 	dst->va_atime.tv_sec = attr->atime;
 	dst->va_atime.tv_nsec = attr->atimensec;
 	dst->va_mtime.tv_sec = attr->mtime;
 	dst->va_mtime.tv_nsec = attr->mtimensec;
 	dst->va_ctime.tv_sec = attr->ctime;
 	dst->va_ctime.tv_nsec = attr->ctimensec;
 
 	/* The caller also wants a copy of what went into the vnode's cache */
 	if (vap != NULL && vap != dst)
 		memcpy(vap, dst, sizeof(*vap));
 }
 
 
 /* fsync */
 
 int
 fuse_internal_fsync_callback(struct fuse_ticket *tick, struct uio *uio)
 {
 	if (tick->tk_aw_ohead.error == ENOSYS) {
 		fsess_set_notimpl(tick->tk_data->mp, fticket_opcode(tick));
 	}
 	return 0;
 }
 
 /*
  * Send FUSE_FSYNC (or FUSE_FSYNCDIR for a directory) for every file handle
  * currently open on 'vp'.
  *
  * vp:       vnode to sync
  * td:       thread on whose behalf the requests are made
  * waitfor:  MNT_WAIT to wait for each reply; any other value queues the
  *           requests with fuse_internal_fsync_callback and does not wait
  * datasync: if true, set fsync_flags to 1 in each request
  *
  * Returns 0 without contacting the daemon if the operation is already
  * marked as not implemented.  Otherwise returns the result of the last
  * synchronous request that was issued (0 if none was); ENOSYS from the
  * daemon is converted to 0 and stops the loop.
  */
 int
 fuse_internal_fsync(struct vnode *vp,
     struct thread *td,
     int waitfor,
     bool datasync)
 {
 	struct fuse_fsync_in *ffsi = NULL;
 	struct fuse_dispatcher fdi;
 	struct fuse_filehandle *fufh;
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 	struct mount *mp = vnode_mount(vp);
 	int op = FUSE_FSYNC;
 	int err = 0;
 
 	if (vnode_isdir(vp))
 		op = FUSE_FSYNCDIR;
 
 	if (!fsess_isimpl(mp, op))
 		return 0;
 
 	fdisp_init(&fdi, sizeof(*ffsi));
 	/*
 	 * fsync every open file handle for this file, because we can't be sure
 	 * which file handle the caller is really referring to.
 	 */
 	LIST_FOREACH(fufh, &fvdat->handles, next) {
 		/* ffsi doubles as the "first iteration" marker */
 		if (ffsi == NULL)
 			fdisp_make_vp(&fdi, op, vp, td, NULL);
 		else
 			fdisp_refresh_vp(&fdi, op, vp, td, NULL);
 		ffsi = fdi.indata;
 		ffsi->fh = fufh->fh_id;
 		ffsi->fsync_flags = 0;
 
 		if (datasync)
 			ffsi->fsync_flags = 1;
 
 		if (waitfor == MNT_WAIT) {
 			err = fdisp_wait_answ(&fdi);
 		} else {
 			fuse_insert_callback(fdi.tick,
 				fuse_internal_fsync_callback);
 			fuse_insert_message(fdi.tick, false);
 		}
 		if (err == ENOSYS) {
 			/* ENOSYS means "success, and don't call again" */
 			fsess_set_notimpl(mp, op);
 			err = 0;
 			break;
 		}
 	}
 	fdisp_destroy(&fdi);
 
 	return err;
 }
 
 /* Asynchronous invalidation */
 SDT_PROBE_DEFINE2(fusefs, , internal, invalidate_cache_hit,
 	"struct vnode*", "struct vnode*");
 int
 fuse_internal_invalidate_entry(struct mount *mp, struct uio *uio)
 {
 	struct fuse_notify_inval_entry_out fnieo;
 	struct componentname cn;
 	struct vnode *dvp, *vp;
 	char name[PATH_MAX];
 	int err;
 
 	if ((err = uiomove(&fnieo, sizeof(fnieo), uio)) != 0)
 		return (err);
 
 	if ((err = uiomove(name, fnieo.namelen, uio)) != 0)
 		return (err);
 	name[fnieo.namelen] = '\0';
 	/* fusefs does not cache "." or ".." entries */
 	if (strncmp(name, ".", sizeof(".")) == 0 ||
 	    strncmp(name, "..", sizeof("..")) == 0)
 		return (0);
 
 	if (fnieo.parent == FUSE_ROOT_ID)
 		err = VFS_ROOT(mp, LK_SHARED, &dvp);
 	else
 		err = fuse_internal_get_cached_vnode( mp, fnieo.parent,
 			LK_SHARED, &dvp);
 	/* 
 	 * If dvp is not in the cache, then it must've been reclaimed.  And
 	 * since fuse_vnop_reclaim does a cache_purge, name's entry must've
 	 * been invalidated already.  So we can safely return if dvp == NULL
 	 */
 	if (err != 0 || dvp == NULL)
 		return (err);
 	/*
 	 * XXX we can't check dvp's generation because the FUSE invalidate
 	 * entry message doesn't include it.  Worse case is that we invalidate
 	 * an entry that didn't need to be invalidated.
 	 */
 
 	cn.cn_nameiop = LOOKUP;
 	cn.cn_flags = 0;	/* !MAKEENTRY means free cached entry */
 	cn.cn_thread = curthread;
 	cn.cn_cred = curthread->td_ucred;
 	cn.cn_lkflags = LK_SHARED;
 	cn.cn_pnbuf = NULL;
 	cn.cn_nameptr = name;
 	cn.cn_namelen = fnieo.namelen;
 	err = cache_lookup(dvp, &vp, &cn, NULL, NULL);
 	MPASS(err == 0);
 	fuse_vnode_clear_attr_cache(dvp);
 	vput(dvp);
 	return (0);
 }
 
 int
 fuse_internal_invalidate_inode(struct mount *mp, struct uio *uio)
 {
 	struct fuse_notify_inval_inode_out fniio;
 	struct vnode *vp;
 	int err;
 
 	if ((err = uiomove(&fniio, sizeof(fniio), uio)) != 0)
 		return (err);
 
 	if (fniio.ino == FUSE_ROOT_ID)
 		err = VFS_ROOT(mp, LK_EXCLUSIVE, &vp);
 	else
 		err = fuse_internal_get_cached_vnode(mp, fniio.ino, LK_SHARED,
 			&vp);
 	if (err != 0 || vp == NULL)
 		return (err);
 	/*
 	 * XXX we can't check vp's generation because the FUSE invalidate
 	 * entry message doesn't include it.  Worse case is that we invalidate
 	 * an inode that didn't need to be invalidated.
 	 */
 
 	/* 
 	 * Flush and invalidate buffers if off >= 0.  Technically we only need
 	 * to flush and invalidate the range of offsets [off, off + len), but
 	 * for simplicity's sake we do everything.
 	 */
 	if (fniio.off >= 0)
 		fuse_io_invalbuf(vp, curthread);
 	fuse_vnode_clear_attr_cache(vp);
 	vput(vp);
 	return (0);
 }
 
 /* mknod */
 /*
  * Create a filesystem node by sending FUSE_MKNOD for the name in 'cnp' under
  * directory 'dvp', with type, mode and device number taken from 'vap'.
  *
  * Daemons speaking protocol 7.12 or later get the full request including the
  * caller's umask; older ones get the shorter FUSE_COMPAT_MKNOD_IN_SIZE form.
  *
  * Returns the result of fuse_internal_newentry(), which also delivers the new
  * vnode through 'vpp'.
  */
 int
 fuse_internal_mknod(struct vnode *dvp, struct vnode **vpp,
 	struct componentname *cnp, struct vattr *vap)
 {
 	struct fuse_data *data;
 	struct fuse_mknod_in fmni;
 	size_t insize;
 
 	data = fuse_get_mpdata(dvp->v_mount);
 
 	/*
 	 * Zero the whole request first.  'insize' bytes of it are copied
 	 * verbatim into the message for the daemon, so any member or padding
 	 * not assigned below would otherwise expose uninitialized kernel
 	 * stack contents to userland.
 	 */
 	bzero(&fmni, sizeof(fmni));
 
 	fmni.mode = MAKEIMODE(vap->va_type, vap->va_mode);
 	fmni.rdev = vap->va_rdev;
 	if (fuse_libabi_geq(data, 7, 12)) {
 		insize = sizeof(fmni);
 		fmni.umask = curthread->td_proc->p_fd->fd_cmask;
 	} else {
 		insize = FUSE_COMPAT_MKNOD_IN_SIZE;
 	}
 	return (fuse_internal_newentry(dvp, vpp, cnp, FUSE_MKNOD, &fmni,
 	    insize, vap->va_type));
 }
 
 /* readdir */
 
 /*
  * Read directory entries for 'vp' through file handle 'fufh' by issuing
  * FUSE_READDIR requests until 'uio' has no space left, an error occurs, or
  * fuse_internal_readdir_processdata() reports that the read is finished.
  *
  * Entries are only copied out once the directory offset 'startoff' has been
  * reached.  'cookediov' is scratch space for the converted entries; 'cookies'
  * and 'ncookies' are handed on to fuse_internal_readdir_processdata().
  *
  * Returns 0 on success (including "finished"), or an errno value.
  */
 int
 fuse_internal_readdir(struct vnode *vp,
     struct uio *uio,
     off_t startoff,
     struct fuse_filehandle *fufh,
     struct fuse_iov *cookediov,
     int *ncookies,
     u_long *cookies)
 {
 	struct fuse_dispatcher fdi;
 	struct fuse_read_in *fri = NULL;
 	int fnd_start;
 	int err = 0;
 
 	if (uio_resid(uio) == 0)
 		return 0;
 	fdisp_init(&fdi, 0);
 
 	/*
 	 * Note that we DO NOT have a UIO_SYSSPACE here (so no need for p2p
 	 * I/O).
 	 */
 
 	/*
 	 * fnd_start is set non-zero once the offset in the directory gets
 	 * to the startoff.  This is done because directories must be read
 	 * from the beginning (offset == 0) when fuse_vnop_readdir() needs
 	 * to do an open of the directory.
 	 * If it is not set non-zero here, it will be set non-zero in
 	 * fuse_internal_readdir_processdata() when uio_offset == startoff.
 	 */
 	fnd_start = (uio->uio_offset == startoff) ? 1 : 0;
 
 	while (uio_resid(uio) > 0) {
 		fdi.iosize = sizeof(*fri);
 		/* The first pass builds the request, later passes reuse it */
 		if (fri != NULL)
 			fdisp_refresh_vp(&fdi, FUSE_READDIR, vp, NULL, NULL);
 		else
 			fdisp_make_vp(&fdi, FUSE_READDIR, vp, NULL, NULL);
 
 		fri = fdi.indata;
 		fri->fh = fufh->fh_id;
 		fri->offset = uio_offset(uio);
 		fri->size = MIN(uio->uio_resid,
 		    fuse_get_mpdata(vp->v_mount)->max_read);
 
 		err = fdisp_wait_answ(&fdi);
 		if (err != 0)
 			break;
 
 		err = fuse_internal_readdir_processdata(uio, startoff,
 		    &fnd_start, fri->size, fdi.answ, fdi.iosize, cookediov,
 		    ncookies, &cookies);
 		if (err != 0)
 			break;
 	}
 
 	fdisp_destroy(&fdi);
 
 	/* -1 is processdata's "finished" marker, not a failure */
 	if (err == -1)
 		return (0);
 	return (err);
 }
 
 /*
  * Convert the FUSE directory entries in 'buf' to struct dirent records and
  * copy them out to 'uio'.
  *
  * uio:       destination; its offset is set to each consumed entry's 'off'
  * startoff:  directory offset at which copying out should begin
  * fnd_start: in/out flag; entries are only copied out while it is non-zero,
  *            and it is set to 1 when an entry whose 'off' equals startoff
  *            has been passed
  * reqsize:   not referenced by this function
  * buf:       reply data holding the packed struct fuse_dirent records
  * bufsize:   number of bytes available at buf
  * cookediov: scratch buffer used to build each struct dirent
  * ncookies:  in/out count of cookie slots remaining
  * cookiesp:  in/out pointer to the next free cookie slot; may point to NULL
  *            if no cookies are wanted
  *
  * Return -1 to indicate that this readdir is finished, 0 if it copied
  * all the directory data read in and it may be possible to read more
  * and greater than 0 for a failure.
  */
 int
 fuse_internal_readdir_processdata(struct uio *uio,
     off_t startoff,
     int *fnd_start,
     size_t reqsize,
     void *buf,
     size_t bufsize,
     struct fuse_iov *cookediov,
     int *ncookies,
     u_long **cookiesp)
 {
 	int err = 0;
 	int bytesavail;
 	size_t freclen;
 
 	struct dirent *de;
 	struct fuse_dirent *fudge;
 	u_long *cookies;
 
 	cookies = *cookiesp;
 	/*
 	 * Too little data for even one entry header.  The same test is
 	 * repeated at the top of the loop; this copy returns directly.
 	 */
 	if (bufsize < FUSE_NAME_OFFSET)
 		return -1;
 	for (;;) {
 		/* Stop once the remaining data cannot hold an entry header */
 		if (bufsize < FUSE_NAME_OFFSET) {
 			err = -1;
 			break;
 		}
 		fudge = (struct fuse_dirent *)buf;
 		/* Size of this record in the daemon's reply */
 		freclen = FUSE_DIRENT_SIZE(fudge);
 
 		if (bufsize < freclen) {
 			/*
 			 * This indicates a partial directory entry at the
 			 * end of the directory data.
 			 */
 			err = -1;
 			break;
 		}
 #ifdef ZERO_PAD_INCOMPLETE_BUFS
 		/* An all-zero entry header is treated as the end of the data */
 		if (isbzero(buf, FUSE_NAME_OFFSET)) {
 			err = -1;
 			break;
 		}
 #endif
 
 		/* Reject empty names and names longer than MAXNAMLEN */
 		if (!fudge->namelen || fudge->namelen > MAXNAMLEN) {
 			err = EINVAL;
 			break;
 		}
 		/* Size the converted struct dirent will occupy in uio */
 		bytesavail = GENERIC_DIRSIZ((struct pseudo_dirent *)
 					    &fudge->namelen);
 
 		if (bytesavail > uio_resid(uio)) {
 			/* Out of space for the dir so we are done. */
 			err = -1;
 			break;
 		}
 		/*
 		 * Don't start to copy the directory entries out until
 		 * the requested offset in the directory is found.
 		 */
 		if (*fnd_start != 0) {
 			/* Build the dirent in a zeroed scratch buffer */
 			fiov_adjust(cookediov, bytesavail);
 			bzero(cookediov->base, bytesavail);
 
 			de = (struct dirent *)cookediov->base;
 			de->d_fileno = fudge->ino;
 			de->d_reclen = bytesavail;
 			de->d_type = fudge->type;
 			de->d_namlen = fudge->namelen;
 			/*
 			 * The destination is MAXNAMLEN + 1 bytes before the
 			 * end of struct dirent, presumably the start of its
 			 * name field.
 			 */
 			memcpy((char *)cookediov->base + sizeof(struct dirent) -
 			       MAXNAMLEN - 1,
 			       (char *)buf + FUSE_NAME_OFFSET, fudge->namelen);
 			dirent_terminate(de);
 
 			err = uiomove(cookediov->base, cookediov->len, uio);
 			if (err)
 				break;
 			if (cookies != NULL) {
 				/*
 				 * No cookie slot left.  NOTE(review): the
 				 * entry has already been copied to uio by
 				 * the uiomove() above when this is hit.
 				 */
 				if (*ncookies == 0) {
 					err = -1;
 					break;
 				}
 				*cookies = fudge->off;
 				cookies++;
 				(*ncookies)--;
 			}
 		} else if (startoff == fudge->off)
 			*fnd_start = 1;
 		/* Step to the next record and record how far we have read */
 		buf = (char *)buf + freclen;
 		bufsize -= freclen;
 		uio_setoffset(uio, fudge->off);
 	}
 	/* Tell the caller where the next cookie should go */
 	*cookiesp = cookies;
 
 	return err;
 }
 
 /* remove */
 
 /*
  * Send operation 'op' to the daemon for the entry named by 'cnp' in
  * directory 'dvp'.  The request payload is just the NUL-terminated name.
  * 'vp' is not used here.
  *
  * Returns the result of waiting for the daemon's answer.
  */
 int
 fuse_internal_remove(struct vnode *dvp,
     struct vnode *vp,
     struct componentname *cnp,
     enum fuse_opcode op)
 {
 	struct fuse_dispatcher fdi;
 	char *namebuf;
 	int err;
 
 	fdisp_init(&fdi, cnp->cn_namelen + 1);
 	fdisp_make_vp(&fdi, op, dvp, cnp->cn_thread, cnp->cn_cred);
 
 	namebuf = fdi.indata;
 	memcpy(namebuf, cnp->cn_nameptr, cnp->cn_namelen);
 	namebuf[cnp->cn_namelen] = '\0';
 
 	err = fdisp_wait_answ(&fdi);
 	fdisp_destroy(&fdi);
 	return (err);
 }
 
 /* rename */
 
 /*
  * Send FUSE_RENAME to move the entry 'fcnp' in directory 'fdvp' to the name
  * 'tcnp' in directory 'tdvp'.
  *
  * The request is a struct fuse_rename_in followed by the old name and the
  * new name, each NUL-terminated.  It is issued with the thread and
  * credentials of 'tcnp'.
  *
  * Returns the result of waiting for the daemon's answer.
  */
 int
 fuse_internal_rename(struct vnode *fdvp,
     struct componentname *fcnp,
     struct vnode *tdvp,
     struct componentname *tcnp)
 {
 	struct fuse_dispatcher fdi;
 	struct fuse_rename_in *fri;
 	char *cursor;
 	int err;
 
 	fdisp_init(&fdi, sizeof(*fri) + fcnp->cn_namelen + tcnp->cn_namelen + 2);
 	fdisp_make_vp(&fdi, FUSE_RENAME, fdvp, tcnp->cn_thread, tcnp->cn_cred);
 
 	fri = fdi.indata;
 	fri->newdir = VTOI(tdvp);
 
 	/* Old name goes right after the fixed-size header ... */
 	cursor = (char *)fdi.indata + sizeof(*fri);
 	memcpy(cursor, fcnp->cn_nameptr, fcnp->cn_namelen);
 	cursor[fcnp->cn_namelen] = '\0';
 
 	/* ... and the new name right after the old name's terminator */
 	cursor += fcnp->cn_namelen + 1;
 	memcpy(cursor, tcnp->cn_nameptr, tcnp->cn_namelen);
 	cursor[tcnp->cn_namelen] = '\0';
 
 	err = fdisp_wait_answ(&fdi);
 	fdisp_destroy(&fdi);
 	return (err);
 }
 
 /* strategy */
 
 /* entity creation */
 
 /*
  * Build (but do not send) a request for operation 'op' that creates an entry
  * named by 'cnp' under the directory with node id 'dnid'.
  *
  * The payload is the 'bufsize' bytes at 'buf' followed by the NUL-terminated
  * name.  The request is prepared in 'fdip', which the caller must already
  * have initialized.
  */
 void
 fuse_internal_newentry_makerequest(struct mount *mp,
     uint64_t dnid,
     struct componentname *cnp,
     enum fuse_opcode op,
     void *buf,
     size_t bufsize,
     struct fuse_dispatcher *fdip)
 {
 	char *payload;
 
 	fdip->iosize = bufsize + cnp->cn_namelen + 1;
 	fdisp_make(fdip, op, mp, dnid, cnp->cn_thread, cnp->cn_cred);
 
 	payload = fdip->indata;
 	memcpy(payload, buf, bufsize);
 
 	payload += bufsize;
 	memcpy(payload, cnp->cn_nameptr, cnp->cn_namelen);
 	payload[cnp->cn_namelen] = '\0';
 }
 
 /*
  * Send the entry-creation request already prepared in 'fdip' and turn the
  * daemon's struct fuse_entry_out reply into a vnode.
  *
  * On success the new vnode is delivered through 'vpp', the parent's attribute
  * cache is cleared and the new vnode's attributes are cached.  If the vnode
  * cannot be obtained, a FUSE_FORGET with a lookup count of 1 is sent for the
  * node the daemon just returned.
  *
  * Returns 0 on success or an errno value.
  */
 int
 fuse_internal_newentry_core(struct vnode *dvp,
     struct vnode **vpp,
     struct componentname *cnp,
     enum vtype vtyp,
     struct fuse_dispatcher *fdip)
 {
 	struct mount *mp = vnode_mount(dvp);
 	struct fuse_entry_out *feo;
 	int err;
 
 	err = fdisp_wait_answ(fdip);
 	if (err != 0)
 		return (err);
 
 	feo = fdip->answ;
 	err = fuse_internal_checkentry(feo, vtyp);
 	if (err != 0)
 		return (err);
 
 	err = fuse_vnode_get(mp, feo, feo->nodeid, dvp, vpp, cnp, vtyp);
 	if (err != 0) {
 		fuse_internal_forget_send(mp, cnp->cn_thread, cnp->cn_cred,
 		    feo->nodeid, 1);
 		return (err);
 	}
 
 	/* 
 	 * Purge the parent's attribute cache because the daemon should've
 	 * updated its mtime and ctime
 	 */
 	fuse_vnode_clear_attr_cache(dvp);
 
 	fuse_internal_cache_attrs(*vpp, &feo->attr, feo->attr_valid,
 		feo->attr_valid_nsec, NULL);
 
 	return (0);
 }
 
 /*
  * Create a new entry named by 'cnp' under 'dvp' using operation 'op'.
  *
  * 'buf'/'bufsize' is the operation-specific request header that precedes the
  * name; 'vtype' is the vnode type the reply is expected to describe.  This
  * builds the request, sends it and converts the reply to a vnode in 'vpp'.
  *
  * Returns the result of fuse_internal_newentry_core().
  */
 int
 fuse_internal_newentry(struct vnode *dvp,
     struct vnode **vpp,
     struct componentname *cnp,
     enum fuse_opcode op,
     void *buf,
     size_t bufsize,
     enum vtype vtype)
 {
 	struct fuse_dispatcher fdi;
 	int error;
 
 	fdisp_init(&fdi, 0);
 	fuse_internal_newentry_makerequest(vnode_mount(dvp), VTOI(dvp), cnp,
 	    op, buf, bufsize, &fdi);
 	error = fuse_internal_newentry_core(dvp, vpp, cnp, vtype, &fdi);
 	fdisp_destroy(&fdi);
 
 	return (error);
 }
 
 /* entity destruction */
 
 int
 fuse_internal_forget_callback(struct fuse_ticket *ftick, struct uio *uio)
 {
 	fuse_internal_forget_send(ftick->tk_data->mp, curthread, NULL,
 	    ((struct fuse_in_header *)ftick->tk_ms_fiov.base)->nodeid, 1);
 
 	return 0;
 }
 
 /*
  * Queue a FUSE_FORGET message telling the daemon to drop 'nlookup' lookup
  * references on node 'nodeid'.  The message is inserted without waiting for
  * an answer.
  */
 void
 fuse_internal_forget_send(struct mount *mp,
     struct thread *td,
     struct ucred *cred,
     uint64_t nodeid,
     uint64_t nlookup)
 {
 	struct fuse_dispatcher fdi;
 	struct fuse_forget_in *forget;
 
 	/*
 	 * Disabled assertion, kept for reference:
 	 *
 	 * KASSERT(nlookup > 0, ("zero-times forget for vp #%llu",
 	 *         (long long unsigned) nodeid));
 	 */
 
 	fdisp_init(&fdi, sizeof(*forget));
 	fdisp_make(&fdi, FUSE_FORGET, mp, nodeid, td, cred);
 
 	forget = fdi.indata;
 	forget->nlookup = nlookup;
 
 	fuse_insert_message(fdi.tick, false);
 	fdisp_destroy(&fdi);
 }
 
 /*
  * Fetch the vnode's attributes from the daemon with FUSE_GETATTR.
  *
  * The reply is passed to fuse_internal_cache_attrs(), which caches it on 'vp'
  * and/or stores it in 'vap'.  If the daemon answers ENOENT, or reports a file
  * type different from the vnode's, the vnode is marked as gone with
  * fuse_internal_vnode_disappear().
  *
  * Returns 0 on success, ENOENT if the file type changed, or the error from
  * the request.
  */
 int
 fuse_internal_do_getattr(struct vnode *vp, struct vattr *vap,
 	struct ucred *cred, struct thread *td)
 {
 	struct fuse_dispatcher fdi;
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 	struct fuse_getattr_in *fgai;
 	struct fuse_attr_out *fao;
 	/* Remember the cached size before the reply can overwrite it */
 	off_t old_filesize = fvdat->cached_attrs.va_size;
 	enum vtype vtyp;
 	int err;
 
 	/*
 	 * NOTE(review): the dispatcher is initialized with a payload size of
 	 * 0, yet fgai->getattr_flags is written below.  Confirm that the
 	 * request buffer has room for struct fuse_getattr_in.
 	 */
 	fdisp_init(&fdi, 0);
 	fdisp_make_vp(&fdi, FUSE_GETATTR, vp, td, cred);
 	fgai = fdi.indata;
 	/* 
 	 * We could look up a file handle and set it in fgai->fh, but that
 	 * involves extra runtime work and I'm unaware of any file systems that
 	 * care.
 	 */
 	fgai->getattr_flags = 0;
 	/*
 	 * NOTE(review): fdisp_simple_putget_vp() is given the opcode, vnode,
 	 * thread and credentials again; verify that it does not rebuild the
 	 * request and discard the field set above.
 	 */
 	if ((err = fdisp_simple_putget_vp(&fdi, FUSE_GETATTR, vp, td, cred))) {
 		if (err == ENOENT)
 			fuse_internal_vnode_disappear(vp);
 		goto out;
 	}
 
 	fao = (struct fuse_attr_out *)fdi.answ;
 	vtyp = IFTOVT(fao->attr.mode);
 	/*
 	 * While FN_SIZECHANGE is set, keep the previously cached size rather
 	 * than the size reported by the daemon.
 	 */
 	if (fvdat->flag & FN_SIZECHANGE)
 		fao->attr.size = old_filesize;
 	fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid,
 		fao->attr_valid_nsec, vap);
 	/* The daemon now reports a different file type: treat vp as gone */
 	if (vtyp != vnode_vtype(vp)) {
 		fuse_internal_vnode_disappear(vp);
 		err = ENOENT;
 	}
 
 out:
 	fdisp_destroy(&fdi);
 	return err;
 }
 
 /*
  * Read a vnode's attributes into 'vap'.
  *
  * If VTOVA() yields cached attributes for 'vp' they are copied and 0 is
  * returned without contacting the daemon; otherwise the attributes are
  * fetched with fuse_internal_do_getattr() and its result is returned.
  */
 int
 fuse_internal_getattr(struct vnode *vp, struct vattr *vap, struct ucred *cred,
 	struct thread *td)
 {
 	struct vattr *cached;
 
 	cached = VTOVA(vp);
 	if (cached == NULL)
 		return (fuse_internal_do_getattr(vp, vap, cred, td));
 
 	*vap = *cached;	/* struct copy */
 	return (0);
 }
 
 /*
  * Mark 'vp' as revoked: set FN_REVOKED, reset both its attribute and entry
  * cache timeouts and purge its name cache entries.  The vnode must be
  * exclusively locked.
  */
 void
 fuse_internal_vnode_disappear(struct vnode *vp)
 {
 	struct fuse_vnode_data *fvdat;
 
 	ASSERT_VOP_ELOCKED(vp, "fuse_internal_vnode_disappear");
 
 	fvdat = VTOFUD(vp);
 	fvdat->flag |= FN_REVOKED;
 	bintime_clear(&fvdat->attr_cache_timeout);
 	bintime_clear(&fvdat->entry_cache_timeout);
 	cache_purge(vp);
 }
 
 /* fuse start/stop */
 
 /*
  * Callback for the daemon's answer to FUSE_INIT.
  *
  * Records the daemon's protocol version and, for protocol 7.5 and later, the
  * negotiated readahead, maximum write size and feature flags in the session
  * data.  On any failure the session is marked dead with fdata_set_dead().
  *
  * Whatever the outcome, FSESS_INITED is set and sleepers on &data->ticketer
  * are woken up.  Always returns 0.
  */
 int
 fuse_internal_init_callback(struct fuse_ticket *tick, struct uio *uio)
 {
 	int err = 0;
 	struct fuse_data *data = tick->tk_data;
 	struct fuse_init_out *fiio;
 
 	/* The daemon itself reported an error for FUSE_INIT */
 	if ((err = tick->tk_aw_ohead.error)) {
 		goto out;
 	}
 	if ((err = fticket_pull(tick, uio))) {
 		goto out;
 	}
 	fiio = fticket_resp(tick)->base;
 
 	/* XXX: Do we want to check anything further besides this? */
 	if (fiio->major < 7) {
 		SDT_PROBE2(fusefs, , internal, trace, 1,
 			"userpace version too low");
 		err = EPROTONOSUPPORT;
 		goto out;
 	}
 	data->fuse_libabi_major = fiio->major;
 	data->fuse_libabi_minor = fiio->minor;
 
 	if (fuse_libabi_geq(data, 7, 5)) {
 		/* The reply must be exactly one struct fuse_init_out */
 		if (fticket_resp(tick)->len == sizeof(struct fuse_init_out)) {
-			data->max_readahead = fiio->max_readahead;
+			data->max_readahead_blocks = fiio->max_readahead /
+				maxbcachebuf;
 			data->max_write = fiio->max_write;
 			if (fiio->flags & FUSE_ASYNC_READ)
 				data->dataflags |= FSESS_ASYNC_READ;
 			if (fiio->flags & FUSE_POSIX_LOCKS)
 				data->dataflags |= FSESS_POSIX_LOCKS;
 			if (fiio->flags & FUSE_EXPORT_SUPPORT)
 				data->dataflags |= FSESS_EXPORT_SUPPORT;
 			/* 
 			 * Don't bother to check FUSE_BIG_WRITES, because it's
 			 * redundant with max_write
 			 */
 		} else {
 			err = EINVAL;
 		}
 	} else {
 		/*
 		 * Old fix values
 		 *
 		 * NOTE(review): only max_write is assigned on this path; the
 		 * readahead setting keeps whatever value it had before.
 		 * Confirm that it is initialized elsewhere.
 		 */
 		data->max_write = 4096;
 	}
 
 out:
 	if (err) {
 		fdata_set_dead(data);
 	}
 	/* Announce that initialization has finished, successfully or not */
 	FUSE_LOCK();
 	data->dataflags |= FSESS_INITED;
 	wakeup(&data->ticketer);
 	FUSE_UNLOCK();
 
 	return 0;
 }
 
 /*
  * Queue the FUSE_INIT request that opens the session described by 'data'.
  *
  * The request advertises this kernel's protocol version, a maximum
  * readahead of one buffer cache block and the supported feature flags.  It
  * is sent without waiting; the answer is processed by
  * fuse_internal_init_callback().
  */
 void
 fuse_internal_send_init(struct fuse_data *data, struct thread *td)
 {
 	struct fuse_dispatcher fdi;
 	struct fuse_init_in *init_req;
 
 	fdisp_init(&fdi, sizeof(*init_req));
 	fdisp_make(&fdi, FUSE_INIT, data->mp, 0, td, NULL);
 
 	init_req = fdi.indata;
 	init_req->major = FUSE_KERNEL_VERSION;
 	init_req->minor = FUSE_KERNEL_MINOR_VERSION;
 	/* 
 	 * fusefs currently reads ahead no more than one cache block at a time.
 	 * See fuse_read_biobackend
 	 */
 	init_req->max_readahead = maxbcachebuf;
 	/*
 	 * Unsupported features:
 	 * FUSE_FILE_OPS: No known FUSE server or client supports it
 	 * FUSE_ATOMIC_O_TRUNC: our VFS cannot support it
 	 * FUSE_DONT_MASK: unlike Linux, FreeBSD always applies the umask, even
 	 *	when default ACLs are in use.
 	 */
 	init_req->flags = FUSE_ASYNC_READ | FUSE_POSIX_LOCKS |
 	    FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES;
 
 	fuse_insert_callback(fdi.tick, fuse_internal_init_callback);
 	fuse_insert_message(fdi.tick, false);
 	fdisp_destroy(&fdi);
 }
 
 /* 
  * Send a FUSE_SETATTR operation with no permissions checks.  If cred is NULL,
  * send the request with root credentials
  */
 int fuse_internal_setattr(struct vnode *vp, struct vattr *vap,
 	struct thread *td, struct ucred *cred)
 {
 	struct fuse_dispatcher fdi;
 	struct fuse_setattr_in *fsai;
 	struct mount *mp;
 	pid_t pid = td->td_proc->p_pid;
 	struct fuse_data *data;
 	int dataflags;
 	int err = 0;
 	enum vtype vtyp;
 	int sizechanged = -1;
 	uint64_t newsize = 0;
 
 	mp = vnode_mount(vp);
 	data = fuse_get_mpdata(mp);
 	dataflags = data->dataflags;
 
 	fdisp_init(&fdi, sizeof(*fsai));
 	fdisp_make_vp(&fdi, FUSE_SETATTR, vp, td, cred);
 	if (!cred) {
 		fdi.finh->uid = 0;
 		fdi.finh->gid = 0;
 	}
 	fsai = fdi.indata;
 	fsai->valid = 0;
 
 	if (vap->va_uid != (uid_t)VNOVAL) {
 		fsai->uid = vap->va_uid;
 		fsai->valid |= FATTR_UID;
 	}
 	if (vap->va_gid != (gid_t)VNOVAL) {
 		fsai->gid = vap->va_gid;
 		fsai->valid |= FATTR_GID;
 	}
 	if (vap->va_size != VNOVAL) {
 		struct fuse_filehandle *fufh = NULL;
 
 		/*Truncate to a new value. */
 		fsai->size = vap->va_size;
 		sizechanged = 1;
 		newsize = vap->va_size;
 		fsai->valid |= FATTR_SIZE;
 
 		fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid);
 		if (fufh) {
 			fsai->fh = fufh->fh_id;
 			fsai->valid |= FATTR_FH;
 		}
 		VTOFUD(vp)->flag &= ~FN_SIZECHANGE;
 	}
 	if (vap->va_atime.tv_sec != VNOVAL) {
 		fsai->atime = vap->va_atime.tv_sec;
 		fsai->atimensec = vap->va_atime.tv_nsec;
 		fsai->valid |= FATTR_ATIME;
 		if (vap->va_vaflags & VA_UTIMES_NULL)
 			fsai->valid |= FATTR_ATIME_NOW;
 	}
 	if (vap->va_mtime.tv_sec != VNOVAL) {
 		fsai->mtime = vap->va_mtime.tv_sec;
 		fsai->mtimensec = vap->va_mtime.tv_nsec;
 		fsai->valid |= FATTR_MTIME;
 		if (vap->va_vaflags & VA_UTIMES_NULL)
 			fsai->valid |= FATTR_MTIME_NOW;
 	}
 	if (vap->va_mode != (mode_t)VNOVAL) {
 		fsai->mode = vap->va_mode & ALLPERMS;
 		fsai->valid |= FATTR_MODE;
 	}
 	if (!fsai->valid) {
 		goto out;
 	}
 
 	if ((err = fdisp_wait_answ(&fdi)))
 		goto out;
 	vtyp = IFTOVT(((struct fuse_attr_out *)fdi.answ)->attr.mode);
 
 	if (vnode_vtype(vp) != vtyp) {
 		if (vnode_vtype(vp) == VNON && vtyp != VNON) {
 			SDT_PROBE2(fusefs, , internal, trace, 1, "FUSE: Dang! "
 				"vnode_vtype is VNON and vtype isn't.");
 		} else {
 			/*
 	                 * STALE vnode, ditch
 	                 *
 			 * The vnode has changed its type "behind our back".
 			 * There's nothing really we can do, so let us just
 			 * force an internal revocation and tell the caller to
 			 * try again, if interested.
 	                 */
 			fuse_internal_vnode_disappear(vp);
 			err = EAGAIN;
 		}
 	}
 	if (err == 0) {
 		struct fuse_attr_out *fao = (struct fuse_attr_out*)fdi.answ;
 		fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid,
 			fao->attr_valid_nsec, NULL);
 	}
 
 out:
 	fdisp_destroy(&fdi);
 	return err;
 }
 
 #ifdef ZERO_PAD_INCOMPLETE_BUFS
 /*
  * Report whether the first 'len' bytes at 'buf' are all zero.
  *
  * Returns 1 if every byte is zero (which includes len == 0), otherwise 0.
  */
 static int
 isbzero(void *buf, size_t len)
 {
 	const unsigned char *bytes = buf;
 	size_t i;	/* same type as len, so the comparison cannot wrap */
 
 	for (i = 0; i < len; i++) {
 		if (bytes[i] != 0)
 			return (0);
 	}
 
 	return (1);
 }
 
 #endif
Index: projects/fuse2/sys/fs/fuse/fuse_io.c
===================================================================
--- projects/fuse2/sys/fs/fuse/fuse_io.c	(revision 349237)
+++ projects/fuse2/sys/fs/fuse/fuse_io.c	(revision 349238)
@@ -1,1029 +1,1029 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2007-2009 Google Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
  *
  * * Redistributions of source code must retain the above copyright
  *   notice, this list of conditions and the following disclaimer.
  * * Redistributions in binary form must reproduce the above
  *   copyright notice, this list of conditions and the following disclaimer
  *   in the documentation and/or other materials provided with the
  *   distribution.
  * * Neither the name of Google Inc. nor the names of its
  *   contributors may be used to endorse or promote products derived from
  *   this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Copyright (C) 2005 Csaba Henk.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/types.h>
 #include <sys/module.h>
 #include <sys/systm.h>
 #include <sys/errno.h>
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/conf.h>
 #include <sys/uio.h>
 #include <sys/malloc.h>
 #include <sys/queue.h>
 #include <sys/lock.h>
 #include <sys/sx.h>
 #include <sys/mutex.h>
 #include <sys/rwlock.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/mount.h>
 #include <sys/vnode.h>
 #include <sys/stat.h>
 #include <sys/unistd.h>
 #include <sys/filedesc.h>
 #include <sys/file.h>
 #include <sys/fcntl.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/sysctl.h>
 #include <sys/vmmeter.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_page.h>
 #include <vm/vm_object.h>
 
 #include "fuse.h"
 #include "fuse_file.h"
 #include "fuse_node.h"
 #include "fuse_internal.h"
 #include "fuse_ipc.h"
 #include "fuse_io.h"
 
 SDT_PROVIDER_DECLARE(fusefs);
 /* 
  * Fuse trace probe:
  * arg0: verbosity.  Higher numbers give more verbose messages
  * arg1: Textual message
  */
 SDT_PROBE_DEFINE2(fusefs, , io, trace, "int", "char*");
 
 static void
 fuse_io_clear_suid_on_write(struct vnode *vp, struct ucred *cred,
 	struct thread *td);
 static int 
 fuse_read_directbackend(struct vnode *vp, struct uio *uio,
     struct ucred *cred, struct fuse_filehandle *fufh);
 static int 
 fuse_read_biobackend(struct vnode *vp, struct uio *uio, int ioflag,
     struct ucred *cred, struct fuse_filehandle *fufh, pid_t pid);
 static int 
 fuse_write_directbackend(struct vnode *vp, struct uio *uio,
     struct ucred *cred, struct fuse_filehandle *fufh, off_t filesize,
     int ioflag, bool pages);
 static int 
 fuse_write_biobackend(struct vnode *vp, struct uio *uio,
     struct ucred *cred, struct fuse_filehandle *fufh, int ioflag, pid_t pid);
 
 /*
  * FreeBSD clears the SUID and SGID bits on any write by a non-root user.
  *
  * This is only done for mounts with FSESS_DEFAULT_PERMISSIONS set, and only
  * when priv_check_cred() denies PRIV_VFS_RETAINSUGID to cred.  The current
  * mode is fetched with fuse_internal_getattr(); if either bit is set, a
  * setattr carrying only the stripped mode is issued.
  *
  * Nothing is reported to the caller: this runs after the write itself, and
  * a failure here must not turn into a failing status for that write.
  */
 static void
 fuse_io_clear_suid_on_write(struct vnode *vp, struct ucred *cred,
 	struct thread *td)
 {
 	struct fuse_data *data;
 	struct vattr va;
 	mode_t mode;
 
 	data = fuse_get_mpdata(vnode_mount(vp));
 	if ((data->dataflags & FSESS_DEFAULT_PERMISSIONS) == 0)
 		return;
 
 	/* A zero return means cred holds the privilege: leave the bits alone */
 	if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID) == 0)
 		return;
 
 	/*
 	 * Don't inspect va unless the getattr succeeded; otherwise va_mode
 	 * may be uninitialized stack contents.
 	 */
 	if (fuse_internal_getattr(vp, &va, cred, td) != 0)
 		return;
 
 	if ((va.va_mode & (S_ISUID | S_ISGID)) == 0)
 		return;
 
 	mode = va.va_mode & ~(S_ISUID | S_ISGID);
 	/* Clear all vattr fields except mode */
 	vattr_null(&va);
 	va.va_mode = mode;
 
 	/*
 	 * Ignore fuse_internal_setattr's return value, because at this point
 	 * the write operation has already succeeded and we don't want to
 	 * return failing status for that.
 	 */
 	(void)fuse_internal_setattr(vp, &va, td, NULL);
 }
 
 SDT_PROBE_DEFINE5(fusefs, , io, io_dispatch, "struct vnode*", "struct uio*",
 		"int", "struct ucred*", "struct fuse_filehandle*");
 /*
  * Carry out the read or write described by uio on vp, choosing between the
  * direct and the buffered (bio) backend.
  *
  * vp     - vnode to operate on; must be VREG or VDIR
  * uio    - describes the transfer; uio_rw selects read vs. write
  * ioflag - IO_* flags; IO_DIRECT forces the direct backend
  * pages  - true when called from VOP_PUTPAGES, i.e. the data being written
  *          comes from the write cache itself
  * cred   - credentials used for the file handle lookup and the I/O
  * pid    - pid used for the file handle lookup
  *
  * Returns 0 on success or an errno value.
  */
 int
 fuse_io_dispatch(struct vnode *vp, struct uio *uio, int ioflag, bool pages,
     struct ucred *cred, pid_t pid)
 {
 	struct fuse_filehandle *fufh;
 	int err, directio;
 	int fflag;
 	bool closefufh = false;
 
 	MPASS(vp->v_type == VREG || vp->v_type == VDIR);
 
 	fflag = (uio->uio_rw == UIO_READ) ? FREAD : FWRITE;
 	err = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid);
 	if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) {
 		/* 
 		 * nfsd will do I/O without first doing VOP_OPEN.  We
 		 * must implicitly open the file here
 		 */
 		err = fuse_filehandle_open(vp, fflag, &fufh, curthread, cred);
 		/*
 		 * Only a handle we actually opened may be closed on the way
 		 * out.  If the implicit open failed there is nothing of ours
 		 * to close, so return right away instead of going through
 		 * the cleanup path with closefufh set.
 		 */
 		if (err)
 			return (err);
 		closefufh = true;
 	} else if (err) {
 		printf("FUSE: io dispatch: filehandles are closed\n");
 		return err;
 	}
 	SDT_PROBE5(fusefs, , io, io_dispatch, vp, uio, ioflag, cred, fufh);
 
 	/*
 	 * Ideally, when the daemon asks for direct io at open time, the
 	 * standard file flag should be set according to this, so that would
 	 * just change the default mode, which later on could be changed via
 	 * fcntl(2).
 	 * But this doesn't work, the O_DIRECT flag gets cleared at some point
 	 * (don't know where). So to make any use of the Fuse direct_io option,
 	 * we hardwire it into the file's private data (similarly to Linux,
 	 * btw.).
 	 */
 	directio = (ioflag & IO_DIRECT) || !fsess_opt_datacache(vnode_mount(vp));
 
 	switch (uio->uio_rw) {
 	case UIO_READ:
 		if (directio) {
 			SDT_PROBE2(fusefs, , io, trace, 1,
 				"direct read of vnode");
 			err = fuse_read_directbackend(vp, uio, cred, fufh);
 		} else {
 			SDT_PROBE2(fusefs, , io, trace, 1,
 				"buffered read of vnode");
 			err = fuse_read_biobackend(vp, uio, ioflag, cred, fufh,
 				pid);
 		}
 		break;
 	case UIO_WRITE:
 		if (directio) {
 			const int iosize = fuse_iosize(vp);
 			off_t start, end, filesize;
 
 			SDT_PROBE2(fusefs, , io, trace, 1,
 				"direct write of vnode");
 
 			err = fuse_vnode_size(vp, &filesize, cred, curthread);
 			if (err)
 				goto out;
 
 			start = uio->uio_offset;
 			end = start + uio->uio_resid;
 			/* 
 			 * Invalidate the write cache unless we're coming from
 			 * VOP_PUTPAGES, in which case we're writing _from_ the
 			 * write cache
 			 */
 			if (!pages )
 				v_inval_buf_range(vp, start, end, iosize);
 			err = fuse_write_directbackend(vp, uio, cred, fufh,
 				filesize, ioflag, pages);
 		} else {
 			SDT_PROBE2(fusefs, , io, trace, 1,
 				"buffered write of vnode");
 			/* Write-through caching: make every write synchronous */
 			if (fuse_data_cache_mode == FUSE_CACHE_WT)
 				ioflag |= IO_SYNC;
 			err = fuse_write_biobackend(vp, uio, cred, fufh, ioflag,
 				pid);
 		}
 		fuse_io_clear_suid_on_write(vp, cred, uio->uio_td);
 		break;
 	default:
 		panic("uninterpreted mode passed to fuse_io_dispatch");
 	}
 
 out:
 	/* Drop the handle that was implicitly opened on behalf of nfsd */
 	if (closefufh)
 		fuse_filehandle_close(vp, fufh, curthread, cred);
 
 	return (err);
 }
 
 SDT_PROBE_DEFINE4(fusefs, , io, read_bio_backend_start, "int", "int", "int", "int");
 SDT_PROBE_DEFINE2(fusefs, , io, read_bio_backend_feed, "int", "struct buf*");
 SDT_PROBE_DEFINE4(fusefs, , io, read_bio_backend_end, "int", "ssize_t", "int",
 		"struct buf*");
 /*
  * Buffered read: copy data from vp into uio through the buffer cache, one
  * logical block of fuse_iosize(vp) bytes at a time.
  *
  * The loop ends when the uio is satisfied, when the offset reaches the file
  * size reported by fuse_vnode_size(), or on the first error.  Returns 0 or
  * an errno value: EINVAL for a negative starting offset, ENXIO if
  * fuse_isdeadfs() reports the file system dead, otherwise whatever the
  * size lookup, the buffer read or uiomove() returned.
  *
  * fufh and pid are not referenced in this function's body.
  */
 static int
 fuse_read_biobackend(struct vnode *vp, struct uio *uio, int ioflag,
     struct ucred *cred, struct fuse_filehandle *fufh, pid_t pid)
 {
 	struct buf *bp;
 	struct mount *mp;
 	struct fuse_data *data;
 	daddr_t lbn, nextlbn;
 	int bcount, nextsize;
 	int err, n = 0, on = 0, seqcount;
 	off_t filesize;
 
 	const int biosize = fuse_iosize(vp);
 	mp = vnode_mount(vp);
 	data = fuse_get_mpdata(mp);
 
 	if (uio->uio_offset < 0)
 		return (EINVAL);
 
 	/* Sequential-access hint carried in the upper bits of ioflag */
 	seqcount = ioflag >> IO_SEQSHIFT;
 
 	err = fuse_vnode_size(vp, &filesize, cred, curthread);
 	if (err)
 		return err;
 
 	for (err = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
 		if (fuse_isdeadfs(vp)) {
 			err = ENXIO;
 			break;
 		}
 		/* At or past EOF: nothing more to read */
 		if (filesize - uio->uio_offset <= 0)
 			break;
 		/*
 		 * lbn is the logical block holding the offset, on the offset
 		 * within it.  The mask assumes biosize is a power of two --
 		 * TODO confirm.
 		 */
 		lbn = uio->uio_offset / biosize;
 		on = uio->uio_offset & (biosize - 1);
 
 		/* bcount: how many bytes of this block lie before EOF */
 		if ((off_t)lbn * biosize >= filesize) {
 			bcount = 0;
 		} else if ((off_t)(lbn + 1) * biosize > filesize) {
 			bcount = filesize - (off_t)lbn *biosize;
 		} else {
 			bcount = biosize;
 		}
 		nextlbn = lbn + 1;
 		/*
 		 * NOTE(review): nextsize is not clamped at zero here; it is
 		 * only passed to breadn() below, on a path where bcount ==
 		 * biosize.
 		 */
 		nextsize = MIN(biosize, filesize - nextlbn * biosize);
 
 		SDT_PROBE4(fusefs, , io, read_bio_backend_start,
 			biosize, (int)lbn, on, bcount);
 
 		if (bcount < biosize) {
 			/* If near EOF, don't do readahead */
 			err = bread(vp, lbn, bcount, NOCRED, &bp);
 		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
 			/* Try clustered read */
 			long totread = uio->uio_resid + on;
 			seqcount = MIN(seqcount,
-				data->max_readahead / biosize + 1);
+				data->max_readahead_blocks + 1);
 			err = cluster_read(vp, filesize, lbn, bcount, NOCRED,
 				totread, seqcount, 0, &bp);
-		} else if (seqcount > 1 && data->max_readahead >= nextsize) {
+		} else if (seqcount > 1 && data->max_readahead_blocks >= 1) {
 			/* Try non-clustered readahead */
 			err = breadn(vp, lbn, bcount, &nextlbn, &nextsize, 1,
 				NOCRED, &bp);
 		} else {
 			/* Just read what was requested */
 			err = bread(vp, lbn, bcount, NOCRED, &bp);
 		}
 
 		if (err) {
 			/*
 			 * NOTE(review): presumably bp may be NULL here after a
 			 * failed read -- confirm brelse() tolerates that.
 			 */
 			brelse(bp);
 			bp = NULL;
 			break;
 		}
 
 		/*
 		 * on is the offset into the current bp.  Figure out how many
 		 * bytes we can copy out of the bp.  Note that bcount is
 		 * NOT DEV_BSIZE aligned.
 		 *
 		 * Then figure out how many bytes we can copy into the uio.
 		 */
 
 		n = 0;
 		if (on < bcount)
 			n = MIN((unsigned)(bcount - on), uio->uio_resid);
 		if (n > 0) {
 			SDT_PROBE2(fusefs, , io, read_bio_backend_feed, n, bp);
 			err = uiomove(bp->b_data + on, n, uio);
 		}
 		vfs_bio_brelse(bp, ioflag);
 		/* bp is passed to the probe as a value only, after release */
 		SDT_PROBE4(fusefs, , io, read_bio_backend_end, err,
 			uio->uio_resid, n, bp);
 	}
 
 	return (err);
 }
 
 SDT_PROBE_DEFINE1(fusefs, , io, read_directbackend_start,
 	"struct fuse_read_in*");
 SDT_PROBE_DEFINE3(fusefs, , io, read_directbackend_complete,
 	"struct fuse_dispatcher*", "struct fuse_read_in*", "struct uio*");
 
 static int
 fuse_read_directbackend(struct vnode *vp, struct uio *uio,
     struct ucred *cred, struct fuse_filehandle *fufh)
 {
 	struct fuse_data *data;
 	struct fuse_dispatcher fdi;
 	struct fuse_read_in *fri;
 	int err = 0;
 
 	data = fuse_get_mpdata(vp->v_mount);
 
 	if (uio->uio_resid == 0)
 		return (0);
 
 	fdisp_init(&fdi, 0);
 
 	/*
          * XXX In "normal" case we use an intermediate kernel buffer for
          * transmitting data from daemon's context to ours. Eventually, we should
          * get rid of this. Anyway, if the target uio lives in sysspace (we are
          * called from pageops), and the input data doesn't need kernel-side
          * processing (we are not called from readdir) we can already invoke
          * an optimized, "peer-to-peer" I/O routine.
          */
 	while (uio->uio_resid > 0) {
 		fdi.iosize = sizeof(*fri);
 		fdisp_make_vp(&fdi, FUSE_READ, vp, uio->uio_td, cred);
 		fri = fdi.indata;
 		fri->fh = fufh->fh_id;
 		fri->offset = uio->uio_offset;
 		fri->size = MIN(uio->uio_resid,
 		    fuse_get_mpdata(vp->v_mount)->max_read);
 		if (fuse_libabi_geq(data, 7, 9)) {
 			/* See comment regarding FUSE_WRITE_LOCKOWNER */
 			fri->read_flags = 0;
 			fri->flags = fufh_type_2_fflags(fufh->fufh_type);
 		}
 
 		SDT_PROBE1(fusefs, , io, read_directbackend_start, fri);
 
 		if ((err = fdisp_wait_answ(&fdi)))
 			goto out;
 
 		SDT_PROBE3(fusefs, , io, read_directbackend_complete,
 			&fdi, fri, uio);
 
 		if ((err = uiomove(fdi.answ, MIN(fri->size, fdi.iosize), uio)))
 			break;
 		if (fdi.iosize < fri->size)
 			break;
 	}
 
 out:
 	fdisp_destroy(&fdi);
 	return (err);
 }
 
 /*
  * Unbuffered write: send the contents of uio to the daemon in FUSE_WRITE
  * requests of at most data->max_write bytes each.
  *
  * filesize - the caller's view of the current file size; used as the
  *            offset for IO_APPEND and to decide whether the vnode's
  *            recorded size must be updated after each request
  * ioflag   - IO_APPEND and IO_DIRECT are examined here
  * pages    - only influences write_flags in this function
  *
  * Returns 0 or an errno value: EFBIG when vn_rlimit_fsize() objects,
  * EINVAL when the daemon claims to have written more than it was given,
  * and EINTR in place of ERESTART (see below).  On an interrupted request
  * the uio's resid and offset are rewound by the size of that request.
  */
 static int
 fuse_write_directbackend(struct vnode *vp, struct uio *uio,
     struct ucred *cred, struct fuse_filehandle *fufh, off_t filesize,
     int ioflag, bool pages)
 {
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 	struct fuse_data *data;
 	struct fuse_write_in *fwi;
 	struct fuse_write_out *fwo;
 	struct fuse_dispatcher fdi;
 	size_t chunksize;
 	void *fwi_data;
 	off_t as_written_offset;
 	int diff;
 	int err = 0;
 	bool direct_io = fufh->fuse_open_flags & FOPEN_DIRECT_IO;
 	uint32_t write_flags;
 
 	data = fuse_get_mpdata(vp->v_mount);
 
 	/* 
 	 * Don't set FUSE_WRITE_LOCKOWNER in write_flags.  It can't be set
 	 * accurately when using POSIX AIO, libfuse doesn't use it, and I'm not
 	 * aware of any file systems that do.  It was an attempt to add
 	 * Linux-style mandatory locking to the FUSE protocol, but mandatory
 	 * locking is deprecated even on Linux.  See Linux commit
 	 * f33321141b273d60cbb3a8f56a5489baad82ba5e .
 	 */
 	/*
 	 * Set FUSE_WRITE_CACHE whenever we don't know the uid, gid, and/or pid
 	 * that originated a write.  For example when writing from the
 	 * writeback cache.  I don't know of a single file system that cares,
 	 * but the protocol says we're supposed to do this.
 	 */
 	/*
 	 * Parses as (!pages && (...)) ? 0 : FUSE_WRITE_CACHE: the flag is
 	 * set when pages is true, or when none of the three conditions holds.
 	 */
 	write_flags = !pages && (
 		(ioflag & IO_DIRECT) ||
 		!fsess_opt_datacache(vnode_mount(vp)) ||
 		fuse_data_cache_mode != FUSE_CACHE_WB) ? 0 : FUSE_WRITE_CACHE;
 
 	if (uio->uio_resid == 0)
 		return (0);
 
 	if (ioflag & IO_APPEND)
 		uio_setoffset(uio, filesize);
 
 	if (vn_rlimit_fsize(vp, uio, uio->uio_td))
 		return (EFBIG);
 
 	fdisp_init(&fdi, 0);
 
 	while (uio->uio_resid > 0) {
 		chunksize = MIN(uio->uio_resid, data->max_write);
 
 		/* Request = header followed by chunksize bytes of payload */
 		fdi.iosize = sizeof(*fwi) + chunksize;
 		fdisp_make_vp(&fdi, FUSE_WRITE, vp, uio->uio_td, cred);
 
 		fwi = fdi.indata;
 		fwi->fh = fufh->fh_id;
 		fwi->offset = uio->uio_offset;
 		fwi->size = chunksize;
 		fwi->write_flags = write_flags;
 		/*
 		 * Before protocol 7.9 the header is FUSE_COMPAT_WRITE_IN_SIZE
 		 * bytes long, so the payload starts at a different offset and
 		 * the flags field is not filled in.
 		 */
 		if (fuse_libabi_geq(data, 7, 9)) {
 			fwi->flags = fufh_type_2_fflags(fufh->fufh_type);
 			fwi_data = (char *)fdi.indata + sizeof(*fwi);
 		} else {
 			fwi_data = (char *)fdi.indata +
 				FUSE_COMPAT_WRITE_IN_SIZE;
 		}
 
 		/* Copy the payload in; this advances uio by chunksize */
 		if ((err = uiomove(fwi_data, chunksize, uio)))
 			break;
 
 retry:
 		err = fdisp_wait_answ(&fdi);
 		if (err == ERESTART || err == EINTR || err == EWOULDBLOCK) {
 			/*
 			 * Rewind the uio so dofilewrite will know it's
 			 * incomplete
 			 */
 			uio->uio_resid += fwi->size;
 			uio->uio_offset -= fwi->size;
 			/* 
 			 * Change ERESTART into EINTR because we can't rewind
 			 * uio->uio_iov.  Basically, once uiomove(9) has been
 			 * called, it's impossible to restart a syscall.
 			 */
 			if (err == ERESTART)
 				err = EINTR;
 			break;
 		} else if (err) {
 			break;
 		}
 
 		fwo = ((struct fuse_write_out *)fdi.answ);
 
 		/* Adjust the uio in the case of short writes */
 		/* diff = bytes sent but not acknowledged as written */
 		diff = fwi->size - fwo->size;
 		as_written_offset = uio->uio_offset - diff;
 
 		/*
 		 * NOTE(review): as_written_offset already has diff subtracted,
 		 * so the two comparisons below subtract it a second time.
 		 * Confirm whether that is intended.  Also note that diff is
 		 * used here before the diff < 0 sanity check further down.
 		 */
 		if (as_written_offset - diff > filesize &&
 		    fuse_data_cache_mode != FUSE_CACHE_UC)
 			fuse_vnode_setsize(vp, as_written_offset);
 		if (as_written_offset - diff >= filesize)
 			fvdat->flag &= ~FN_SIZECHANGE;
 
 		if (diff < 0) {
 			printf("WARNING: misbehaving FUSE filesystem "
 				"wrote more data than we provided it\n");
 			err = EINVAL;
 			break;
 		} else if (diff > 0) {
 			/* Short write */
 			if (!direct_io) {
 				printf("WARNING: misbehaving FUSE filesystem: "
 					"short writes are only allowed with "
 					"direct_io\n");
 			}
 			if (ioflag & IO_DIRECT) {
 				/* Return early */
 				uio->uio_resid += diff;
 				uio->uio_offset -= diff;
 				break;
 			} else {
 				/* Resend the unwritten portion of data */
 				fdi.iosize = sizeof(*fwi) + diff;
 				/* Refresh fdi without clearing data buffer */
 				fdisp_refresh_vp(&fdi, FUSE_WRITE, vp,
 					uio->uio_td, cred);
 				fwi = fdi.indata;
 				MPASS2(fwi == fdi.indata, "FUSE dispatcher "
 					"reallocated despite no increase in "
 					"size?");
 				/*
 				 * Slide the unwritten tail to the front of the
 				 * payload area; the regions may overlap, hence
 				 * memmove.
 				 */
 				void *src = (char*)fwi_data + fwo->size;
 				memmove(fwi_data, src, diff);
 				fwi->fh = fufh->fh_id;
 				fwi->offset = as_written_offset;
 				fwi->size = diff;
 				fwi->write_flags = write_flags;
 				goto retry;
 			}
 		}
 	}
 
 	fdisp_destroy(&fdi);
 
 	return (err);
 }
 
 SDT_PROBE_DEFINE6(fusefs, , io, write_biobackend_start, "int64_t", "int", "int",
 		"struct uio*", "int", "bool");
 SDT_PROBE_DEFINE2(fusefs, , io, write_biobackend_append_race, "long", "int");
 SDT_PROBE_DEFINE2(fusefs, , io, write_biobackend_issue, "int", "struct buf*");
 
 /*
  * Buffered write: copy the contents of uio into buffer-cache buffers of
  * vp, one logical block of fuse_iosize(vp) bytes at a time, and hand each
  * buffer to the buffer cache for synchronous, asynchronous, clustered or
  * delayed write-out depending on ioflag and system memory pressure.
  *
  * Returns 0 or an errno value: EIO for non-regular files, EINVAL for a
  * negative offset, EFBIG when vn_rlimit_fsize() objects, ENXIO if the file
  * system is dead, EINTR if a buffer could not be obtained, otherwise the
  * error from the size lookup, the read-before-write, uiomove() or bwrite().
  *
  * fufh and pid are not referenced in this function's body.
  */
 static int
 fuse_write_biobackend(struct vnode *vp, struct uio *uio,
     struct ucred *cred, struct fuse_filehandle *fufh, int ioflag, pid_t pid)
 {
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 	struct buf *bp;
 	daddr_t lbn;
 	off_t filesize;
 	int bcount;
 	int n, on, seqcount, err = 0;
 	bool last_page;
 
 	const int biosize = fuse_iosize(vp);
 
 	/* Sequential-access hint carried in the upper bits of ioflag */
 	seqcount = ioflag >> IO_SEQSHIFT;
 
 	KASSERT(uio->uio_rw == UIO_WRITE, ("fuse_write_biobackend mode"));
 	if (vp->v_type != VREG)
 		return (EIO);
 	if (uio->uio_offset < 0)
 		return (EINVAL);
 	if (uio->uio_resid == 0)
 		return (0);
 
 	err = fuse_vnode_size(vp, &filesize, cred, curthread);
 	if (err)
 		return err;
 
 	if (ioflag & IO_APPEND)
 		uio_setoffset(uio, filesize);
 
 	if (vn_rlimit_fsize(vp, uio, uio->uio_td))
 		return (EFBIG);
 
 	do {
 		bool direct_append, extending;
 
 		if (fuse_isdeadfs(vp)) {
 			err = ENXIO;
 			break;
 		}
 		/*
 		 * lbn: logical block containing the offset; on: offset inside
 		 * that block; n: bytes to copy into it this iteration.  The
 		 * mask assumes biosize is a power of two -- TODO confirm.
 		 */
 		lbn = uio->uio_offset / biosize;
 		on = uio->uio_offset & (biosize - 1);
 		n = MIN((unsigned)(biosize - on), uio->uio_resid);
 
 again:
 		/* Get or create a buffer for the write */
 		direct_append = uio->uio_offset == filesize && n;
 		if (uio->uio_offset + n < filesize) {
 			extending = false;
 			if ((off_t)(lbn + 1) * biosize < filesize) {
 				/* Not the file's last block */
 				bcount = biosize;
 			} else {
 				/* The file's last block */
 				bcount = filesize - (off_t)lbn * biosize;
 			}
 		} else {
 			extending = true;
 			bcount = on + n;
 		}
 		/* NOTE(review): last_page is computed but not read below. */
 		if (howmany(((off_t)lbn * biosize + on + n - 1), PAGE_SIZE) >=
 		    howmany(filesize, PAGE_SIZE))
 			last_page = true;
 		else
 			last_page = false;
 		if (direct_append) {
 			/* 
 			 * Take care to preserve the buffer's B_CACHE state so
 			 * as not to cause an unnecessary read.
 			 */
 			bp = getblk(vp, lbn, on, PCATCH, 0, 0);
 			if (bp != NULL) {
 				uint32_t save = bp->b_flags & B_CACHE;
 				allocbuf(bp, bcount);
 				bp->b_flags |= save;
 			}
 		} else {
 			bp = getblk(vp, lbn, bcount, PCATCH, 0, 0);
 		}
 		/* getblk was called with PCATCH; NULL is reported as EINTR */
 		if (!bp) {
 			err = EINTR;
 			break;
 		}
 		if (extending) {
 			/* 
 			 * Extend file _after_ locking buffer so we won't race
 			 * with other readers
 			 */
 			err = fuse_vnode_setsize(vp, uio->uio_offset + n);
 			/*
 			 * Note: filesize and FN_SIZECHANGE are updated before
 			 * err is examined.
 			 */
 			filesize = uio->uio_offset + n;
 			fvdat->flag |= FN_SIZECHANGE;
 			if (err) {
 				brelse(bp);
 				break;
 			} 
 		}
 
 		SDT_PROBE6(fusefs, , io, write_biobackend_start,
 			lbn, on, n, uio, bcount, direct_append);
 		/*
 		 * Issue a READ if B_CACHE is not set.  In special-append
 		 * mode, B_CACHE is based on the buffer prior to the write
 		 * op and is typically set, avoiding the read.  If a read
 		 * is required in special append mode, the server will
 		 * probably send us a short-read since we extended the file
 		 * on our end, resulting in b_resid == 0 and, thusly,
 		 * B_CACHE getting set.
 		 *
 		 * We can also avoid issuing the read if the write covers
 		 * the entire buffer.  We have to make sure the buffer state
 		 * is reasonable in this case since we will not be initiating
 		 * I/O.  See the comments in kern/vfs_bio.c's getblk() for
 		 * more information.
 		 *
 		 * B_CACHE may also be set due to the buffer being cached
 		 * normally.
 		 */
 
 		if (on == 0 && n == bcount) {
 			bp->b_flags |= B_CACHE;
 			bp->b_flags &= ~B_INVAL;
 			bp->b_ioflags &= ~BIO_ERROR;
 		}
 		if ((bp->b_flags & B_CACHE) == 0) {
 			/* Read-modify-write: fill the buffer from the daemon */
 			bp->b_iocmd = BIO_READ;
 			vfs_busy_pages(bp, 0);
 			fuse_io_strategy(vp, bp);
 			if ((err = bp->b_error)) {
 				brelse(bp);
 				break;
 			}
 		}
 		/* Record the writer's credentials for the later write-out */
 		if (bp->b_wcred == NOCRED)
 			bp->b_wcred = crhold(cred);
 
 		/*
 		 * If dirtyend exceeds file size, chop it down.  This should
 		 * not normally occur but there is an append race where it
 		 * might occur XXX, so we log it.
 		 *
 		 * If the chopping creates a reverse-indexed or degenerate
 		 * situation with dirtyoff/end, we 0 both of them.
 		 */
 		if (bp->b_dirtyend > bcount) {
 			SDT_PROBE2(fusefs, , io, write_biobackend_append_race,
 			    (long)bp->b_blkno * biosize,
 			    bp->b_dirtyend - bcount);
 			bp->b_dirtyend = bcount;
 		}
 		if (bp->b_dirtyoff >= bp->b_dirtyend)
 			bp->b_dirtyoff = bp->b_dirtyend = 0;
 
 		/*
 		 * If the new write will leave a contiguous dirty
 		 * area, just update the b_dirtyoff and b_dirtyend,
 		 * otherwise force a write rpc of the old dirty area.
 		 *
 		 * While it is possible to merge discontiguous writes due to
 		 * our having a B_CACHE buffer ( and thus valid read data
 		 * for the hole), we don't because it could lead to
 		 * significant cache coherency problems with multiple clients,
 		 * especially if locking is implemented later on.
 		 *
 		 * as an optimization we could theoretically maintain
 		 * a linked list of discontinuous areas, but we would still
 		 * have to commit them separately so there isn't much
 		 * advantage to it except perhaps a bit of asynchronization.
 		 */
 
 		if (bp->b_dirtyend > 0 &&
 		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
 			/*
 			 * Yes, we mean it. Write out everything to "storage"
 			 * immediately, without hesitation. (Apart from other
 			 * reasons: the only way to know if a write is valid
 			 * if its actually written out.)
 			 */
 			SDT_PROBE2(fusefs, , io, write_biobackend_issue, 0, bp);
 			/*
 			 * NOTE(review): bp->b_error is read after bwrite()
 			 * returns; confirm the buffer is still owned by us at
 			 * that point, or use bwrite()'s return value instead.
 			 */
 			bwrite(bp);
 			if (bp->b_error == EINTR) {
 				err = EINTR;
 				break;
 			}
 			goto again;
 		}
 		err = uiomove((char *)bp->b_data + on, n, uio);
 
 		if (err) {
 			bp->b_ioflags |= BIO_ERROR;
 			bp->b_error = err;
 			brelse(bp);
 			break;
 			/* TODO: vfs_bio_clrbuf like ffs_write does? */
 		}
 		/*
 		 * Only update dirtyoff/dirtyend if not a degenerate
 		 * condition.
 		 */
 		if (n) {
 			if (bp->b_dirtyend > 0) {
 				bp->b_dirtyoff = MIN(on, bp->b_dirtyoff);
 				bp->b_dirtyend = MAX((on + n), bp->b_dirtyend);
 			} else {
 				bp->b_dirtyoff = on;
 				bp->b_dirtyend = on + n;
 			}
 			vfs_bio_set_valid(bp, on, n);
 		}
 
 		vfs_bio_set_flags(bp, ioflag);
 
 		/*
 		 * Choose how to push the buffer out.  The second argument of
 		 * the write_biobackend_issue probe identifies the branch
 		 * taken.
 		 */
 		if (ioflag & IO_SYNC) {
 			SDT_PROBE2(fusefs, , io, write_biobackend_issue, 2, bp);
 			err = bwrite(bp);
 		} else if (vm_page_count_severe() ||
 			    buf_dirty_count_severe() ||
 			    (ioflag & IO_ASYNC)) {
 			bp->b_flags |= B_CLUSTEROK;
 			SDT_PROBE2(fusefs, , io, write_biobackend_issue, 3, bp);
 			bawrite(bp);
 		} else if (on == 0 && n == bcount) {
 			/* The write covered the whole buffer */
 			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
 				bp->b_flags |= B_CLUSTEROK;
 				SDT_PROBE2(fusefs, , io, write_biobackend_issue,
 					4, bp);
 				cluster_write(vp, bp, filesize, seqcount, 0);
 			} else {
 				SDT_PROBE2(fusefs, , io, write_biobackend_issue,
 					5, bp);
 				bawrite(bp);
 			}
 		} else if (ioflag & IO_DIRECT) {
 			bp->b_flags |= B_CLUSTEROK;
 			SDT_PROBE2(fusefs, , io, write_biobackend_issue, 6, bp);
 			bawrite(bp);
 		} else {
 			/* Partial-block write with no urgency: delay it */
 			bp->b_flags &= ~B_CLUSTEROK;
 			SDT_PROBE2(fusefs, , io, write_biobackend_issue, 7, bp);
 			bdwrite(bp);
 		}
 		if (err)
 			break;
 	} while (uio->uio_resid > 0 && n > 0);
 
 	return (err);
 }
 
 /*
  * Perform the I/O described by bp (BIO_READ or BIO_WRITE) against the
  * daemon using the direct backends, then complete the buffer with
  * bufdone().
  *
  * Reads fill bp->b_data starting at block b_lblkno and zero-fill whatever
  * a short read left over.  Writes send only the dirty range
  * [b_dirtyoff, b_dirtyend) of the buffer, clipped to the file size.
  *
  * Returns 0 or an errno value.  Errors are also recorded in bp->b_error
  * with BIO_ERROR set, except for a write failing with EINTR or ETIMEDOUT,
  * where the buffer is marked dirty again instead (unless it is a B_PAGING
  * buffer).
  */
 int
 fuse_io_strategy(struct vnode *vp, struct buf *bp)
 {
 	struct fuse_filehandle *fufh;
 	struct ucred *cred;
 	struct uio *uiop;
 	struct uio uio;
 	struct iovec io;
 	off_t filesize;
 	int error = 0;
 	int fflag;
 	/* We don't know the true pid when we're dealing with the cache */
 	pid_t pid = 0;
 
 	const int biosize = fuse_iosize(vp);
 
 	MPASS(vp->v_type == VREG || vp->v_type == VDIR);
 	MPASS(bp->b_iocmd == BIO_READ || bp->b_iocmd == BIO_WRITE);
 
 	/* Pick the access mode and credentials matching the direction */
 	fflag = bp->b_iocmd == BIO_READ ? FREAD : FWRITE;
 	cred = bp->b_iocmd == BIO_READ ? bp->b_rcred : bp->b_wcred;
 	error = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid);
 	if (bp->b_iocmd == BIO_READ && error == EBADF) {
 		/* 
 		 * This may be a read-modify-write operation on a cached file
 		 * opened O_WRONLY.  The FUSE protocol allows this.
 		 */
 		error = fuse_filehandle_get(vp, FWRITE, &fufh, cred, pid);
 	}
 	if (error) {
 		printf("FUSE: strategy: filehandles are closed\n");
 		bp->b_ioflags |= BIO_ERROR;
 		bp->b_error = error;
 		bufdone(bp);
 		return (error);
 	}
 
 	/* Build a single-segment, kernel-space uio over the buffer */
 	uiop = &uio;
 	uiop->uio_iov = &io;
 	uiop->uio_iovcnt = 1;
 	uiop->uio_segflg = UIO_SYSSPACE;
 	uiop->uio_td = curthread;
 
 	/*
 	 * clear BIO_ERROR and B_INVAL state prior to initiating the I/O.  We
 	 * do this here so we do not have to do it in all the code that
 	 * calls us.
 	 */
 	bp->b_flags &= ~B_INVAL;
 	bp->b_ioflags &= ~BIO_ERROR;
 
 	KASSERT(!(bp->b_flags & B_DONE),
 	    ("fuse_io_strategy: bp %p already marked done", bp));
 	if (bp->b_iocmd == BIO_READ) {
 		io.iov_len = uiop->uio_resid = bp->b_bcount;
 		io.iov_base = bp->b_data;
 		uiop->uio_rw = UIO_READ;
 
 		uiop->uio_offset = ((off_t)bp->b_lblkno) * biosize;
 		error = fuse_read_directbackend(vp, uiop, cred, fufh);
 
 		if (!error && uiop->uio_resid) {
 			/*
 			 * If we had a short read with no error, we must have
 			 * hit a file hole.  We should zero-fill the remainder.
 			 * This can also occur if the server hits the file EOF.
 			 *
 			 * Holes used to be able to occur due to pending
 			 * writes, but that is not possible any longer.
 			 */
 			int nread = bp->b_bcount - uiop->uio_resid;
 			int left = uiop->uio_resid;
 
 			if (left > 0)
 				bzero((char *)bp->b_data + nread, left);
 			/* The zero-filled tail counts as read */
 			uiop->uio_resid = 0;
 		}
 		if (error) {
 			bp->b_ioflags |= BIO_ERROR;
 			bp->b_error = error;
 		}
 	} else {
 		/*
 		 * Setup for actual write
 		 */
 		error = fuse_vnode_size(vp, &filesize, cred, curthread);
 		if (error) {
 			bp->b_ioflags |= BIO_ERROR;
 			bp->b_error = error;
 			bufdone(bp);
 			return (error);
 		}
 
 		/* Never write past the current end of file */
 		if ((off_t)bp->b_lblkno * biosize + bp->b_dirtyend > filesize)
 			bp->b_dirtyend = filesize - 
 				(off_t)bp->b_lblkno * biosize;
 
 		if (bp->b_dirtyend > bp->b_dirtyoff) {
 			io.iov_len = uiop->uio_resid = bp->b_dirtyend
 			    - bp->b_dirtyoff;
 			uiop->uio_offset = (off_t)bp->b_lblkno * biosize
 			    + bp->b_dirtyoff;
 			io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
 			uiop->uio_rw = UIO_WRITE;
 
 			error = fuse_write_directbackend(vp, uiop, cred, fufh,
 				filesize, 0, false);
 
 			if (error == EINTR || error == ETIMEDOUT) {
 				/*
 				 * Keep the data: mark the buffer dirty again
 				 * rather than failing it.
 				 */
 				bp->b_flags &= ~(B_INVAL | B_NOCACHE);
 				if ((bp->b_flags & B_PAGING) == 0) {
 					bdirty(bp);
 					bp->b_flags &= ~B_DONE;
 				}
 				/*
 				 * The error test below repeats the enclosing
 				 * condition; only the B_ASYNC check adds
 				 * anything.
 				 */
 				if ((error == EINTR || error == ETIMEDOUT) &&
 				    (bp->b_flags & B_ASYNC) == 0)
 					bp->b_flags |= B_EINTR;
 			} else {
 				if (error) {
 					bp->b_ioflags |= BIO_ERROR;
 					bp->b_flags |= B_INVAL;
 					bp->b_error = error;
 				}
 				bp->b_dirtyoff = bp->b_dirtyend = 0;
 			}
 		} else {
 			/* Empty dirty range: nothing to send */
 			bp->b_resid = 0;
 			bufdone(bp);
 			return (0);
 		}
 	}
 	bp->b_resid = uiop->uio_resid;
 	bufdone(bp);
 	return (error);
 }
 
 /*
  * Flush vp's buffers by delegating to vn_fsync_buf() with the caller's
  * waitfor value, and return its result.  td is not used.
  */
 int
 fuse_io_flushbuf(struct vnode *vp, int waitfor, struct thread *td)
 {
 	int error;
 
 	error = vn_fsync_buf(vp, waitfor);
 	return (error);
 }
 
 /*
  * Flush and invalidate all dirty buffers. If another process is already
  * doing the flush, just wait for completion.
  */
 int
 fuse_io_invalbuf(struct vnode *vp, struct thread *td)
 {
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 	int error = 0;
 
 	if (vp->v_iflag & VI_DOOMED)
 		return 0;
 
 	ASSERT_VOP_ELOCKED(vp, "fuse_io_invalbuf");
 
 	while (fvdat->flag & FN_FLUSHINPROG) {
 		struct proc *p = td->td_proc;
 
 		if (vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF)
 			return EIO;
 		fvdat->flag |= FN_FLUSHWANT;
 		tsleep(&fvdat->flag, PRIBIO + 2, "fusevinv", 2 * hz);
 		error = 0;
 		if (p != NULL) {
 			PROC_LOCK(p);
 			if (SIGNOTEMPTY(p->p_siglist) ||
 			    SIGNOTEMPTY(td->td_siglist))
 				error = EINTR;
 			PROC_UNLOCK(p);
 		}
 		if (error == EINTR)
 			return EINTR;
 	}
 	fvdat->flag |= FN_FLUSHINPROG;
 
 	if (vp->v_bufobj.bo_object != NULL) {
 		VM_OBJECT_WLOCK(vp->v_bufobj.bo_object);
 		vm_object_page_clean(vp->v_bufobj.bo_object, 0, 0, OBJPC_SYNC);
 		VM_OBJECT_WUNLOCK(vp->v_bufobj.bo_object);
 	}
 	error = vinvalbuf(vp, V_SAVE, PCATCH, 0);
 	while (error) {
 		if (error == ERESTART || error == EINTR) {
 			fvdat->flag &= ~FN_FLUSHINPROG;
 			if (fvdat->flag & FN_FLUSHWANT) {
 				fvdat->flag &= ~FN_FLUSHWANT;
 				wakeup(&fvdat->flag);
 			}
 			return EINTR;
 		}
 		error = vinvalbuf(vp, V_SAVE, PCATCH, 0);
 	}
 	fvdat->flag &= ~FN_FLUSHINPROG;
 	if (fvdat->flag & FN_FLUSHWANT) {
 		fvdat->flag &= ~FN_FLUSHWANT;
 		wakeup(&fvdat->flag);
 	}
 	return (error);
 }
Index: projects/fuse2/sys/fs/fuse/fuse_ipc.c
===================================================================
--- projects/fuse2/sys/fs/fuse/fuse_ipc.c	(revision 349237)
+++ projects/fuse2/sys/fs/fuse/fuse_ipc.c	(revision 349238)
@@ -1,1078 +1,1082 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2007-2009 Google Inc. and Amit Singh
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
  *
  * * Redistributions of source code must retain the above copyright
  *   notice, this list of conditions and the following disclaimer.
  * * Redistributions in binary form must reproduce the above
  *   copyright notice, this list of conditions and the following disclaimer
  *   in the documentation and/or other materials provided with the
  *   distribution.
  * * Neither the name of Google Inc. nor the names of its
  *   contributors may be used to endorse or promote products derived from
  *   this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Copyright (C) 2005 Csaba Henk.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/module.h>
 #include <sys/systm.h>
 #include <sys/errno.h>
 #include <sys/kernel.h>
 #include <sys/conf.h>
 #include <sys/uio.h>
 #include <sys/malloc.h>
 #include <sys/queue.h>
 #include <sys/lock.h>
 #include <sys/sx.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/mount.h>
 #include <sys/sdt.h>
 #include <sys/vnode.h>
 #include <sys/signalvar.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <vm/uma.h>
 
 #include "fuse.h"
 #include "fuse_node.h"
 #include "fuse_ipc.h"
 #include "fuse_internal.h"
 
 SDT_PROVIDER_DECLARE(fusefs);
 /*
  * Fuse trace probe:
  * arg0: verbosity.  Higher numbers give more verbose messages
  * arg1: Textual message
  */
 SDT_PROBE_DEFINE2(fusefs, , ipc, trace, "int", "char*");
 
 /* Forward declarations for helpers private to this file */
 static void fdisp_make_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
     struct fuse_data *data, uint64_t nid, pid_t pid, struct ucred *cred);
 static void fuse_interrupt_send(struct fuse_ticket *otick, int err);
 static struct fuse_ticket *fticket_alloc(struct fuse_data *data);
 static void fticket_refresh(struct fuse_ticket *ftick);
 static void fticket_destroy(struct fuse_ticket *ftick);
 static int fticket_wait_answer(struct fuse_ticket *ftick);
 static inline int 
 fticket_aw_pull_uio(struct fuse_ticket *ftick,
     struct uio *uio);
 
 static int fuse_body_audit(struct fuse_ticket *ftick, size_t blen);
 
 /* Default answer handler, installed by fdisp_wait_answ() */
 static fuse_handler_t fuse_standard_handler;
 
 SYSCTL_NODE(_vfs, OID_AUTO, fusefs, CTLFLAG_RW, 0, "FUSE tunables");
 /* Incremented by fticket_ctor() and decremented by fticket_dtor() */
 static int fuse_ticket_count = 0;
 
 SYSCTL_INT(_vfs_fusefs, OID_AUTO, ticket_count, CTLFLAG_RW,
     &fuse_ticket_count, 0, "number of allocated tickets");
 /* Slack threshold consulted by fiov_adjust(); a negative value disables it */
 static long fuse_iov_permanent_bufsize = 1 << 19;
 
 SYSCTL_LONG(_vfs_fusefs, OID_AUTO, iov_permanent_bufsize, CTLFLAG_RW,
     &fuse_iov_permanent_bufsize, 0,
     "limit for permanently stored buffer size for fuse_iovs");
 /* Initial value of fuse_iov.credit, see fiov_init() and fiov_adjust() */
 static int fuse_iov_credit = 16;
 
 SYSCTL_INT(_vfs_fusefs, OID_AUTO, iov_credit, CTLFLAG_RW,
     &fuse_iov_credit, 0,
     "how many times is an oversized fuse_iov tolerated");
 
 MALLOC_DEFINE(M_FUSEMSG, "fuse_msgbuf", "fuse message buffer");
 /* UMA zone for struct fuse_ticket, created in fuse_ipc_init() */
 static uma_zone_t ticket_zone;
 
 /* 
  * TODO: figure out how to timeout INTERRUPT requests, because the daemon may
  * legally never respond
  */
 static int
 fuse_interrupt_callback(struct fuse_ticket *tick, struct uio *uio)
 {
 	struct fuse_ticket *otick, *x_tick;
 	struct fuse_interrupt_in *fii;
 	struct fuse_data *data = tick->tk_data;
 	bool found = false;
 
 	fii = (struct fuse_interrupt_in*)((char*)tick->tk_ms_fiov.base +
 		sizeof(struct fuse_in_header));
 
 	fuse_lck_mtx_lock(data->aw_mtx);
 	TAILQ_FOREACH_SAFE(otick, &data->aw_head, tk_aw_link, x_tick) {
 		if (otick->tk_unique == fii->unique) {
 			found = true;
 			break;
 		}
 	}
 	fuse_lck_mtx_unlock(data->aw_mtx);
 
 	if (!found) {
 		/* Original is already complete.  Just return */
 		return 0;
 	}
 
 	/* Clear the original ticket's interrupt association */
 	otick->irq_unique = 0;
 
 	if (tick->tk_aw_ohead.error == ENOSYS) {
 		fsess_set_notimpl(data->mp, FUSE_INTERRUPT);
 		return 0;
 	} else if (tick->tk_aw_ohead.error == EAGAIN) {
 		/* 
 		 * There are two reasons we might get this:
 		 * 1) the daemon received the INTERRUPT request before the
 		 *    original, or
 		 * 2) the daemon received the INTERRUPT request after it
 		 *    completed the original request.
 		 * In the first case we should re-send the INTERRUPT.  In the
 		 * second, we should ignore it.
 		 */
 		/* Resend */
 		fuse_interrupt_send(otick, EINTR);
 		return 0;
 	} else {
 		/* Illegal FUSE_INTERRUPT response */
 		return EINVAL;
 	}
 }
 
 /* Interrupt the operation otick.  Return err as its error code */
 void
 fuse_interrupt_send(struct fuse_ticket *otick, int err)
 {
 	struct fuse_dispatcher fdi;
 	struct fuse_interrupt_in *fii;
 	struct fuse_in_header *ftick_hdr;
 	struct fuse_data *data = otick->tk_data;
 	struct fuse_ticket *tick, *xtick;
 	struct ucred reused_creds;
 	gid_t reused_groups[1];
 
 	if (otick->irq_unique == 0) {
 		/* 
 		 * If the daemon hasn't yet received otick, then we can answer
 		 * it ourselves and return.
 		 */
 		fuse_lck_mtx_lock(data->ms_mtx);
 		STAILQ_FOREACH_SAFE(tick, &otick->tk_data->ms_head, tk_ms_link,
 			xtick) {
 			if (tick == otick) {
 				STAILQ_REMOVE(&otick->tk_data->ms_head, tick,
 					fuse_ticket, tk_ms_link);
 				otick->tk_data->ms_count--;
 				otick->tk_ms_link.stqe_next = NULL;
 				fuse_lck_mtx_unlock(data->ms_mtx);
 
 				fuse_lck_mtx_lock(otick->tk_aw_mtx);
 				if (!fticket_answered(otick)) {
 					fticket_set_answered(otick);
 					otick->tk_aw_errno = err;
 					wakeup(otick);
 				}
 				fuse_lck_mtx_unlock(otick->tk_aw_mtx);
 
 				fuse_ticket_drop(tick);
 				return;
 			}
 		}
 		fuse_lck_mtx_unlock(data->ms_mtx);
 
 		/*
 		 * If the fuse daemon doesn't support interrupts, then there's
 		 * nothing more that we can do
 		 */
 		if (!fsess_isimpl(data->mp, FUSE_INTERRUPT))
 			return;
 
 		/* 
 		 * If the fuse daemon has already received otick, then we must
 		 * send FUSE_INTERRUPT.
 		 */
 		ftick_hdr = fticket_in_header(otick);
 		reused_creds.cr_uid = ftick_hdr->uid;
 		reused_groups[0] = ftick_hdr->gid;
 		reused_creds.cr_groups = reused_groups;
 		fdisp_init(&fdi, sizeof(*fii));
 		fdisp_make_pid(&fdi, FUSE_INTERRUPT, data, ftick_hdr->nodeid,
 			ftick_hdr->pid, &reused_creds);
 
 		fii = fdi.indata;
 		fii->unique = otick->tk_unique;
 		fuse_insert_callback(fdi.tick, fuse_interrupt_callback);
 
 		otick->irq_unique = fdi.tick->tk_unique;
 		/* Interrupt ops should be delivered ASAP */
 		fuse_insert_message(fdi.tick, true);
 		fdisp_destroy(&fdi);
 	} else {
 		/* This ticket has already been interrupted */
 	}
 }
 
 /*
  * Initialize a fuse_iov: allocate a zeroed buffer of FU_AT_LEAST(size) bytes,
  * set the logical length to 0 and load the oversize credit from the
  * fuse_iov_credit tunable.  The allocation uses M_WAITOK.
  */
 void
 fiov_init(struct fuse_iov *fiov, size_t size)
 {
 	/* Keep the full width of size; a 32-bit temporary would truncate it */
 	size_t msize = FU_AT_LEAST(size);
 
 	fiov->len = 0;
 
 	fiov->base = malloc(msize, M_FUSEMSG, M_WAITOK | M_ZERO);
 
 	fiov->allocated_size = msize;
 	fiov->credit = fuse_iov_credit;
 }
 
 /*
  * Free the buffer owned by a fuse_iov.  The fiov must have a buffer
  * (asserted with MPASS); its fields are not reset afterwards.
  */
 void
 fiov_teardown(struct fuse_iov *fiov)
 {
 	MPASS(fiov->base != NULL);
 	free(fiov->base, M_FUSEMSG);
 }
 
 /*
  * Set the logical length of a fuse_iov to size, zeroing any bytes that
  * become visible.
  *
  * The buffer is reallocated to FU_AT_LEAST(size) when it is too small.  It
  * is also reallocated when the fuse_iov_permanent_bufsize tunable is
  * non-negative, the buffer exceeds size by more than that tunable, and the
  * fiov's credit has run out; each such oversized call uses one credit.
  */
 void
 fiov_adjust(struct fuse_iov *fiov, size_t size)
 {
 	if (fiov->allocated_size < size ||
 	    (fuse_iov_permanent_bufsize >= 0 &&
 	    fiov->allocated_size - size > fuse_iov_permanent_bufsize &&
 	    --fiov->credit < 0)) {
 		size_t newsize = FU_AT_LEAST(size);
 
 		/*
 		 * M_WAITOK allocations sleep instead of failing, so the
 		 * result needs no NULL check.
 		 */
 		fiov->base = realloc(fiov->base, newsize, M_FUSEMSG,
 		    M_WAITOK | M_ZERO);
 		fiov->allocated_size = newsize;
 		fiov->credit = fuse_iov_credit;
 		/* Clear data buffer after reallocation */
 		bzero(fiov->base, size);
 	} else if (size > fiov->len) {
 		/* Clear newly extended portion of data buffer */
 		bzero((char*)fiov->base + fiov->len, size - fiov->len);
 	}
 	fiov->len = size;
 }
 
 /*
  * Reset the fiov to zero length via fiov_adjust(fiov, 0), which may also
  * reallocate an oversized buffer.  No bytes are zeroed at this point;
  * fiov_adjust() zeroes them when the length grows again.
  */
 void
 fiov_refresh(struct fuse_iov *fiov)
 {
 	fiov_adjust(fiov, 0);
 }
 
 /*
  * UMA constructor for tickets (registered in fuse_ipc_init()).  arg is the
  * fuse_data passed through uma_zalloc_arg() by fticket_alloc().
  *
  * Binds the ticket to its session, assigns a fresh nonzero unique id, clears
  * the interrupt association and sets the reference count to 1.  Always
  * returns 0.
  */
 static int
 fticket_ctor(void *mem, int size, void *arg, int flags)
 {
 	struct fuse_ticket *ftick = mem;
 	struct fuse_data *data = arg;
 
 	FUSE_ASSERT_MS_DONE(ftick);
 	FUSE_ASSERT_AW_DONE(ftick);
 
 	ftick->tk_data = data;
 
 	/*
 	 * fticket_init() zeroes the item, so a nonzero id means this ticket
 	 * was used before and still carries old state.
 	 */
 	if (ftick->tk_unique != 0)
 		fticket_refresh(ftick);
 
 	/* May be truncated to 32 bits */
 	ftick->tk_unique = atomic_fetchadd_long(&data->ticketer, 1);
 	/* 0 is reserved to mean "never used"; take the next id instead */
 	if (ftick->tk_unique == 0)
 		ftick->tk_unique = atomic_fetchadd_long(&data->ticketer, 1);
 
 	ftick->irq_unique = 0;
 
 	refcount_init(&ftick->tk_refcount, 1);
 	atomic_add_acq_int(&fuse_ticket_count, 1);
 
 	return 0;
 }
 
 /*
  * UMA destructor for tickets: checks the ticket's queue state with the
  * FUSE_ASSERT_*_DONE macros and decrements the global ticket counter.
  */
 static void
 fticket_dtor(void *mem, int size, void *arg)
 {
 	/* ftick is declared only under INVARIANTS; it is used solely by the asserts */
 #ifdef INVARIANTS
 	struct fuse_ticket *ftick = mem;
 #endif
 
 	FUSE_ASSERT_MS_DONE(ftick);
 	FUSE_ASSERT_AW_DONE(ftick);
 
 	atomic_subtract_acq_int(&fuse_ticket_count, 1);
 }
 
 /*
  * UMA init hook for tickets: zero the whole ticket, then set up the
  * request (ms) and answer (aw) fiovs and the answer mutex.  The request
  * fiov starts out large enough for a fuse_in_header.  Always returns 0.
  */
 static int
 fticket_init(void *mem, int size, int flags)
 {
 	struct fuse_ticket *ftick = mem;
 
 	bzero(ftick, sizeof(*ftick));
 
 	/* Outgoing (request) side */
 	fiov_init(&ftick->tk_ms_fiov, sizeof(struct fuse_in_header));
 	ftick->tk_ms_type = FT_M_FIOV;
 
 	/* Incoming (answer) side */
 	mtx_init(&ftick->tk_aw_mtx, "fuse answer delivery mutex", NULL, MTX_DEF);
 	fiov_init(&ftick->tk_aw_fiov, 0);
 	ftick->tk_aw_type = FT_A_FIOV;
 
 	return (0);
 }
 
 /*
  * UMA fini hook for tickets: undoes fticket_init() by freeing both fiov
  * buffers and destroying the answer mutex.
  */
 static void
 fticket_fini(void *mem, int size)
 {
 	struct fuse_ticket *ftick = mem;
 
 	fiov_teardown(&ftick->tk_ms_fiov);
 	fiov_teardown(&ftick->tk_aw_fiov);
 	mtx_destroy(&ftick->tk_aw_mtx);
 }
 
 /*
  * Allocate a ticket from ticket_zone with M_WAITOK.  data is handed to the
  * zone constructor (fticket_ctor) as its argument.
  */
 static inline struct fuse_ticket *
 fticket_alloc(struct fuse_data *data)
 {
 	return uma_zalloc_arg(ticket_zone, data, M_WAITOK);
 }
 
 /* Return a ticket to ticket_zone. */
 static inline void
 fticket_destroy(struct fuse_ticket *ftick)
 {
 	/* ISO C forbids "return expr;" in a void function; call it plainly */
 	uma_zfree(ticket_zone, ftick);
 }
 
 /*
  * Return a ticket to its pristine state for reuse: both fiovs are reset to
  * zero length, the alternate buffer pointers and sizes are cleared, the
  * answer header and errno are zeroed, and all flags are dropped.
  */
 static inline
 void
 fticket_refresh(struct fuse_ticket *ftick)
 {
 	FUSE_ASSERT_MS_DONE(ftick);
 	FUSE_ASSERT_AW_DONE(ftick);
 
 	/* Request side */
 	fiov_refresh(&ftick->tk_ms_fiov);
 	ftick->tk_ms_bufdata = NULL;
 	ftick->tk_ms_bufsize = 0;
 	ftick->tk_ms_type = FT_M_FIOV;
 
 	bzero(&ftick->tk_aw_ohead, sizeof(struct fuse_out_header));
 
 	/* Answer side */
 	fiov_refresh(&ftick->tk_aw_fiov);
 	ftick->tk_aw_errno = 0;
 	ftick->tk_aw_bufdata = NULL;
 	ftick->tk_aw_bufsize = 0;
 	ftick->tk_aw_type = FT_A_FIOV;
 
 	ftick->tk_flag = 0;
 }
 
 /*
  * Prepare the ticket to be reused, but don't clear its data buffers.
  * Same as fticket_refresh() except that neither fiov is touched.
  */
 static inline void
 fticket_reset(struct fuse_ticket *ftick)
 {
 	FUSE_ASSERT_MS_DONE(ftick);
 	FUSE_ASSERT_AW_DONE(ftick);
 
 	ftick->tk_ms_bufdata = NULL;
 	ftick->tk_ms_bufsize = 0;
 	ftick->tk_ms_type = FT_M_FIOV;
 
 	bzero(&ftick->tk_aw_ohead, sizeof(struct fuse_out_header));
 
 	ftick->tk_aw_errno = 0;
 	ftick->tk_aw_bufdata = NULL;
 	ftick->tk_aw_bufsize = 0;
 	ftick->tk_aw_type = FT_A_FIOV;
 
 	ftick->tk_flag = 0;
 }
 
 /*
  * Sleep until ftick has been answered.
  *
  * Returns 0 once the ticket is answered, ENOTCONN if the session is found
  * dead before sleeping, ETIMEDOUT when msleep() times out after
  * daemon_timeout seconds, the msleep() error (EINTR/ERESTART) when a fatal
  * signal interrupts the wait, and ENXIO if the thread is woken without an
  * error but the ticket is still unanswered.
  *
  * When a non-fatal signal interrupts the sleep, the operation is interrupted
  * through fuse_interrupt_send(), the signal is added to the blocked set and
  * the wait is retried.
  */
 static int
 fticket_wait_answer(struct fuse_ticket *ftick)
 {
 	struct thread *td = curthread;
 	sigset_t blockedset, oldset;
 	int err = 0, stops_deferred;
 	struct fuse_data *data;
 
 	if (fsess_isimpl(ftick->tk_data->mp, FUSE_INTERRUPT)) {
 		SIGEMPTYSET(blockedset);
 	} else {
 		/* May as well block all signals */
 		SIGFILLSET(blockedset);
 		SIGDELSET(blockedset, SIGKILL);
 	}
 	stops_deferred = sigdeferstop(SIGDEFERSTOP_SILENT);
 	/* A NULL new set changes nothing; this only records the current mask */
 	kern_sigprocmask(td, SIG_BLOCK, NULL, &oldset, 0);
 
 	fuse_lck_mtx_lock(ftick->tk_aw_mtx);
 
 retry:
 	if (fticket_answered(ftick)) {
 		goto out;
 	}
 	data = ftick->tk_data;
 
 	if (fdata_get_dead(data)) {
 		err = ENOTCONN;
 		fticket_set_answered(ftick);
 		goto out;
 	}
 	/* blockedset is applied only for the duration of the sleep */
 	kern_sigprocmask(td, SIG_BLOCK, &blockedset, NULL, 0);
 	err = msleep(ftick, &ftick->tk_aw_mtx, PCATCH, "fu_ans",
 	    data->daemon_timeout * hz);
 	kern_sigprocmask(td, SIG_SETMASK, &oldset, NULL, 0);
 	if (err == EWOULDBLOCK) {
 		SDT_PROBE2(fusefs, , ipc, trace, 3,
 			"fticket_wait_answer: EWOULDBLOCK");
 #ifdef XXXIP				/* die conditionally */
 		if (!fdata_get_dead(data)) {
 			fdata_set_dead(data);
 		}
 #endif
 		err = ETIMEDOUT;
 		fticket_set_answered(ftick);
 	} else if ((err == EINTR || err == ERESTART)) {
 		/*
 		 * Whether we get EINTR or ERESTART depends on whether
 		 * SA_RESTART was set by sigaction(2).
 		 *
 		 * Try to interrupt the operation and wait for an EINTR response
 		 * to the original operation.  If the file system does not
 		 * support FUSE_INTERRUPT, then we'll just wait for it to
 		 * complete like normal.  If it does support FUSE_INTERRUPT,
 		 * then it will either respond EINTR to the original operation,
 		 * or EAGAIN to the interrupt.
 		 */
 		int sig;
 		bool fatal;
 
 		SDT_PROBE2(fusefs, , ipc, trace, 4,
 			"fticket_wait_answer: interrupt");
 		/* tk_aw_mtx is released here because fuse_interrupt_send() may take it */
 		fuse_lck_mtx_unlock(ftick->tk_aw_mtx);
 		fuse_interrupt_send(ftick, err);
 
 		PROC_LOCK(td->td_proc);
 		mtx_lock(&td->td_proc->p_sigacts->ps_mtx);
 		sig = cursig(td);
 		fatal = sig_isfatal(td->td_proc, sig);
 		mtx_unlock(&td->td_proc->p_sigacts->ps_mtx);
 		PROC_UNLOCK(td->td_proc);
 
 		fuse_lck_mtx_lock(ftick->tk_aw_mtx);
 		if (!fatal) {
 			/*
 			 * Block the just-delivered signal while we wait for an
 			 * interrupt response
 			 *
 			 * NOTE(review): sig is handed to SIGADDSET() without
 			 * checking that cursig() returned a valid signal
 			 * number; confirm it cannot be 0 here.
 			 */
 			SIGADDSET(blockedset, sig);
 			goto retry;
 		} else {
 			/* Return immediately for fatal signals */
 		}
 	} else if (err) {
 		SDT_PROBE2(fusefs, , ipc, trace, 6,
 			"fticket_wait_answer: other error");
 	} else {
 		SDT_PROBE2(fusefs, , ipc, trace, 7, "fticket_wait_answer: OK");
 	}
 out:
 	/* A wakeup with neither an error nor an answer is reported as ENXIO */
 	if (!(err || fticket_answered(ftick))) {
 		SDT_PROBE2(fusefs, , ipc, trace, 1,
 			"FUSE: requester was woken up but still no answer");
 		err = ENXIO;
 	}
 	fuse_lck_mtx_unlock(ftick->tk_aw_mtx);
 	sigallowstop(stops_deferred);
 
 	return err;
 }
 
 /*
  * Copy the remaining bytes of uio into the ticket's answer storage: the
  * answer fiov (resized to fit) for FT_A_FIOV tickets, or the caller-supplied
  * tk_aw_bufdata for FT_A_BUF tickets.  Returns 0 when uio is empty,
  * otherwise the result of uiomove().  Panics on an unknown answer type.
  */
 static	inline
 int
 fticket_aw_pull_uio(struct fuse_ticket *ftick, struct uio *uio)
 {
 	size_t len = uio_resid(uio);
 
 	if (len == 0)
 		return (0);
 
 	switch (ftick->tk_aw_type) {
 	case FT_A_FIOV:
 		fiov_adjust(fticket_resp(ftick), len);
 		return (uiomove(fticket_resp(ftick)->base, len, uio));
 
 	case FT_A_BUF:
 		ftick->tk_aw_bufsize = len;
 		return (uiomove(ftick->tk_aw_bufdata, len, uio));
 
 	default:
 		panic("FUSE: unknown answer type for ticket %p", ftick);
 	}
 }
 
 int
 fticket_pull(struct fuse_ticket *ftick, struct uio *uio)
 {
 	int err = 0;
 
 	if (ftick->tk_aw_ohead.error) {
 		return 0;
 	}
 	err = fuse_body_audit(ftick, uio_resid(uio));
 	if (!err) {
 		err = fticket_aw_pull_uio(ftick, uio);
 	}
 	return err;
 }
 
 /*
  * Allocate and initialize the per-session fuse_data for device fdev.
  * Sets up the message and answer queues with their mutexes, the knote list,
  * and the rename lock.  Takes a reference on cred as the daemon's credential
  * and starts with a reference count of 1 and the default daemon timeout.
  */
 struct fuse_data *
 fdata_alloc(struct cdev *fdev, struct ucred *cred)
 {
 	struct fuse_data *data;
 
 	data = malloc(sizeof(struct fuse_data), M_FUSEMSG, M_WAITOK | M_ZERO);
 
 	data->fdev = fdev;
 	mtx_init(&data->ms_mtx, "fuse message list mutex", NULL, MTX_DEF);
 	STAILQ_INIT(&data->ms_head);
 	data->ms_count = 0;
 	/* The knote list shares ms_mtx with the message queue */
 	knlist_init_mtx(&data->ks_rsel.si_note, &data->ms_mtx);
 	mtx_init(&data->aw_mtx, "fuse answer list mutex", NULL, MTX_DEF);
 	TAILQ_INIT(&data->aw_head);
 	data->daemoncred = crhold(cred);
 	data->daemon_timeout = FUSE_DEFAULT_DAEMON_TIMEOUT;
 	sx_init(&data->rename_lock, "fuse rename lock");
 	data->ref = 1;
 
 	return data;
 }
 
 /*
  * Drop one reference on data and, when it was the last one, tear down
  * everything fdata_alloc() set up and free the structure.
  *
  * NOTE(review): the reference count is decremented with a plain "--";
  * presumably callers serialize access. Confirm against the call sites.
  */
 void
 fdata_trydestroy(struct fuse_data *data)
 {
 	data->ref--;
 	MPASS(data->ref >= 0);
 	if (data->ref != 0)
 		return;
 
 	/* Driving off stage all that stuff thrown at device... */
 	sx_destroy(&data->rename_lock);
 	crfree(data->daemoncred);
 	mtx_destroy(&data->aw_mtx);
 	/* The knote list uses ms_mtx, so it goes before that mutex */
 	knlist_delete(&data->ks_rsel.si_note, curthread, 0);
 	knlist_destroy(&data->ks_rsel.si_note);
 	mtx_destroy(&data->ms_mtx);
 
 	free(data, M_FUSEMSG);
 }
 
 /*
  * Mark the session dead and wake up everyone who may be waiting on it.
  * Does nothing if it is already dead.  Takes FUSE_LOCK, then ms_mtx.
  */
 void
 fdata_set_dead(struct fuse_data *data)
 {
 	FUSE_LOCK();
 	if (fdata_get_dead(data)) {
 		FUSE_UNLOCK();
 		return;
 	}
 	fuse_lck_mtx_lock(data->ms_mtx);
 	data->dataflags |= FSESS_DEAD;
 	/* Sleepers on the session itself and select/poll waiters */
 	wakeup_one(data);
 	selwakeuppri(&data->ks_rsel, PZERO + 1);
 	/* Threads sleeping in fuse_ticket_fetch() waiting for INIT */
 	wakeup(&data->ticketer);
 	fuse_lck_mtx_unlock(data->ms_mtx);
 	FUSE_UNLOCK();
 }
 
 /*
  * Allocate a ticket for the session.  If the session has not finished
  * initializing and data->ticketer is greater than 2, sleep on
  * &data->ticketer until woken.  If that sleep fails, the session is marked
  * dead.  The ticket is returned in every case.
  */
 struct fuse_ticket *
 fuse_ticket_fetch(struct fuse_data *data)
 {
 	int err = 0;
 	struct fuse_ticket *ftick;
 
 	ftick = fticket_alloc(data);
 
 	if (!(data->dataflags & FSESS_INITED)) {
 		/* Sleep until we get an answer to the INIT message */
 		FUSE_LOCK();
 		if (!(data->dataflags & FSESS_INITED) && data->ticketer > 2) {
 			/* PDROP: msleep() returns with fuse_mtx released */
 			err = msleep(&data->ticketer, &fuse_mtx, PCATCH | PDROP,
 			    "fu_ini", 0);
 			if (err)
 				fdata_set_dead(data);
 		} else
 			FUSE_UNLOCK();
 	}
 	return ftick;
 }
 
 /*
  * Release one reference on a ticket and destroy it when that was the last
  * one.  Returns the result of refcount_release(), i.e. nonzero if the
  * ticket was destroyed.
  */
 int
 fuse_ticket_drop(struct fuse_ticket *ftick)
 {
 	int last;
 
 	last = refcount_release(&ftick->tk_refcount);
 	if (last != 0)
 		fticket_destroy(ftick);
 
 	return (last);
 }
 
 /*
  * Register handler as the ticket's answer handler and put the ticket on its
  * session's answer-wait list.  Does nothing when the session is dead.
  */
 void
 fuse_insert_callback(struct fuse_ticket *ftick, fuse_handler_t * handler)
 {
 	if (!fdata_get_dead(ftick->tk_data)) {
 		ftick->tk_aw_handler = handler;
 
 		fuse_lck_mtx_lock(ftick->tk_data->aw_mtx);
 		fuse_aw_push(ftick);
 		fuse_lck_mtx_unlock(ftick->tk_data->aw_mtx);
 	}
 }
 
 /*
  * Insert a new upgoing ticket into the message queue
  *
  * If urgent is true, insert at the front of the queue.  Otherwise, insert in
  * FIFO order.
  *
  * Panics if the ticket is already marked FT_DIRTY.  The ticket is marked
  * dirty even when the session is dead, in which case it is not queued.
  */
 void
 fuse_insert_message(struct fuse_ticket *ftick, bool urgent)
 {
 	if (ftick->tk_flag & FT_DIRTY) {
 		panic("FUSE: ticket reused without being refreshed");
 	}
 	ftick->tk_flag |= FT_DIRTY;
 
 	if (fdata_get_dead(ftick->tk_data)) {
 		return;
 	}
 	fuse_lck_mtx_lock(ftick->tk_data->ms_mtx);
 	if (urgent)
 		fuse_ms_push_head(ftick);
 	else
 		fuse_ms_push(ftick);
 	/* Notify sleepers, select/poll waiters and kqueue listeners */
 	wakeup_one(ftick->tk_data);
 	selwakeuppri(&ftick->tk_data->ks_rsel, PZERO + 1);
 	KNOTE_LOCKED(&ftick->tk_data->ks_rsel.si_note, 0);
 	fuse_lck_mtx_unlock(ftick->tk_data->ms_mtx);
 }
 
 static int
 fuse_body_audit(struct fuse_ticket *ftick, size_t blen)
 {
 	int err = 0;
 	enum fuse_opcode opcode;
 
 	opcode = fticket_opcode(ftick);
 
 	switch (opcode) {
+	case FUSE_BMAP:
+		err = (blen == sizeof(struct fuse_bmap_out)) ? 0 : EINVAL;
+		break;
+
 	case FUSE_LINK:
 	case FUSE_LOOKUP:
 	case FUSE_MKDIR:
 	case FUSE_MKNOD:
 	case FUSE_SYMLINK:
 		if (fuse_libabi_geq(ftick->tk_data, 7, 9)) {
 			err = (blen == sizeof(struct fuse_entry_out)) ?
 				0 : EINVAL;
 		} else {
 			err = (blen == FUSE_COMPAT_ENTRY_OUT_SIZE) ? 0 : EINVAL;
 		}
 		break;
 
 	case FUSE_FORGET:
 		panic("FUSE: a handler has been intalled for FUSE_FORGET");
 		break;
 
 	case FUSE_GETATTR:
 	case FUSE_SETATTR:
 		if (fuse_libabi_geq(ftick->tk_data, 7, 9)) {
 			err = (blen == sizeof(struct fuse_attr_out)) ? 
 			  0 : EINVAL;
 		} else {
 			err = (blen == FUSE_COMPAT_ATTR_OUT_SIZE) ? 0 : EINVAL;
 		}
 		break;
 
 	case FUSE_READLINK:
 		err = (PAGE_SIZE >= blen) ? 0 : EINVAL;
 		break;
 
 	case FUSE_UNLINK:
 		err = (blen == 0) ? 0 : EINVAL;
 		break;
 
 	case FUSE_RMDIR:
 		err = (blen == 0) ? 0 : EINVAL;
 		break;
 
 	case FUSE_RENAME:
 		err = (blen == 0) ? 0 : EINVAL;
 		break;
 
 	case FUSE_OPEN:
 		err = (blen == sizeof(struct fuse_open_out)) ? 0 : EINVAL;
 		break;
 
 	case FUSE_READ:
 		err = (((struct fuse_read_in *)(
 		    (char *)ftick->tk_ms_fiov.base +
 		    sizeof(struct fuse_in_header)
 		    ))->size >= blen) ? 0 : EINVAL;
 		break;
 
 	case FUSE_WRITE:
 		err = (blen == sizeof(struct fuse_write_out)) ? 0 : EINVAL;
 		break;
 
 	case FUSE_STATFS:
 		if (fuse_libabi_geq(ftick->tk_data, 7, 4)) {
 			err = (blen == sizeof(struct fuse_statfs_out)) ? 
 			  0 : EINVAL;
 		} else {
 			err = (blen == FUSE_COMPAT_STATFS_SIZE) ? 0 : EINVAL;
 		}
 		break;
 
 	case FUSE_RELEASE:
 		err = (blen == 0) ? 0 : EINVAL;
 		break;
 
 	case FUSE_FSYNC:
 		err = (blen == 0) ? 0 : EINVAL;
 		break;
 
 	case FUSE_SETXATTR:
 		err = (blen == 0) ? 0 : EINVAL;
 		break;
 
 	case FUSE_GETXATTR:
 	case FUSE_LISTXATTR:
 		/*
 		 * These can have varying response lengths, and 0 length
 		 * isn't necessarily invalid.
 		 */
 		err = 0;
 		break;
 
 	case FUSE_REMOVEXATTR:
 		err = (blen == 0) ? 0 : EINVAL;
 		break;
 
 	case FUSE_FLUSH:
 		err = (blen == 0) ? 0 : EINVAL;
 		break;
 
 	case FUSE_INIT:
 		if (blen == sizeof(struct fuse_init_out) || blen == 8) {
 			err = 0;
 		} else {
 			err = EINVAL;
 		}
 		break;
 
 	case FUSE_OPENDIR:
 		err = (blen == sizeof(struct fuse_open_out)) ? 0 : EINVAL;
 		break;
 
 	case FUSE_READDIR:
 		err = (((struct fuse_read_in *)(
 		    (char *)ftick->tk_ms_fiov.base +
 		    sizeof(struct fuse_in_header)
 		    ))->size >= blen) ? 0 : EINVAL;
 		break;
 
 	case FUSE_RELEASEDIR:
 		err = (blen == 0) ? 0 : EINVAL;
 		break;
 
 	case FUSE_FSYNCDIR:
 		err = (blen == 0) ? 0 : EINVAL;
 		break;
 
 	case FUSE_GETLK:
 		err = (blen == sizeof(struct fuse_lk_out)) ? 0 : EINVAL;
 		break;
 
 	case FUSE_SETLK:
 		err = (blen == 0) ? 0 : EINVAL;
 		break;
 
 	case FUSE_SETLKW:
 		err = (blen == 0) ? 0 : EINVAL;
 		break;
 
 	case FUSE_ACCESS:
 		err = (blen == 0) ? 0 : EINVAL;
 		break;
 
 	case FUSE_CREATE:
 		if (fuse_libabi_geq(ftick->tk_data, 7, 9)) {
 			err = (blen == sizeof(struct fuse_entry_out) +
 			    sizeof(struct fuse_open_out)) ? 0 : EINVAL;
 		} else {
 			err = (blen == FUSE_COMPAT_ENTRY_OUT_SIZE +
 			    sizeof(struct fuse_open_out)) ? 0 : EINVAL;
 		}
 		break;
 
 	case FUSE_DESTROY:
 		err = (blen == 0) ? 0 : EINVAL;
 		break;
 
 	default:
 		panic("FUSE: opcodes out of sync (%d)\n", opcode);
 	}
 
 	return err;
 }
 
 /*
  * Fill in a request header: total length (header plus blen bytes of body),
  * the ticket's unique id, target node id, opcode, and the caller's pid, uid
  * and first group id.  Only cr_uid and cr_groups[0] of cred are read.
  */
 static inline void
 fuse_setup_ihead(struct fuse_in_header *ihead, struct fuse_ticket *ftick,
     uint64_t nid, enum fuse_opcode op, size_t blen, pid_t pid,
     struct ucred *cred)
 {
 	ihead->len = sizeof(*ihead) + blen;
 	ihead->unique = ftick->tk_unique;
 	ihead->nodeid = nid;
 	ihead->opcode = op;
 
 	ihead->pid = pid;
 	ihead->uid = cred->cr_uid;
 	ihead->gid = cred->cr_groups[0];
 }
 
 /*
  * Default answer handler: pulls the answer body into the ticket and wakes
  * up the thread waiting on it.  The body is not interpreted here beyond the
  * size check performed inside fticket_pull(); that is left to the waiter.
  * If the ticket has not been answered yet, it is marked answered and the
  * pull result is stored in tk_aw_errno.  Returns the pull result.
  */
 
 static int
 fuse_standard_handler(struct fuse_ticket *ftick, struct uio *uio)
 {
 	int err = fticket_pull(ftick, uio);
 
 	fuse_lck_mtx_lock(ftick->tk_aw_mtx);
 	if (!fticket_answered(ftick)) {
 		fticket_set_answered(ftick);
 		ftick->tk_aw_errno = err;
 		wakeup(ftick);
 	}
 	fuse_lck_mtx_unlock(ftick->tk_aw_mtx);
 
 	return (err);
 }
 
 /*
  * Reinitialize a dispatcher from a pid and node id, without resizing or
  * clearing its data buffers.
  *
  * The dispatcher must already own a ticket whose request fiov is at least
  * header + iosize bytes long (both asserted).  The ticket is reset with
  * fticket_reset() and a fresh request header is written.  The mp argument
  * is not used here.
  */
 static void
 fdisp_refresh_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
     struct mount *mp, uint64_t nid, pid_t pid, struct ucred *cred)
 {
 	MPASS(fdip->tick);
 	MPASS2(sizeof(fdip->finh) + fdip->iosize <= fdip->tick->tk_ms_fiov.len,
 		"Must use fdisp_make_pid to increase the size of the fiov");
 	fticket_reset(fdip->tick);
 
 	FUSE_DIMALLOC(&fdip->tick->tk_ms_fiov, fdip->finh,
 	    fdip->indata, fdip->iosize);
 
 	fuse_setup_ihead(fdip->finh, fdip->tick, nid, op, fdip->iosize, pid,
 		cred);
 }
 
 /*
  * Initialize a dispatcher from a pid and node id.
  *
  * Reuses the dispatcher's ticket (after a full refresh) if it has one,
  * otherwise fetches a new ticket from the session.  Then sizes the request
  * buffer for fdip->iosize bytes of body and writes the request header.
  */
 static void
 fdisp_make_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
     struct fuse_data *data, uint64_t nid, pid_t pid, struct ucred *cred)
 {
 	if (fdip->tick) {
 		fticket_refresh(fdip->tick);
 	} else {
 		fdip->tick = fuse_ticket_fetch(data);
 	}
 
 	/* FUSE_DIMALLOC will bzero the fiovs when it enlarges them */
 	FUSE_DIMALLOC(&fdip->tick->tk_ms_fiov, fdip->finh,
 	    fdip->indata, fdip->iosize);
 
 	fuse_setup_ihead(fdip->finh, fdip->tick, nid, op, fdip->iosize, pid, cred);
 }
 
 /*
  * Initialize a dispatcher for operation op on node nid of mount mp, issued
  * on behalf of thread td with credentials cred.  td and cred first pass
  * through RECTIFY_TDCR(); the request carries the pid of td's process.
  */
 void
 fdisp_make(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct mount *mp,
     uint64_t nid, struct thread *td, struct ucred *cred)
 {
 	struct fuse_data *data = fuse_get_mpdata(mp);
 	RECTIFY_TDCR(td, cred);
 
 	/* ISO C forbids "return expr;" in a void function; call it plainly */
 	fdisp_make_pid(fdip, op, data, nid, td->td_proc->p_pid, cred);
 }
 
 /*
  * Initialize a dispatcher for operation op on vnode vp.  The session and
  * node id are derived from the vnode; td and cred first pass through
  * RECTIFY_TDCR(); the request carries the pid of td's process.
  */
 void
 fdisp_make_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op,
     struct vnode *vp, struct thread *td, struct ucred *cred)
 {
 	struct mount *mp = vnode_mount(vp);
 	struct fuse_data *data = fuse_get_mpdata(mp);
 
 	RECTIFY_TDCR(td, cred);
 	/* ISO C forbids "return expr;" in a void function; call it plainly */
 	fdisp_make_pid(fdip, op, data, VTOI(vp),
 	    td->td_proc->p_pid, cred);
 }
 
 /*
  * Refresh a fuse_dispatcher so it can be reused for operation op on vnode
  * vp, but don't zero its data.  td and cred first pass through
  * RECTIFY_TDCR(); the request carries the pid of td's process.
  */
 void
 fdisp_refresh_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op,
     struct vnode *vp, struct thread *td, struct ucred *cred)
 {
 	RECTIFY_TDCR(td, cred);
 	/* ISO C forbids "return expr;" in a void function; call it plainly */
 	fdisp_refresh_pid(fdip, op, vnode_mount(vp), VTOI(vp),
 	    td->td_proc->p_pid, cred);
 }
 
 /*
  * Fully refresh the dispatcher's ticket (see fticket_refresh()).  The
  * dispatcher must already own a ticket; fdip->tick is not checked for NULL.
  */
 void
 fdisp_refresh(struct fuse_dispatcher *fdip)
 {
 	fticket_refresh(fdip->tick);
 }
 
 SDT_PROBE_DEFINE2(fusefs, , ipc, fdisp_wait_answ_error, "char*", "int");
 
 int
 fdisp_wait_answ(struct fuse_dispatcher *fdip)
 {
 	int err = 0;
 
 	fdip->answ_stat = 0;
 	fuse_insert_callback(fdip->tick, fuse_standard_handler);
 	fuse_insert_message(fdip->tick, false);
 
 	if ((err = fticket_wait_answer(fdip->tick))) {
 		fuse_lck_mtx_lock(fdip->tick->tk_aw_mtx);
 
 		if (fticket_answered(fdip->tick)) {
 			/*
 	                 * Just between noticing the interrupt and getting here,
 	                 * the standard handler has completed his job.
 	                 * So we drop the ticket and exit as usual.
 	                 */
 			SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
 				"IPC: interrupted, already answered", err);
 			fuse_lck_mtx_unlock(fdip->tick->tk_aw_mtx);
 			goto out;
 		} else {
 			/*
 	                 * So we were faster than the standard handler.
 	                 * Then by setting the answered flag we get *him*
 	                 * to drop the ticket.
 	                 */
 			SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
 				"IPC: interrupted, setting to answered", err);
 			fticket_set_answered(fdip->tick);
 			fuse_lck_mtx_unlock(fdip->tick->tk_aw_mtx);
 			return err;
 		}
 	}
 
 	if (fdip->tick->tk_aw_errno == ENOTCONN) {
 		/* The daemon died while we were waiting for a response */
 		err = ENOTCONN;
 		goto out;
 	} else if (fdip->tick->tk_aw_errno) {
 		/* 
 		 * There was some sort of communication error with the daemon
 		 * that the client wouldn't understand.
 		 */
 		SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
 			"IPC: explicit EIO-ing", fdip->tick->tk_aw_errno);
 		err = EIO;
 		goto out;
 	}
 	if ((err = fdip->tick->tk_aw_ohead.error)) {
 		SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
 			"IPC: setting status", fdip->tick->tk_aw_ohead.error);
 		/*
 	         * This means a "proper" fuse syscall error.
 	         * We record this value so the caller will
 	         * be able to know it's not a boring messaging
 	         * failure, if she wishes so (and if not, she can
 	         * just simply propagate the return value of this routine).
 	         * [XXX Maybe a bitflag would do the job too,
 	         * if other flags needed, this will be converted thusly.]
 	         */
 		fdip->answ_stat = err;
 		goto out;
 	}
 	fdip->answ = fticket_resp(fdip->tick)->base;
 	fdip->iosize = fticket_resp(fdip->tick)->len;
 
 	return 0;
 
 out:
 	return err;
 }
 
 /*
  * Create the UMA zone that tickets are allocated from, wiring up the
  * constructor/destructor and init/fini hooks defined in this file.
  */
 void
 fuse_ipc_init(void)
 {
 	ticket_zone = uma_zcreate("fuse_ticket", sizeof(struct fuse_ticket),
 	    fticket_ctor, fticket_dtor, fticket_init, fticket_fini,
 	    UMA_ALIGN_PTR, 0);
 }
 
 /* Destroy the ticket zone created by fuse_ipc_init(). */
 void
 fuse_ipc_destroy(void)
 {
 	uma_zdestroy(ticket_zone);
 }
Index: projects/fuse2/sys/fs/fuse/fuse_ipc.h
===================================================================
--- projects/fuse2/sys/fs/fuse/fuse_ipc.h	(revision 349237)
+++ projects/fuse2/sys/fs/fuse/fuse_ipc.h	(revision 349238)
@@ -1,425 +1,425 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2007-2009 Google Inc. and Amit Singh
  * All rights reserved.
  * 
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
  * 
  * * Redistributions of source code must retain the above copyright
  *   notice, this list of conditions and the following disclaimer.
  * * Redistributions in binary form must reproduce the above
  *   copyright notice, this list of conditions and the following disclaimer
  *   in the documentation and/or other materials provided with the
  *   distribution.
  * * Neither the name of Google Inc. nor the names of its
  *   contributors may be used to endorse or promote products derived from
  *   this software without specific prior written permission.
  * 
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  * 
  * Copyright (C) 2005 Csaba Henk.
  * All rights reserved.
  * 
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 
  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _FUSE_IPC_H_
 #define _FUSE_IPC_H_
 
 #include <sys/param.h>
 #include <sys/refcount.h>
 
 /*
  * Buffer descriptor embedded in each ticket for the outgoing message
  * (tk_ms_fiov) and for the answer (tk_aw_fiov).  Managed through the
  * fiov_*() routines declared below.
  */
 struct fuse_iov {
 	void   *base;		/* start of the buffer */
 	size_t  len;		/* length of the data held in the buffer */
 	size_t  allocated_size;	/* presumably the capacity behind base; TODO confirm in fiov_adjust() */
 	int     credit;		/* NOTE(review): meaning is not visible from this header */
 };
 
 void fiov_init(struct fuse_iov *fiov, size_t size);
 void fiov_teardown(struct fuse_iov *fiov);
 void fiov_refresh(struct fuse_iov *fiov);
 void fiov_adjust(struct fuse_iov *fiov, size_t size);
 
 /*
  * Size fiov to hold one *(spc1) header followed by amnt extra bytes, then
  * point spc1 at the start of the buffer and spc2 at the first byte after
  * the header.  spc1 is used both as an lvalue and inside sizeof, so it must
  * be a typed pointer variable.
  */
 #define FUSE_DIMALLOC(fiov, spc1, spc2, amnt) do {		\
 	fiov_adjust(fiov, (sizeof(*(spc1)) + (amnt)));		\
 	(spc1) = (fiov)->base;					\
 	(spc2) = (char *)(fiov)->base + (sizeof(*(spc1)));	\
 } while (0)
 
 #define FU_AT_LEAST(siz) max((siz), 160)
 
 /*
  * Assert that the ticket is no longer linked on the answer list.  Relies on
  * fuse_aw_remove() clearing both link pointers, which it only does under
  * INVARIANTS.
  */
 #define FUSE_ASSERT_AW_DONE(ftick)					\
 	KASSERT((ftick)->tk_aw_link.tqe_next == NULL &&			\
 	    (ftick)->tk_aw_link.tqe_prev == NULL,			\
 	    ("FUSE: ticket still on answer delivery list %p", (ftick)))
 
 /*
  * Assert that the ticket is no longer linked on the message list.  Relies on
  * fuse_ms_pop() clearing the link pointer, which it only does under
  * INVARIANTS.
  */
 #define FUSE_ASSERT_MS_DONE(ftick)				\
 	KASSERT((ftick)->tk_ms_link.stqe_next == NULL,		\
 	    ("FUSE: ticket still on message list %p", (ftick)))
 
 struct fuse_ticket;
 struct fuse_data;
 
 typedef int fuse_handler_t(struct fuse_ticket *ftick, struct uio *uio);
 
 struct fuse_ticket {
 	/* fields giving the identity of the ticket */
 	uint64_t			tk_unique;
 	struct fuse_data		*tk_data;
 	int				tk_flag;
 	u_int				tk_refcount;
 	/* 
 	 * If this ticket's operation has been interrupted, this will hold the
 	 * unique value of the FUSE_INTERRUPT operation.  Otherwise, it will be
 	 * 0.
 	 */
 	uint64_t			irq_unique;
 
 	/* fields for initiating an upgoing message */
 	struct fuse_iov			tk_ms_fiov;
 	void				*tk_ms_bufdata;
 	size_t				tk_ms_bufsize;
 	enum { FT_M_FIOV, FT_M_BUF }	tk_ms_type;
 	STAILQ_ENTRY(fuse_ticket)	tk_ms_link;
 
 	/* fields for handling answers coming from userspace */
 	struct fuse_iov			tk_aw_fiov;
 	void				*tk_aw_bufdata;
 	size_t				tk_aw_bufsize;
 	enum { FT_A_FIOV, FT_A_BUF }	tk_aw_type;
 
 	struct fuse_out_header		tk_aw_ohead;
 	int				tk_aw_errno;
 	struct mtx			tk_aw_mtx;
 	fuse_handler_t			*tk_aw_handler;
 	TAILQ_ENTRY(fuse_ticket)	tk_aw_link;
 };
 
 #define FT_ANSW  0x01  /* request of ticket has already been answered */
 #define FT_DIRTY 0x04  /* ticket has been used */
 
 /* Return the iov that holds the answer received for this ticket. */
 static inline struct fuse_iov *
 fticket_resp(struct fuse_ticket *ftick)
 {
 	struct fuse_iov *resp = &ftick->tk_aw_fiov;
 
 	return (resp);
 }
 
 /*
  * Report whether the ticket's request has been marked as answered.
  * The caller must hold the ticket's tk_aw_mtx.
  */
 static inline bool
 fticket_answered(struct fuse_ticket *ftick)
 {
 	mtx_assert(&ftick->tk_aw_mtx, MA_OWNED);
 
 	return ((ftick->tk_flag & FT_ANSW) != 0);
 }
 
 /*
  * Mark the ticket's request as answered.
  * The caller must hold the ticket's tk_aw_mtx.
  */
 static inline void
 fticket_set_answered(struct fuse_ticket *ftick)
 {
 	mtx_assert(&ftick->tk_aw_mtx, MA_OWNED);
 
 	ftick->tk_flag = ftick->tk_flag | FT_ANSW;
 }
 
 /* View the start of the ticket's outgoing message buffer as a fuse_in_header. */
 static inline struct fuse_in_header*
 fticket_in_header(struct fuse_ticket *ftick)
 {
 	void *msg = ftick->tk_ms_fiov.base;
 
 	return ((struct fuse_in_header *)msg);
 }
 
 static inline enum fuse_opcode
 fticket_opcode(struct fuse_ticket *ftick)
 {
 	return fticket_in_header(ftick)->opcode;
 }
 
 int fticket_pull(struct fuse_ticket *ftick, struct uio *uio);
 
 /*
  * The data representing a FUSE session.
  */
 struct fuse_data {
 	struct cdev			*fdev;
 	struct mount			*mp;
 	struct vnode			*vroot;
 	struct ucred			*daemoncred;
 	int				dataflags;
 	int				ref;
 
 	struct mtx			ms_mtx;
 	STAILQ_HEAD(, fuse_ticket)	ms_head;
 	int				ms_count;
 
 	struct mtx			aw_mtx;
 	TAILQ_HEAD(, fuse_ticket)	aw_head;
 
 	/* 
 	 * Holds the next value of the FUSE operation unique value.
 	 * Also, serves as a wakeup channel to prevent any operations from
 	 * being created before INIT completes.
 	 */
 	u_long				ticketer;
 
 	struct sx			rename_lock;
 
 	uint32_t			fuse_libabi_major;
 	uint32_t			fuse_libabi_minor;
 
-	uint32_t			max_readahead;
+	uint32_t			max_readahead_blocks;
 	uint32_t			max_write;
 	uint32_t			max_read;
 	uint32_t			subtype;
 	char				volname[MAXPATHLEN];
 
 	struct selinfo			ks_rsel;
 
 	int				daemon_timeout;
 	uint64_t			notimpl;
 	uint64_t			mnt_flag;
 };
 
 #define FSESS_DEAD                0x0001 /* session is to be closed */
 #define FSESS_UNUSED0             0x0002 /* unused */
 #define FSESS_INITED              0x0004 /* session has been inited */
 #define FSESS_DAEMON_CAN_SPY      0x0010 /* let non-owners access this fs */
                                          /* (and being observed by the daemon) */
 #define FSESS_PUSH_SYMLINKS_IN    0x0020 /* prefix absolute symlinks with mp */
 #define FSESS_DEFAULT_PERMISSIONS 0x0040 /* kernel does permission checking */
 #define FSESS_NO_ATTRCACHE        0x0080 /* no attribute caching */
 #define FSESS_NO_READAHEAD        0x0100 /* no readaheads */
 #define FSESS_NO_DATACACHE        0x0200 /* disable buffer cache */
 #define FSESS_NO_NAMECACHE        0x0400 /* disable name cache */
 #define FSESS_NO_MMAP             0x0800 /* disable mmap */
 #define FSESS_ASYNC_READ          0x1000 /* allow multiple reads of some file */
 #define FSESS_POSIX_LOCKS         0x2000 /* daemon supports POSIX locks */
 #define FSESS_EXPORT_SUPPORT      0x10000 /* daemon supports NFS-style lookups */
 #define FSESS_MNTOPTS_MASK	( \
 	FSESS_DAEMON_CAN_SPY | FSESS_PUSH_SYMLINKS_IN | \
 	FSESS_DEFAULT_PERMISSIONS | FSESS_NO_ATTRCACHE | \
 	FSESS_NO_READAHEAD | FSESS_NO_DATACACHE | \
 	FSESS_NO_NAMECACHE | FSESS_NO_MMAP)
 
 enum fuse_data_cache_mode {
 	FUSE_CACHE_UC,
 	FUSE_CACHE_WT,
 	FUSE_CACHE_WB,
 };
 
 extern int fuse_data_cache_mode;
 
 /* Return the fuse session data hung off the mount point's mnt_data. */
 static inline struct fuse_data *
 fuse_get_mpdata(struct mount *mp)
 {
 	struct fuse_data *data = mp->mnt_data;
 
 	return (data);
 }
 
 static inline bool
 fsess_isimpl(struct mount *mp, int opcode)
 {
 	struct fuse_data *data = fuse_get_mpdata(mp);
 
 	return ((data->notimpl & (1ULL << opcode)) == 0);
 
 }
 static inline void
 fsess_set_notimpl(struct mount *mp, int opcode)
 {
 	struct fuse_data *data = fuse_get_mpdata(mp);
 
 	data->notimpl |= (1ULL << opcode);
 }
 
 static inline bool
 fsess_opt_datacache(struct mount *mp)
 {
 	struct fuse_data *data = fuse_get_mpdata(mp);
 
 	return (fuse_data_cache_mode != FUSE_CACHE_UC &&
 	    (data->dataflags & FSESS_NO_DATACACHE) == 0);
 }
 
 /*
  * mmap is enabled when the global cache mode is not "uncached" and the mount
  * carries neither FSESS_NO_DATACACHE nor FSESS_NO_MMAP.
  */
 static inline bool
 fsess_opt_mmap(struct mount *mp)
 {
 	struct fuse_data *data = fuse_get_mpdata(mp);
 	const int blockers = FSESS_NO_DATACACHE | FSESS_NO_MMAP;
 
 	return (fuse_data_cache_mode != FUSE_CACHE_UC &&
 	    (data->dataflags & blockers) == 0);
 }
 
 /* Insert a new upgoing message */
 static inline void
 fuse_ms_push(struct fuse_ticket *ftick)
 {
 	mtx_assert(&ftick->tk_data->ms_mtx, MA_OWNED);
 	refcount_acquire(&ftick->tk_refcount);
 	STAILQ_INSERT_TAIL(&ftick->tk_data->ms_head, ftick, tk_ms_link);
 	ftick->tk_data->ms_count++;
 }
 
 /* Insert a new upgoing message to the front of the queue */
 static inline void
 fuse_ms_push_head(struct fuse_ticket *ftick)
 {
 	mtx_assert(&ftick->tk_data->ms_mtx, MA_OWNED);
 	refcount_acquire(&ftick->tk_refcount);
 	STAILQ_INSERT_HEAD(&ftick->tk_data->ms_head, ftick, tk_ms_link);
 	ftick->tk_data->ms_count++;
 }
 
 /*
  * Detach and return the first ticket on the session's message queue, or
  * NULL if the queue is empty.  The caller must hold ms_mtx.  The reference
  * taken by the push is not released here.
  */
 static inline struct fuse_ticket *
 fuse_ms_pop(struct fuse_data *data)
 {
 	struct fuse_ticket *head;
 
 	mtx_assert(&data->ms_mtx, MA_OWNED);
 
 	head = STAILQ_FIRST(&data->ms_head);
 	if (head == NULL)
 		return (NULL);
 
 	STAILQ_REMOVE_HEAD(&data->ms_head, tk_ms_link);
 	data->ms_count--;
 #ifdef INVARIANTS
 	MPASS(data->ms_count >= 0);
 	/* Clear the link so FUSE_ASSERT_MS_DONE() can tell it is unlinked. */
 	head->tk_ms_link.stqe_next = NULL;
 #endif
 
 	return (head);
 }
 
 static inline void
 fuse_aw_push(struct fuse_ticket *ftick)
 {
 	mtx_assert(&ftick->tk_data->aw_mtx, MA_OWNED);
 	refcount_acquire(&ftick->tk_refcount);
 	TAILQ_INSERT_TAIL(&ftick->tk_data->aw_head, ftick, tk_aw_link);
 }
 
 static inline void
 fuse_aw_remove(struct fuse_ticket *ftick)
 {
 	mtx_assert(&ftick->tk_data->aw_mtx, MA_OWNED);
 	TAILQ_REMOVE(&ftick->tk_data->aw_head, ftick, tk_aw_link);
 #ifdef INVARIANTS
 	ftick->tk_aw_link.tqe_next = NULL;
 	ftick->tk_aw_link.tqe_prev = NULL;
 #endif
 }
 
 /*
  * Detach and return the first ticket on the session's answer list, or NULL
  * if the list is empty.  The caller must hold aw_mtx.
  */
 static inline struct fuse_ticket *
 fuse_aw_pop(struct fuse_data *data)
 {
 	struct fuse_ticket *head;
 
 	mtx_assert(&data->aw_mtx, MA_OWNED);
 
 	head = TAILQ_FIRST(&data->aw_head);
 	if (head != NULL)
 		fuse_aw_remove(head);
 
 	return (head);
 }
 
 struct fuse_ticket *fuse_ticket_fetch(struct fuse_data *data);
 int fuse_ticket_drop(struct fuse_ticket *ftick);
 void fuse_insert_callback(struct fuse_ticket *ftick, fuse_handler_t *handler);
 void fuse_insert_message(struct fuse_ticket *ftick, bool irq);
 
 /*
  * Return true if the session's recorded ABI version (major.minor) is at
  * least abi_maj.abi_min.
  */
 static inline bool
 fuse_libabi_geq(struct fuse_data *data, uint32_t abi_maj, uint32_t abi_min)
 {
 	/* A differing major number decides the comparison on its own. */
 	if (data->fuse_libabi_major != abi_maj)
 		return (data->fuse_libabi_major > abi_maj);
 
 	return (data->fuse_libabi_minor >= abi_min);
 }
 
 struct fuse_data *fdata_alloc(struct cdev *dev, struct ucred *cred);
 void fdata_trydestroy(struct fuse_data *data);
 void fdata_set_dead(struct fuse_data *data);
 
 /* Return true if the session has been flagged FSESS_DEAD. */
 static inline bool
 fdata_get_dead(struct fuse_data *data)
 {
 	return ((data->dataflags & FSESS_DEAD) != 0);
 }
 
 /*
  * Per-request helper used to build a FUSE request, wait for its answer and
  * hand the result back to the caller.  Initialize with fdisp_init(), fill
  * in with fdisp_make*(), wait with fdisp_wait_answ(), and release with
  * fdisp_destroy().
  */
 struct fuse_dispatcher {
 	struct fuse_ticket    *tick;	/* ticket backing the request; NULL after fdisp_init() */
 	struct fuse_in_header *finh;	/* presumably the request header; set outside this header */
 
 	void    *indata;	/* request payload that callers fill in after fdisp_make*() */
 	size_t   iosize;	/* request payload size on input; answer length after fdisp_wait_answ() */
 	uint64_t nodeid;
 	int      answ_stat;	/* error reported in the answer header, if any */
 	void    *answ;		/* answer payload; points into the ticket's answer buffer */
 };
 
 /*
  * Prepare a dispatcher for a request with a payload of iosize bytes.  No
  * ticket is attached yet.
  */
 static inline void
 fdisp_init(struct fuse_dispatcher *fdisp, size_t iosize)
 {
 	fdisp->tick = NULL;
 	fdisp->iosize = iosize;
 }
 
 /*
  * Drop the dispatcher's ticket.  Under INVARIANTS the ticket pointer is also
  * cleared afterwards.
  */
 static inline void
 fdisp_destroy(struct fuse_dispatcher *fdisp)
 {
 	struct fuse_ticket *ftick = fdisp->tick;
 
 	fuse_ticket_drop(ftick);
 #ifdef INVARIANTS
 	fdisp->tick = NULL;
 #endif
 }
 
 void fdisp_refresh(struct fuse_dispatcher *fdip);
 
 void fdisp_make(struct fuse_dispatcher *fdip, enum fuse_opcode op,
     struct mount *mp, uint64_t nid, struct thread *td, struct ucred *cred);
 
 void fdisp_make_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op,
     struct vnode *vp, struct thread *td, struct ucred *cred);
 
 void fdisp_refresh_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op,
     struct vnode *vp, struct thread *td, struct ucred *cred);
 
 int fdisp_wait_answ(struct fuse_dispatcher *fdip);
 
 /*
  * Convenience wrapper: build a request for the given vnode and opcode, then
  * wait for the answer.  Returns the result of fdisp_wait_answ().
  */
 static inline int
 fdisp_simple_putget_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op,
     struct vnode *vp, struct thread *td, struct ucred *cred)
 {
 	int err;
 
 	fdisp_make_vp(fdip, op, vp, td, cred);
 	err = fdisp_wait_answ(fdip);
 
 	return (err);
 }
 
 #endif /* _FUSE_IPC_H_ */
Index: projects/fuse2/sys/fs/fuse/fuse_vnops.c
===================================================================
--- projects/fuse2/sys/fs/fuse/fuse_vnops.c	(revision 349237)
+++ projects/fuse2/sys/fs/fuse/fuse_vnops.c	(revision 349238)
@@ -1,2582 +1,2670 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2007-2009 Google Inc. and Amit Singh
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
  *
  * * Redistributions of source code must retain the above copyright
  *   notice, this list of conditions and the following disclaimer.
  * * Redistributions in binary form must reproduce the above
  *   copyright notice, this list of conditions and the following disclaimer
  *   in the documentation and/or other materials provided with the
  *   distribution.
  * * Neither the name of Google Inc. nor the names of its
  *   contributors may be used to endorse or promote products derived from
  *   this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Copyright (C) 2005 Csaba Henk.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/module.h>
 #include <sys/systm.h>
 #include <sys/errno.h>
 #include <sys/kernel.h>
 #include <sys/conf.h>
 #include <sys/uio.h>
 #include <sys/malloc.h>
 #include <sys/queue.h>
 #include <sys/lock.h>
 #include <sys/rwlock.h>
 #include <sys/sx.h>
 #include <sys/proc.h>
 #include <sys/mount.h>
 #include <sys/vnode.h>
 #include <sys/namei.h>
 #include <sys/extattr.h>
 #include <sys/stat.h>
 #include <sys/unistd.h>
 #include <sys/filedesc.h>
 #include <sys/file.h>
 #include <sys/fcntl.h>
 #include <sys/dirent.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/sysctl.h>
 #include <sys/vmmeter.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_page.h>
 #include <vm/vm_param.h>
 #include <vm/vm_object.h>
 #include <vm/vm_pager.h>
 #include <vm/vnode_pager.h>
 #include <vm/vm_object.h>
 
 #include "fuse.h"
 #include "fuse_file.h"
 #include "fuse_internal.h"
 #include "fuse_ipc.h"
 #include "fuse_node.h"
 #include "fuse_io.h"
 
 #include <sys/priv.h>
 
 /* Maximum number of hardlinks to a single FUSE file */
 #define FUSE_LINK_MAX                      UINT32_MAX
 
 SDT_PROVIDER_DECLARE(fusefs);
 /* 
  * Fuse trace probe:
  * arg0: verbosity.  Higher numbers give more verbose messages
  * arg1: Textual message
  */
 SDT_PROBE_DEFINE2(fusefs, , vnops, trace, "int", "char*");
 
 /* vnode ops */
 static vop_access_t fuse_vnop_access;
 static vop_advlock_t fuse_vnop_advlock;
+static vop_bmap_t fuse_vnop_bmap;
 static vop_close_t fuse_fifo_close;
 static vop_close_t fuse_vnop_close;
 static vop_create_t fuse_vnop_create;
 static vop_deleteextattr_t fuse_vnop_deleteextattr;
 static vop_fdatasync_t fuse_vnop_fdatasync;
 static vop_fsync_t fuse_vnop_fsync;
 static vop_getattr_t fuse_vnop_getattr;
 static vop_getextattr_t fuse_vnop_getextattr;
 static vop_inactive_t fuse_vnop_inactive;
 static vop_link_t fuse_vnop_link;
 static vop_listextattr_t fuse_vnop_listextattr;
 static vop_lookup_t fuse_vnop_lookup;
 static vop_mkdir_t fuse_vnop_mkdir;
 static vop_mknod_t fuse_vnop_mknod;
 static vop_open_t fuse_vnop_open;
 static vop_pathconf_t fuse_vnop_pathconf;
 static vop_read_t fuse_vnop_read;
 static vop_readdir_t fuse_vnop_readdir;
 static vop_readlink_t fuse_vnop_readlink;
 static vop_reclaim_t fuse_vnop_reclaim;
 static vop_remove_t fuse_vnop_remove;
 static vop_rename_t fuse_vnop_rename;
 static vop_rmdir_t fuse_vnop_rmdir;
 static vop_setattr_t fuse_vnop_setattr;
 static vop_setextattr_t fuse_vnop_setextattr;
 static vop_strategy_t fuse_vnop_strategy;
 static vop_symlink_t fuse_vnop_symlink;
 static vop_write_t fuse_vnop_write;
 static vop_getpages_t fuse_vnop_getpages;
 static vop_putpages_t fuse_vnop_putpages;
 static vop_print_t fuse_vnop_print;
 static vop_vptofh_t fuse_vnop_vptofh;
 
 struct vop_vector fuse_fifoops = {
 	.vop_default =		&fifo_specops,
 	.vop_access =		fuse_vnop_access,
 	.vop_close =		fuse_fifo_close,
 	.vop_fsync =		fuse_vnop_fsync,
 	.vop_getattr =		fuse_vnop_getattr,
 	.vop_inactive =		fuse_vnop_inactive,
 	.vop_pathconf =		fuse_vnop_pathconf,
 	.vop_print =		fuse_vnop_print,
 	.vop_read =		VOP_PANIC,
 	.vop_reclaim =		fuse_vnop_reclaim,
 	.vop_setattr =		fuse_vnop_setattr,
 	.vop_write =		VOP_PANIC,
 	.vop_vptofh =		fuse_vnop_vptofh,
 };
 
 struct vop_vector fuse_vnops = {
 	.vop_allocate =	VOP_EINVAL,
 	.vop_default = &default_vnodeops,
 	.vop_access = fuse_vnop_access,
 	.vop_advlock = fuse_vnop_advlock,
+	.vop_bmap = fuse_vnop_bmap,
 	.vop_close = fuse_vnop_close,
 	.vop_create = fuse_vnop_create,
 	.vop_deleteextattr = fuse_vnop_deleteextattr,
 	.vop_fsync = fuse_vnop_fsync,
 	.vop_fdatasync = fuse_vnop_fdatasync,
 	.vop_getattr = fuse_vnop_getattr,
 	.vop_getextattr = fuse_vnop_getextattr,
 	.vop_inactive = fuse_vnop_inactive,
 	/*
 	 * TODO: implement vop_ioctl after upgrading to protocol 7.16.
 	 * FUSE_IOCTL was added in 7.11, but 32-bit compat is broken until
 	 * 7.16.
 	 */
 	.vop_link = fuse_vnop_link,
 	.vop_listextattr = fuse_vnop_listextattr,
 	.vop_lookup = fuse_vnop_lookup,
 	.vop_mkdir = fuse_vnop_mkdir,
 	.vop_mknod = fuse_vnop_mknod,
 	.vop_open = fuse_vnop_open,
 	.vop_pathconf = fuse_vnop_pathconf,
 	/*
 	 * TODO: implement vop_poll after upgrading to protocol 7.21.
 	 * FUSE_POLL was added in protocol 7.11, but it's kind of broken until
 	 * 7.21, which adds the ability for the client to choose which poll
 	 * events it wants, and for a client to deregister a file handle
 	 */
 	.vop_read = fuse_vnop_read,
 	.vop_readdir = fuse_vnop_readdir,
 	.vop_readlink = fuse_vnop_readlink,
 	.vop_reclaim = fuse_vnop_reclaim,
 	.vop_remove = fuse_vnop_remove,
 	.vop_rename = fuse_vnop_rename,
 	.vop_rmdir = fuse_vnop_rmdir,
 	.vop_setattr = fuse_vnop_setattr,
 	.vop_setextattr = fuse_vnop_setextattr,
 	.vop_strategy = fuse_vnop_strategy,
 	.vop_symlink = fuse_vnop_symlink,
 	.vop_write = fuse_vnop_write,
 	.vop_getpages = fuse_vnop_getpages,
 	.vop_putpages = fuse_vnop_putpages,
 	.vop_print = fuse_vnop_print,
 	.vop_vptofh = fuse_vnop_vptofh,
 };
 
 u_long fuse_lookup_cache_hits = 0;
 
 SYSCTL_ULONG(_vfs_fusefs, OID_AUTO, lookup_cache_hits, CTLFLAG_RD,
     &fuse_lookup_cache_hits, 0, "number of positive cache hits in lookup");
 
 u_long fuse_lookup_cache_misses = 0;
 
 SYSCTL_ULONG(_vfs_fusefs, OID_AUTO, lookup_cache_misses, CTLFLAG_RD,
     &fuse_lookup_cache_misses, 0, "number of cache misses in lookup");
 
 /*
  * XXX: This feature is highly experimental and can cause instability;
  * it needs revisiting before being enabled by default.
  */
 static int fuse_reclaim_revoked = 0;
 
 SYSCTL_INT(_vfs_fusefs, OID_AUTO, reclaim_revoked, CTLFLAG_RW,
     &fuse_reclaim_revoked, 0, "");
 
 uma_zone_t fuse_pbuf_zone;
 
 #define fuse_vm_page_lock(m)		vm_page_lock((m));
 #define fuse_vm_page_unlock(m)		vm_page_unlock((m));
 #define fuse_vm_page_lock_queues()	((void)0)
 #define fuse_vm_page_unlock_queues()	((void)0)
 
 /*
  * Check permission for extattr operations, much like extattr_check_cred.
  *
  * Returns 0 if the operation is allowed, EPERM for an unknown namespace, or
  * the error from the underlying privilege/access check.
  */
 static int
 fuse_extattr_check_cred(struct vnode *vp, int ns, struct ucred *cred,
 	struct thread *td, accmode_t accmode)
 {
 	struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp));
 
 	/* Requests made by the kernel itself always succeed. */
 	if (cred == NOCRED)
 		return (0);
 
 	if (ns != EXTATTR_NAMESPACE_SYSTEM && ns != EXTATTR_NAMESPACE_USER)
 		return (EPERM);
 
 	/*
 	 * Do not allow privileged processes in jail to directly manipulate
 	 * system attributes.  Without default_permissions the system
 	 * namespace is checked the same way as the user namespace.
 	 */
 	if (ns == EXTATTR_NAMESPACE_SYSTEM &&
 	    (data->dataflags & FSESS_DEFAULT_PERMISSIONS) != 0)
 		return (priv_check_cred(cred, PRIV_VFS_EXTATTR_SYSTEM));
 
 	return (fuse_internal_access(vp, accmode, td, cred));
 }
 
 /*
  * Get a filehandle for a directory: try a read handle first, then fall back
  * to an exec handle.  Returns 0 on success or the error of the fallback.
  */
 static int
 fuse_filehandle_get_dir(struct vnode *vp, struct fuse_filehandle **fufhp,
 	struct ucred *cred, pid_t pid)
 {
 	int err;
 
 	err = fuse_filehandle_get(vp, FREAD, fufhp, cred, pid);
 	if (err != 0)
 		err = fuse_filehandle_get(vp, FEXEC, fufhp, cred, pid);
 
 	return (err);
 }
 
 /*
  * Send FUSE_FLUSH for this vnode.
  *
  * Does nothing and returns 0 once FUSE_FLUSH is known to be unimplemented for
  * this mount.  An ENOSYS answer marks the operation as unimplemented and is
  * reported as success.
  */
 static int
 fuse_flush(struct vnode *vp, struct ucred *cred, pid_t pid, int fflag)
 {
 	struct mount *mp = vnode_mount(vp);
 	struct thread *td = curthread;
 	struct fuse_dispatcher fdi;
 	struct fuse_filehandle *fufh;
 	struct fuse_flush_in *ffi;
 	int err;
 
 	if (!fsess_isimpl(mp, FUSE_FLUSH))
 		return (0);
 
 	err = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid);
 	if (err != 0)
 		return (err);
 
 	fdisp_init(&fdi, sizeof(*ffi));
 	fdisp_make_vp(&fdi, FUSE_FLUSH, vp, td, cred);
 	ffi = fdi.indata;
 	ffi->fh = fufh->fh_id;
 	/*
 	 * lock_owner is only meaningful when the file has a POSIX lock, and
 	 * undefined otherwise, so it is simply always filled in.
 	 */
 	ffi->lock_owner = td->td_proc->p_pid;
 
 	err = fdisp_wait_answ(&fdi);
 	fdisp_destroy(&fdi);
 
 	if (err == ENOSYS) {
 		fsess_set_notimpl(mp, FUSE_FLUSH);
 		err = 0;
 	}
 	return (err);
 }
 
 /*
  * Close wrapper for fifos: forwards the close unchanged to the generic fifo
  * vnode operations.
  */
 static int
 fuse_fifo_close(struct vop_close_args *ap)
 {
 	return (fifo_specops.vop_close(ap));
 }
 
 /*
     struct vnop_access_args {
 	struct vnode *a_vp;
 #if VOP_ACCESS_TAKES_ACCMODE_T
 	accmode_t a_accmode;
 #else
 	int a_mode;
 #endif
 	struct ucred *a_cred;
 	struct thread *a_td;
     };
 */
 static int
 fuse_vnop_access(struct vop_access_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct ucred *cred = ap->a_cred;
 	struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp));
 
 	/* On a dead file system only the root vnode remains accessible. */
 	if (fuse_isdeadfs(vp))
 		return (vnode_isvroot(vp) ? 0 : ENXIO);
 
 	/* Before the session is initialized only the root may be accessed. */
 	if ((data->dataflags & FSESS_INITED) == 0) {
 		/*
 		 * NOTE(review): priv_check_cred() normally returns 0 when the
 		 * privilege is held, so this test grants access when the
 		 * check *fails*.  Confirm whether that is intended.
 		 */
 		if (vnode_isvroot(vp) &&
 		    (priv_check_cred(cred, PRIV_VFS_ADMIN) ||
 		    fuse_match_cred(data->daemoncred, cred) == 0))
 			return (0);
 		return (EBADF);
 	}
 
 	/* Symlinks are always accessible. */
 	if (vnode_islnk(vp))
 		return (0);
 
 	return (fuse_internal_access(vp, ap->a_accmode, ap->a_td, cred));
 }
 
 /*
  * struct vop_advlock_args {
  *	struct vop_generic_args a_gen;
  *	struct vnode *a_vp;
  *	void *a_id;
  *	int a_op;
  *	struct flock *a_fl;
  *	int a_flags;
  * }
  */
 /*
  * Advisory locking.  POSIX record locks are forwarded to the daemon as
  * FUSE_GETLK/FUSE_SETLK/FUSE_SETLKW when it advertises FSESS_POSIX_LOCKS;
  * everything else (including flock-style locks) is handled locally by
  * vop_stdadvlock().
  *
  * Returns 0 on success, ENXIO on a dead file system, EINVAL for an unknown
  * operation, or the error from the filehandle lookup or the daemon.
  */
 static int
 fuse_vnop_advlock(struct vop_advlock_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct flock *fl = ap->a_fl;
 	struct thread *td = curthread;
 	struct ucred *cred = td->td_ucred;
 	pid_t pid = td->td_proc->p_pid;
 	struct fuse_filehandle *fufh;
 	struct fuse_dispatcher fdi;
 	struct fuse_lk_in *fli;
 	struct fuse_lk_out *flo;
 	enum fuse_opcode op;
 	int dataflags, err;
 	int flags = ap->a_flags;
 
 	dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags;
 
 	if (fuse_isdeadfs(vp)) {
 		return ENXIO;
 	}
 
 	if (!(dataflags & FSESS_POSIX_LOCKS))
 		return vop_stdadvlock(ap);
 	/* FUSE doesn't properly support flock until protocol 7.17 */
 	if (flags & F_FLOCK)
 		return vop_stdadvlock(ap);
 
 	err = fuse_filehandle_get_anyflags(vp, &fufh, cred, pid);
 	if (err)
 		return err;
 
 	fdisp_init(&fdi, sizeof(*fli));
 
 	switch(ap->a_op) {
 	case F_GETLK:
 		op = FUSE_GETLK;
 		break;
 	case F_SETLK:
 		op = FUSE_SETLK;
 		break;
 	case F_SETLKW:
 		op = FUSE_SETLKW;
 		break;
 	default:
 		/* No ticket has been attached yet, so there is nothing to drop */
 		return EINVAL;
 	}
 
 	fdisp_make_vp(&fdi, op, vp, td, cred);
 	fli = fdi.indata;
 	fli->fh = fufh->fh_id;
 	fli->owner = fl->l_pid;
 	fli->lk.start = fl->l_start;
 	/* A zero l_len means "to end of file"; FUSE expresses it as INT64_MAX */
 	if (fl->l_len != 0)
 		fli->lk.end = fl->l_start + fl->l_len - 1;
 	else
 		fli->lk.end = INT64_MAX;
 	fli->lk.type = fl->l_type;
 	fli->lk.pid = fl->l_pid;
 
 	err = fdisp_wait_answ(&fdi);
 
 	if (err == 0 && op == FUSE_GETLK) {
 		flo = fdi.answ;
 		fl->l_type = flo->lk.type;
 		fl->l_pid = flo->lk.pid;
 		if (flo->lk.type != F_UNLCK) {
 			fl->l_start = flo->lk.start;
 			if (flo->lk.end == INT64_MAX)
 				fl->l_len = 0;
 			else
 				fl->l_len = flo->lk.end - flo->lk.start + 1;
 		}
 	}
 
 	/*
 	 * fdi.answ points into the ticket's answer buffer, so the ticket must
 	 * not be dropped until the answer has been copied out above.
 	 */
 	fdisp_destroy(&fdi);
 
 	return err;
+}
+
+/* {
+	struct vnode *a_vp;
+	daddr_t a_bn;
+	struct bufobj **a_bop;
+	daddr_t *a_bnp;
+	int *a_runp;
+	int *a_runb;
+} */
+/*
+ * Map a logical block of the file to a "physical" block and report how many
+ * contiguous blocks follow and precede it.
+ *
+ * If the daemon implements FUSE_BMAP its answer supplies the physical block;
+ * otherwise a default mapping is made up.  An ENOSYS answer marks FUSE_BMAP
+ * as unimplemented for this mount.
+ *
+ * Returns 0 on success, ENXIO on a dead file system, or the error from the
+ * daemon or from the file size lookup.
+ */
+static int
+fuse_vnop_bmap(struct vop_bmap_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct bufobj **bo = ap->a_bop;
+	struct thread *td = curthread;
+	struct mount *mp;
+	struct fuse_dispatcher fdi;
+	struct fuse_bmap_in *fbi;
+	struct fuse_bmap_out *fbo;
+	struct fuse_data *data;
+	uint64_t biosize;
+	off_t filesize;
+	daddr_t lbn = ap->a_bn;
+	daddr_t *pbn = ap->a_bnp;
+	int *runp = ap->a_runp;
+	int *runb = ap->a_runb;
+	int error = 0;
+	int maxrun;
+
+	if (fuse_isdeadfs(vp)) {
+		return ENXIO;
+	}
+
+	mp = vnode_mount(vp);
+	data = fuse_get_mpdata(mp);
+	biosize = fuse_iosize(vp);
+	/*
+	 * Do the arithmetic in a signed type: biosize is unsigned, so
+	 * "iosize_max / biosize - 1" would otherwise wrap to a huge value
+	 * when iosize_max is smaller than one block.
+	 */
+	maxrun = MIN(MAX(0,
+		(off_t)vp->v_mount->mnt_iosize_max / (off_t)biosize - 1),
+		(off_t)data->max_readahead_blocks);
+
+	if (bo != NULL)
+		*bo = &vp->v_bufobj;
+
+	/*
+	 * The FUSE_BMAP operation does not include the runp and runb
+	 * variables, so we must guess.  Report nonzero contiguous runs so
+	 * cluster_read will combine adjacent reads.  It's worthwhile to reduce
+	 * upcalls even if we don't know the true physical layout of the file.
+	 * 
+	 * FUSE file systems may opt out of read clustering in two ways:
+	 * * mounting with -onoclusterr
+	 * * Setting max_readahead <= maxbcachebuf during FUSE_INIT
+	 */
+	if (runb != NULL)
+		*runb = MIN(lbn, maxrun);
+	if (runp != NULL) {
+		error = fuse_vnode_size(vp, &filesize, td->td_ucred, td);
+		/*
+		 * The cast keeps the subtraction signed; without it a block
+		 * at or past EOF wraps around and MAX(0, ...) cannot clamp it.
+		 */
+		if (error == 0)
+			*runp = MIN(MAX(0,
+				filesize / (off_t)biosize - lbn - 1), maxrun);
+		else
+			*runp = 0;
+	}
+
+	if (fsess_isimpl(mp, FUSE_BMAP)) {
+		fdisp_init(&fdi, sizeof(*fbi));
+		fdisp_make_vp(&fdi, FUSE_BMAP, vp, td, td->td_ucred);
+		fbi = fdi.indata;
+		fbi->block = lbn;
+		fbi->blocksize = biosize;
+		error = fdisp_wait_answ(&fdi);
+		/* Only look at the answer when the request succeeded */
+		if (error == 0 && pbn != NULL) {
+			fbo = fdi.answ;
+			*pbn = fbo->block;
+		}
+		/* Release the ticket on every path, after reading the answer */
+		fdisp_destroy(&fdi);
+		if (error != ENOSYS)
+			return (error);
+		fsess_set_notimpl(mp, FUSE_BMAP);
+		error = 0;
+	}
+
+	/* If the daemon doesn't support BMAP, make up a sensible default */
+	if (pbn != NULL)
+		*pbn = lbn * btodb(biosize);
+	return (error);
 }
 
 /*
     struct vop_close_args {
 	struct vnode *a_vp;
 	int  a_fflag;
 	struct ucred *a_cred;
 	struct thread *a_td;
     };
 */
 static int
 fuse_vnop_close(struct vop_close_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct thread *td = ap->a_td;
 	struct ucred *cred = ap->a_cred;
 	const int fflag = ap->a_fflag;
 	const pid_t pid = td->td_proc->p_pid;
 	int err;
 
 	/* Nothing to flush for dead file systems, directories or IO_NDELAY. */
 	if (fuse_isdeadfs(vp) || vnode_isdir(vp) || (fflag & IO_NDELAY) != 0)
 		return (0);
 
 	err = fuse_flush(vp, cred, pid, fflag);
 	/* TODO: close the file handle, if we're sure it's no longer used */
 
 	/* A pending size change is saved even if the flush failed. */
 	if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0)
 		fuse_vnode_savesize(vp, cred, pid);
 
 	return (err);
 }
 
 /*
  * Build a FUSE_MKNOD request for the name in cnp under the directory with
  * node id parentnid, for use when FUSE_CREATE is unavailable.  The payload is
  * a fuse_mknod_in followed by the NUL-terminated name.  *op is set to
  * FUSE_MKNOD so the caller knows which operation was issued.
  */
 static void
 fdisp_make_mknod_for_fallback(
 	struct fuse_dispatcher *fdip,
 	struct componentname *cnp,
 	struct vnode *dvp,
 	uint64_t parentnid,
 	struct thread *td,
 	struct ucred *cred,
 	mode_t mode,
 	enum fuse_opcode *op)
 {
 	struct fuse_mknod_in *fmni;
 	const size_t namelen = cnp->cn_namelen;
 	char *name;
 
 	*op = FUSE_MKNOD;
 	fdisp_init(fdip, sizeof(*fmni) + namelen + 1);
 	fdisp_make(fdip, FUSE_MKNOD, vnode_mount(dvp), parentnid, td, cred);
 
 	fmni = fdip->indata;
 	fmni->mode = mode;
 	fmni->rdev = 0;
 
 	/* The file name follows the fixed-size header. */
 	name = (char *)fdip->indata + sizeof(*fmni);
 	memcpy(name, cnp->cn_nameptr, namelen);
 	name[namelen] = '\0';
 }
 /*
     struct vnop_create_args {
 	struct vnode *a_dvp;
 	struct vnode **a_vpp;
 	struct componentname *a_cnp;
 	struct vattr *a_vap;
     };
 */
 /*
  * Create (and implicitly open) a regular file named by a_cnp in the
  * directory a_dvp, returning the new vnode in *a_vpp.
  *
  * Sockets are handed to fuse_internal_mknod().  Any other non-regular type
  * is rejected with EINVAL.  For regular files FUSE_CREATE is used when the
  * daemon implements it; otherwise (or when the daemon answers ENOSYS) the
  * file is created with FUSE_MKNOD followed by a separate FUSE_OPEN.
  *
  * Returns 0 on success, ENXIO on a dead file system, or the error reported
  * by the daemon / vnode allocation.
  */
 static int
 fuse_vnop_create(struct vop_create_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode **vpp = ap->a_vpp;
 	struct componentname *cnp = ap->a_cnp;
 	struct vattr *vap = ap->a_vap;
 	struct thread *td = cnp->cn_thread;
 	struct ucred *cred = cnp->cn_cred;
 
 	struct fuse_data *data;
 	struct fuse_create_in *fci;
 	struct fuse_entry_out *feo;
 	struct fuse_open_out *foo;
 	struct fuse_dispatcher fdi, fdi2;
 	struct fuse_dispatcher *fdip = &fdi;
 	struct fuse_dispatcher *fdip2 = NULL;
 
 	int err;
 
 	struct mount *mp = vnode_mount(dvp);
 	data = fuse_get_mpdata(mp);
 	uint64_t parentnid = VTOFUD(dvp)->nid;
 	mode_t mode = MAKEIMODE(vap->va_type, vap->va_mode);
 	enum fuse_opcode op;
 	int flags;
 
 	if (fuse_isdeadfs(dvp))
 		return ENXIO;
 
 	/* FUSE expects sockets to be created with FUSE_MKNOD */
 	if (vap->va_type == VSOCK)
 		return fuse_internal_mknod(dvp, vpp, cnp, vap);
 
 	/*
 	 * VOP_CREATE doesn't tell us the open(2) flags, so we guess.  Only a
 	 * writable mode makes sense, and we might as well include readability
 	 * too.
 	 */
 	flags = O_RDWR;
 
 	bzero(&fdi, sizeof(fdi));
 
 	if (vap->va_type != VREG)
 		return (EINVAL);
 
 	/* Sockets were already dispatched above; only VREG reaches here. */
 	if (!fsess_isimpl(mp, FUSE_CREATE)) {
 		/* Fallback to FUSE_MKNOD/FUSE_OPEN */
 		fdisp_make_mknod_for_fallback(fdip, cnp, dvp, parentnid, td,
 			cred, mode, &op);
 	} else {
 		/* Use FUSE_CREATE */
 		size_t insize;
 
 		op = FUSE_CREATE;
 		fdisp_init(fdip, sizeof(*fci) + cnp->cn_namelen + 1);
 		fdisp_make(fdip, op, vnode_mount(dvp), parentnid, td, cred);
 		fci = fdip->indata;
 		fci->mode = mode;
 		fci->flags = O_CREAT | flags;
 		/*
 		 * The umask field is only filled in for protocol 7.12 and
 		 * newer; for older daemons the name follows a header the size
 		 * of struct fuse_open_in.
 		 */
 		if (fuse_libabi_geq(data, 7, 12)) {
 			insize = sizeof(*fci);
 			fci->umask = td->td_proc->p_fd->fd_cmask;
 		} else {
 			insize = sizeof(struct fuse_open_in);
 		}
 
 		memcpy((char *)fdip->indata + insize, cnp->cn_nameptr,
 		    cnp->cn_namelen);
 		((char *)fdip->indata)[insize + cnp->cn_namelen] = '\0';
 	}
 
 	err = fdisp_wait_answ(fdip);
 
 	if (err) {
 		if (err == ENOSYS && op == FUSE_CREATE) {
 			/* Remember that FUSE_CREATE is unsupported and retry */
 			fsess_set_notimpl(mp, FUSE_CREATE);
 			/*
 			 * Drop the ticket of the failed FUSE_CREATE before
 			 * the fallback helper re-initialises the dispatcher;
 			 * otherwise that ticket is never released.
 			 * NOTE(review): relies on fdisp_init() not dropping a
 			 * ticket the dispatcher already holds -- confirm.
 			 */
 			fdisp_destroy(fdip);
 			fdisp_make_mknod_for_fallback(fdip, cnp, dvp,
 				parentnid, td, cred, mode, &op);
 			err = fdisp_wait_answ(fdip);
 		}
 		if (err)
 			goto out;
 	}
 
 	feo = fdip->answ;
 
 	if ((err = fuse_internal_checkentry(feo, vap->va_type))) {
 		goto out;
 	}
 
 	if (op == FUSE_CREATE) {
 		/* The open reply directly follows the entry reply */
 		foo = (struct fuse_open_out*)(feo + 1);
 	} else {
 		/* Issue a separate FUSE_OPEN */
 		struct fuse_open_in *foi;
 
 		fdip2 = &fdi2;
 		fdisp_init(fdip2, sizeof(*foi));
 		fdisp_make(fdip2, FUSE_OPEN, vnode_mount(dvp), feo->nodeid, td,
 			cred);
 		foi = fdip2->indata;
 		foi->flags = flags;
 		err = fdisp_wait_answ(fdip2);
 		if (err)
 			goto out;
 		foo = fdip2->answ;
 	}
 	err = fuse_vnode_get(mp, feo, feo->nodeid, dvp, vpp, cnp, vap->va_type);
 	if (err) {
 		/*
 		 * No vnode could be obtained for the new file: queue a
 		 * FUSE_RELEASE for the file handle the daemon just opened.
 		 */
 		struct fuse_release_in *fri;
 		uint64_t nodeid = feo->nodeid;
 		uint64_t fh_id = foo->fh;
 
 		/*
 		 * nodeid and fh_id were copied out above, so the answer held
 		 * by fdip is no longer needed.  Release its ticket before the
 		 * dispatcher is reused for the FUSE_RELEASE message.
 		 */
 		fdisp_destroy(fdip);
 		fdisp_init(fdip, sizeof(*fri));
 		fdisp_make(fdip, FUSE_RELEASE, mp, nodeid, td, cred);
 		fri = fdip->indata;
 		fri->fh = fh_id;
 		fri->flags = flags;
 		fuse_insert_callback(fdip->tick, fuse_internal_forget_callback);
 		fuse_insert_message(fdip->tick, false);
 		goto out;
 	}
 	ASSERT_VOP_ELOCKED(*vpp, "fuse_vnop_create");
 	fuse_internal_cache_attrs(*vpp, &feo->attr, feo->attr_valid,
 		feo->attr_valid_nsec, NULL);
 
 	fuse_filehandle_init(*vpp, FUFH_RDWR, NULL, td, cred, foo);
 	fuse_vnode_open(*vpp, foo->open_flags, td);
 	/*
 	 * Purge the parent's attribute cache because the daemon should've
 	 * updated its mtime and ctime
 	 */
 	fuse_vnode_clear_attr_cache(dvp);
 	cache_purge_negative(dvp);
 
 out:
 	if (fdip2)
 		fdisp_destroy(fdip2);
 	fdisp_destroy(fdip);
 	return err;
 }
 
 /*
     struct vnop_fdatasync_args {
 	struct vop_generic_args a_gen;
 	struct vnode * a_vp;
 	struct thread * a_td;
     };
 */
 static int
 fuse_vnop_fdatasync(struct vop_fdatasync_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct thread *td = ap->a_td;
 	int waitfor = MNT_WAIT;
 
 	int err = 0;
 
 	if (fuse_isdeadfs(vp)) {
 		return 0;
 	}
 	if ((err = vop_stdfdatasync_buf(ap)))
 		return err;
 
 	return fuse_internal_fsync(vp, td, waitfor, true);
 }
 
 /*
     struct vnop_fsync_args {
 	struct vop_generic_args a_gen;
 	struct vnode * a_vp;
 	int  a_waitfor;
 	struct thread * a_td;
     };
 */
 static int
 fuse_vnop_fsync(struct vop_fsync_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct thread *td = ap->a_td;
 	int waitfor = ap->a_waitfor;
 	int err = 0;
 
 	if (fuse_isdeadfs(vp)) {
 		return 0;
 	}
 	if ((err = vop_stdfsync(ap)))
 		return err;
 
 	return fuse_internal_fsync(vp, td, waitfor, false);
 }
 
 /*
     struct vnop_getattr_args {
 	struct vnode *a_vp;
 	struct vattr *a_vap;
 	struct ucred *a_cred;
 	struct thread *a_td;
     };
 */
 static int
 fuse_vnop_getattr(struct vop_getattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vattr *vap = ap->a_vap;
 	struct ucred *cred = ap->a_cred;
 	struct thread *td = curthread;
 
 	int err = 0;
 	int dataflags;
 
 	dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags;
 
 	/* Note that we are not bailing out on a dead file system just yet. */
 
 	if (!(dataflags & FSESS_INITED)) {
 		if (!vnode_isvroot(vp)) {
 			fdata_set_dead(fuse_get_mpdata(vnode_mount(vp)));
 			err = ENOTCONN;
 			return err;
 		} else {
 			goto fake;
 		}
 	}
 	err = fuse_internal_getattr(vp, vap, cred, td);
 	if (err == ENOTCONN && vnode_isvroot(vp)) {
 		/* see comment in fuse_vfsop_statfs() */
 		goto fake;
 	} else {
 		return err;
 	}
 
 fake:
 	bzero(vap, sizeof(*vap));
 	vap->va_type = vnode_vtype(vp);
 
 	return 0;
 }
 
 /*
     struct vnop_inactive_args {
 	struct vnode *a_vp;
 	struct thread *a_td;
     };
 */
 static int
 fuse_vnop_inactive(struct vop_inactive_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct thread *td = ap->a_td;
 
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 	struct fuse_filehandle *fufh, *fufh_tmp;
 
 	int need_flush = 1;
 
 	LIST_FOREACH_SAFE(fufh, &fvdat->handles, next, fufh_tmp) {
 		if (need_flush && vp->v_type == VREG) {
 			if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0) {
 				fuse_vnode_savesize(vp, NULL, 0);
 			}
 			if ((fvdat->flag & FN_REVOKED) != 0)
 				fuse_io_invalbuf(vp, td);
 			else
 				fuse_io_flushbuf(vp, MNT_WAIT, td);
 			need_flush = 0;
 		}
 		fuse_filehandle_close(vp, fufh, td, NULL);
 	}
 
 	if ((fvdat->flag & FN_REVOKED) != 0 && fuse_reclaim_revoked) {
 		vrecycle(vp);
 	}
 	return 0;
 }
 
 /*
     struct vnop_link_args {
 	struct vnode *a_tdvp;
 	struct vnode *a_vp;
 	struct componentname *a_cnp;
     };
 */
 static int
 fuse_vnop_link(struct vop_link_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vnode *tdvp = ap->a_tdvp;
 	struct componentname *cnp = ap->a_cnp;
 
 	struct vattr *vap = VTOVA(vp);
 
 	struct fuse_dispatcher fdi;
 	struct fuse_entry_out *feo;
 	struct fuse_link_in fli;
 
 	int err;
 
 	if (fuse_isdeadfs(vp)) {
 		return ENXIO;
 	}
 	if (vnode_mount(tdvp) != vnode_mount(vp)) {
 		return EXDEV;
 	}
 
 	/*
 	 * This is a seatbelt check to protect naive userspace filesystems from
 	 * themselves and the limitations of the FUSE IPC protocol.  If a
 	 * filesystem does not allow attribute caching, assume it is capable of
 	 * validating that nlink does not overflow.
 	 */
 	if (vap != NULL && vap->va_nlink >= FUSE_LINK_MAX)
 		return EMLINK;
 	fli.oldnodeid = VTOI(vp);
 
 	fdisp_init(&fdi, 0);
 	fuse_internal_newentry_makerequest(vnode_mount(tdvp), VTOI(tdvp), cnp,
 	    FUSE_LINK, &fli, sizeof(fli), &fdi);
 	if ((err = fdisp_wait_answ(&fdi))) {
 		goto out;
 	}
 	feo = fdi.answ;
 
 	err = fuse_internal_checkentry(feo, vnode_vtype(vp));
 	if (!err) {
 		/* 
 		 * Purge the parent's attribute cache because the daemon
 		 * should've updated its mtime and ctime
 		 */
 		fuse_vnode_clear_attr_cache(tdvp);
 		fuse_internal_cache_attrs(vp, &feo->attr, feo->attr_valid,
 			feo->attr_valid_nsec, NULL);
 	}
 out:
 	fdisp_destroy(&fdi);
 	return err;
 }
 
 /*
  * Argument bundle handed through vn_vget_ino_gen() to fuse_lookup_alloc();
  * each member is forwarded unchanged to fuse_vnode_get().
  */
 struct fuse_lookup_alloc_arg {
 	struct fuse_entry_out *feo;	/* lookup reply; NULL for ".." lookups */
 	struct componentname *cnp;	/* name component being looked up */
 	uint64_t nid;			/* FUSE node id of the target */
 	enum vtype vtyp;		/* vnode type of the target */
 };
 
 /* Callback for vn_get_ino */
 static int
 fuse_lookup_alloc(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
 {
 	struct fuse_lookup_alloc_arg *flaa = arg;
 
 	return fuse_vnode_get(mp, flaa->feo, flaa->nid, NULL, vpp, flaa->cnp,
 		flaa->vtyp);
 }
 
 /*
  * Probe fired by fuse_vnop_lookup() right after cache_lookup(): arg0 is
  * cache_lookup()'s return value, arg1 the entry's timeout and arg2 the
  * current uptime.
  */
 SDT_PROBE_DEFINE3(fusefs, , vnops, cache_lookup,
 	"int", "struct timespec*", "struct timespec*");
 /*
     struct vnop_lookup_args {
 	struct vnodeop_desc *a_desc;
 	struct vnode *a_dvp;
 	struct vnode **a_vpp;
 	struct componentname *a_cnp;
     };
 */
 /*
  * Look up the name component a_cnp in directory a_dvp and return the
  * resulting vnode in *a_vpp.
  *
  * "." and ".." are resolved locally from the directory's own node id and
  * its recorded parent node id.  Any other name is first searched in the
  * name cache; on a miss or an expired entry a FUSE_LOOKUP is sent to the
  * daemon.
  *
  * Returns 0 when the entry was found, EJUSTRETURN when it does not exist
  * but may be created (CREATE/RENAME on the last component with write
  * access to a_dvp), ENOENT when it does not exist, or another errno
  * (ENXIO, ENOTDIR, EROFS, EINVAL, EPERM, access errors, daemon errors).
  * On any error *a_vpp is left NULL on the paths that reach "out:".
  */
 int
 fuse_vnop_lookup(struct vop_lookup_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode **vpp = ap->a_vpp;
 	struct componentname *cnp = ap->a_cnp;
 	struct thread *td = cnp->cn_thread;
 	struct ucred *cred = cnp->cn_cred;
 
 	int nameiop = cnp->cn_nameiop;
 	int flags = cnp->cn_flags;
 	int wantparent = flags & (LOCKPARENT | WANTPARENT);
 	int islastcn = flags & ISLASTCN;
 	struct mount *mp = vnode_mount(dvp);
 
 	int err = 0;
 	int lookup_err = 0;
 	struct vnode *vp = NULL;
 
 	struct fuse_dispatcher fdi;
 	bool did_lookup = false;	/* true once fdi holds a request */
 	struct fuse_entry_out *feo = NULL;
 	enum vtype vtyp;	/* vnode type of target */
 	off_t filesize;		/* filesize of target */
 
 	uint64_t nid;
 
 	if (fuse_isdeadfs(dvp)) {
 		*vpp = NULL;
 		return ENXIO;
 	}
 	if (!vnode_isdir(dvp))
 		return ENOTDIR;
 
 	/* Modifying operations on the last component need a writable mount */
 	if (islastcn && vfs_isrdonly(mp) && (nameiop != LOOKUP))
 		return EROFS;
 
 	/* Searching a directory requires execute permission on it */
 	if ((err = fuse_internal_access(dvp, VEXEC, td, cred)))
 		return err;
 
 	if (flags & ISDOTDOT) {
 		KASSERT(VTOFUD(dvp)->flag & FN_PARENT_NID,
 			("Looking up .. is TODO"));
 		nid = VTOFUD(dvp)->parent_nid;
 		if (nid == 0)
 			return ENOENT;
 		/* .. is obviously a directory */
 		vtyp = VDIR;
 		filesize = 0;
 	} else if (cnp->cn_namelen == 1 && *(cnp->cn_nameptr) == '.') {
 		nid = VTOI(dvp);
 		/* . is obviously a directory */
 		vtyp = VDIR;
 		filesize = 0;
 	} else {
 		struct timespec now, timeout;
 
 		/* Try the name cache first */
 		err = cache_lookup(dvp, vpp, cnp, &timeout, NULL);
 		getnanouptime(&now);
 		SDT_PROBE3(fusefs, , vnops, cache_lookup, err, &timeout, &now);
 		switch (err) {
 		case -1:		/* positive match */
 			if (timespeccmp(&timeout, &now, >)) {
 				atomic_add_acq_long(&fuse_lookup_cache_hits, 1);
 			} else {
 				/* Cache timeout */
 				atomic_add_acq_long(&fuse_lookup_cache_misses,
 					1);
 				bintime_clear(
 					&VTOFUD(*vpp)->entry_cache_timeout);
 				cache_purge(*vpp);
 				/* Drop the reference handed out by the cache */
 				if (dvp != *vpp)
 					vput(*vpp);
 				else 
 					vrele(*vpp);
 				*vpp = NULL;
 				break;
 			}
 			/* Unexpired positive hit: *vpp is the answer */
 			return 0;
 
 		case 0:		/* no match in cache */
 			atomic_add_acq_long(&fuse_lookup_cache_misses, 1);
 			break;
 
 		case ENOENT:		/* negative match */
 			getnanouptime(&now);
 			if (timespeccmp(&timeout, &now, <=)) {
 				/* Cache timeout */
 				cache_purge_negative(dvp);
 				break;
 			}
 			/* fall through */
 		default:
 			return err;
 		}
 
 		/* Ask the daemon: FUSE_LOOKUP with the NUL-terminated name */
 		nid = VTOI(dvp);
 		fdisp_init(&fdi, cnp->cn_namelen + 1);
 		fdisp_make(&fdi, FUSE_LOOKUP, mp, nid, td, cred);
 
 		memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen);
 		((char *)fdi.indata)[cnp->cn_namelen] = '\0';
 		lookup_err = fdisp_wait_answ(&fdi);
 		did_lookup = true;
 
 		if (!lookup_err) {
 			/* lookup call succeeded */
 			feo = (struct fuse_entry_out *)fdi.answ;
 			nid = feo->nodeid;
 			if (nid == 0) {
 				/* zero nodeid means ENOENT and cache it */
 				struct timespec timeout;
 
 				fdi.answ_stat = ENOENT;
 				lookup_err = ENOENT;
 				if (cnp->cn_flags & MAKEENTRY) {
 					fuse_validity_2_timespec(feo, &timeout);
 					cache_enter_time(dvp, *vpp, cnp,
 						&timeout, NULL);
 				}
 			} else if (nid == FUSE_ROOT_ID) {
 				/* A child must never resolve to the root node */
 				lookup_err = EINVAL;
 			}
 			vtyp = IFTOVT(feo->attr.mode);
 			filesize = feo->attr.size;
 		}
 		/*
 		 * Bail out on anything other than an ENOENT that was reported
 		 * through the answer status (fdi.answ_stat).
 		 */
 		if (lookup_err && (!fdi.answ_stat || lookup_err != ENOENT)) {
 			fdisp_destroy(&fdi);
 			return lookup_err;
 		}
 	}
 	/* lookup_err, if non-zero, must be ENOENT at this point */
 
 	if (lookup_err) {
 		/* Entry not found */
 		if ((nameiop == CREATE || nameiop == RENAME) && islastcn) {
 			err = fuse_internal_access(dvp, VWRITE, td, cred);
 			if (!err) {
 				/*
 				 * Set the SAVENAME flag to hold onto the
 				 * pathname for use later in VOP_CREATE or
 				 * VOP_RENAME.
 				 */
 				cnp->cn_flags |= SAVENAME;
 
 				err = EJUSTRETURN;
 			}
 		} else {
 			err = ENOENT;
 		}
 	} else {
 		/* Entry was found */
 		if (flags & ISDOTDOT) {
 			struct fuse_lookup_alloc_arg flaa;
 
 			/* feo is NULL here: ".." never sends FUSE_LOOKUP */
 			flaa.nid = nid;
 			flaa.feo = feo;
 			flaa.cnp = cnp;
 			flaa.vtyp = vtyp;
 			err = vn_vget_ino_gen(dvp, fuse_lookup_alloc, &flaa, 0,
 				&vp);
 			*vpp = vp;
 		} else if (nid == VTOI(dvp)) {
 			/* The name refers to the directory itself */
 			vref(dvp);
 			*vpp = dvp;
 		} else {
 			struct fuse_vnode_data *fvdat;
 
 			err = fuse_vnode_get(vnode_mount(dvp), feo, nid, dvp,
 			    &vp, cnp, vtyp);
 			if (err)
 				goto out;
 			*vpp = vp;
 
 			/*
 			 * In the case where we are looking up a FUSE node
 			 * represented by an existing cached vnode, and the
 			 * true size reported by FUSE_LOOKUP doesn't match
 			 * the vnode's cached size, then any cached writes
 			 * beyond the file's current size are lost.
 			 *
 			 * We can get here:
 			 * * following attribute cache expiration, or
 			 * * due to a bug in the daemon.
 			 */
 			fvdat = VTOFUD(vp);
 			if (vnode_isreg(vp) &&
 			    filesize != fvdat->cached_attrs.va_size &&
 			    fvdat->flag & FN_SIZECHANGE) {
 				/*
 				 * The FN_SIZECHANGE flag reflects a dirty
 				 * append.  If userspace lets us know our cache
 				 * is invalid, that write was lost.  (Dirty
 				 * writes that do not cause append are also
 				 * lost, but we don't detect them here.)
 				 *
 				 * XXX: Maybe disable WB caching on this mount.
 				 */
 				printf("%s: WB cache incoherent on %s!\n",
 				    __func__,
 				    vnode_mount(vp)->mnt_stat.f_mntonname);
 
 				fvdat->flag &= ~FN_SIZECHANGE;
 			}
 
 			/* Record the fresh attributes and entry validity */
 			MPASS(feo != NULL);
 			fuse_internal_cache_attrs(*vpp, &feo->attr,
 				feo->attr_valid, feo->attr_valid_nsec, NULL);
 			fuse_validity_2_bintime(feo->entry_valid,
 				feo->entry_valid_nsec,
 				&fvdat->entry_cache_timeout);
 
 			if ((nameiop == DELETE || nameiop == RENAME) &&
 				islastcn)
 			{
 				struct vattr dvattr;
 
 				err = fuse_internal_access(dvp, VWRITE, td,
 					cred);
 				if (err != 0)
 					goto out;
 				/* 
 				 * if the parent's sticky bit is set, check
 				 * whether we're allowed to remove the file.
 				 * Need to figure out the vnode locking to make
 				 * this work.
 				 *
 				 * NOTE(review): the return value of
 				 * fuse_internal_getattr() is not checked, so
 				 * dvattr may be read without having been
 				 * filled in -- confirm this is acceptable.
 				 */
 				fuse_internal_getattr(dvp, &dvattr, cred, td);
 				if ((dvattr.va_mode & S_ISTXT) &&
 					fuse_internal_access(dvp, VADMIN, td,
 						cred) &&
 					fuse_internal_access(*vpp, VADMIN, td,
 						cred)) {
 					err = EPERM;
 					goto out;
 				}
 			}
 
 			/* Keep the name around for the upcoming VOP */
 			if (islastcn && (
 				(nameiop == DELETE) ||
 				(nameiop == RENAME && wantparent))) {
 				cnp->cn_flags |= SAVENAME;
 			}
 
 		}
 	}
 out:
 	if (err) {
 		/* Release whatever vnode was obtained before failing */
 		if (vp != NULL && dvp != vp)
 			vput(vp);
 		else if (vp != NULL)
 			vrele(vp);
 		*vpp = NULL;
 	}
 	if (did_lookup)
 		fdisp_destroy(&fdi);
 
 	return err;
 }
 
 /*
     struct vnop_mkdir_args {
 	struct vnode *a_dvp;
 	struct vnode **a_vpp;
 	struct componentname *a_cnp;
 	struct vattr *a_vap;
     };
 */
 static int
 fuse_vnop_mkdir(struct vop_mkdir_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode **vpp = ap->a_vpp;
 	struct componentname *cnp = ap->a_cnp;
 	struct vattr *vap = ap->a_vap;
 
 	struct fuse_mkdir_in fmdi;
 
 	if (fuse_isdeadfs(dvp)) {
 		return ENXIO;
 	}
 	fmdi.mode = MAKEIMODE(vap->va_type, vap->va_mode);
 	fmdi.umask = curthread->td_proc->p_fd->fd_cmask;
 
 	return (fuse_internal_newentry(dvp, vpp, cnp, FUSE_MKDIR, &fmdi,
 	    sizeof(fmdi), VDIR));
 }
 
 /*
     struct vnop_mknod_args {
 	struct vnode *a_dvp;
 	struct vnode **a_vpp;
 	struct componentname *a_cnp;
 	struct vattr *a_vap;
     };
 */
 /*
  * Create a special file: a thin wrapper that rejects dead file systems
  * with ENXIO and otherwise defers entirely to fuse_internal_mknod().
  */
 static int
 fuse_vnop_mknod(struct vop_mknod_args *ap)
 {
 
 	if (fuse_isdeadfs(ap->a_dvp))
 		return ENXIO;
 
 	return fuse_internal_mknod(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap);
 }
 
 /*
     struct vop_open_args {
 	struct vnode *a_vp;
 	int  a_mode;
 	struct ucred *a_cred;
 	struct thread *a_td;
 	int a_fdidx; / struct file *a_fp;
     };
 */
 /*
  * Open the vnode a_vp with the access mode a_mode.
  *
  * Character, block and FIFO vnodes are refused with EOPNOTSUPP, and a mode
  * without any of FREAD/FWRITE/FEXEC with EINVAL.  If a file handle that is
  * valid for this mode, credential and pid already exists it is reused;
  * otherwise a new one is opened through fuse_filehandle_open().
  *
  * Returns 0 on success, ENXIO on a dead file system, or an errno as above.
  */
 static int
 fuse_vnop_open(struct vop_open_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	int a_mode = ap->a_mode;
 	struct thread *td = ap->a_td;
 	struct ucred *cred = ap->a_cred;
 	pid_t pid = td->td_proc->p_pid;
 
 	if (fuse_isdeadfs(vp))
 		return ENXIO;
 	if (vp->v_type == VCHR || vp->v_type == VBLK || vp->v_type == VFIFO)
 		return (EOPNOTSUPP);
 	if ((a_mode & (FREAD | FWRITE | FEXEC)) == 0)
 		return EINVAL;
 
 	/* Reuse an existing, suitable file handle when there is one */
 	if (fuse_filehandle_validrw(vp, a_mode, cred, pid)) {
 		fuse_vnode_open(vp, 0, td);
 		return 0;
 	}
 
 	return fuse_filehandle_open(vp, a_mode, NULL, td, cred);
 }
 
 static int
 fuse_vnop_pathconf(struct vop_pathconf_args *ap)
 {
 
 	switch (ap->a_name) {
 	case _PC_FILESIZEBITS:
 		*ap->a_retval = 64;
 		return (0);
 	case _PC_NAME_MAX:
 		*ap->a_retval = NAME_MAX;
 		return (0);
 	case _PC_LINK_MAX:
 		*ap->a_retval = MIN(LONG_MAX, FUSE_LINK_MAX);
 		return (0);
 	case _PC_SYMLINK_MAX:
 		*ap->a_retval = MAXPATHLEN;
 		return (0);
 	case _PC_NO_TRUNC:
 		*ap->a_retval = 1;
 		return (0);
 	default:
 		return (vop_stdpathconf(ap));
 	}
 }
 
 /*
     struct vnop_read_args {
 	struct vnode *a_vp;
 	struct uio *a_uio;
 	int  a_ioflag;
 	struct ucred *a_cred;
     };
 */
 /*
  * Read from a_vp into a_uio via fuse_io_dispatch().  IO_DIRECT is forced
  * when the vnode carries FN_DIRECTIO.  Returns ENXIO on a dead file
  * system.
  */
 static int
 fuse_vnop_read(struct vop_read_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	pid_t pid = curthread->td_proc->p_pid;
 	int ioflag;
 
 	if (fuse_isdeadfs(vp))
 		return ENXIO;
 
 	ioflag = ap->a_ioflag;
 	if ((VTOFUD(vp)->flag & FN_DIRECTIO) != 0)
 		ioflag |= IO_DIRECT;
 
 	return fuse_io_dispatch(vp, ap->a_uio, ioflag, false, ap->a_cred, pid);
 }
 
 /*
     struct vnop_readdir_args {
 	struct vnode *a_vp;
 	struct uio *a_uio;
 	struct ucred *a_cred;
 	int *a_eofflag;
 	int *a_ncookies;
 	u_long **a_cookies;
     };
 */
 /*
  * Read directory entries of a_vp into a_uio.
  *
  * a_eofflag, a_ncookies and a_cookies are optional outputs: a_eofflag is
  * only written when it is non-NULL, and a cookie array is only allocated
  * (with M_TEMP) when a_ncookies is non-NULL.  On error any cookie array is
  * freed and the cookie outputs are reset.
  *
  * Returns 0 on success, ENXIO on a dead file system, EINVAL when the
  * buffer cannot hold a single struct dirent, or the error from obtaining
  * the directory file handle / reading the directory.
  */
 static int
 fuse_vnop_readdir(struct vop_readdir_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct uio *uio = ap->a_uio;
 	struct ucred *cred = ap->a_cred;
 	struct fuse_filehandle *fufh = NULL;
 	struct fuse_iov cookediov;
 	int err = 0;
 	u_long *cookies;
 	off_t startoff;
 	ssize_t tresid;
 	int ncookies;
 	bool closefufh = false;
 	pid_t pid = curthread->td_proc->p_pid;
 
 	if (ap->a_eofflag)
 		*ap->a_eofflag = 0;
 	if (fuse_isdeadfs(vp)) {
 		return ENXIO;
 	}
 	if (				/* XXXIP ((uio_iovcnt(uio) > 1)) || */
 	    (uio_resid(uio) < sizeof(struct dirent))) {
 		return EINVAL;
 	}
 
 	/* Remember the starting state to detect "nothing was read" later */
 	tresid = uio->uio_resid;
 	startoff = uio->uio_offset;
 	err = fuse_filehandle_get_dir(vp, &fufh, cred, pid);
 	if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) {
 		/*
 		 * nfsd will do VOP_READDIR without first doing VOP_OPEN.  We
 		 * must implicitly open the directory here
 		 */
 		err = fuse_filehandle_open(vp, FREAD, &fufh, curthread, cred);
 		if (err == 0) {
 			/*
 			 * When a directory is opened, it must be read from
 			 * the beginning.  Hopefully, the "startoff" still
 			 * exists as an offset cookie for the directory.
 			 * If not, it will read the entire directory without
 			 * returning any entries and just return eof.
 			 */
 			uio->uio_offset = 0;
 		}
 		/* The implicitly opened handle is closed again below */
 		closefufh = true;
 	}
 	if (err)
 		return (err);
 	if (ap->a_ncookies != NULL) {
 		/* Upper bound on the number of entries that fit in the uio */
 		ncookies = uio->uio_resid /
 			(offsetof(struct dirent, d_name) + 4) + 1;
 		cookies = malloc(ncookies * sizeof(*cookies), M_TEMP, M_WAITOK);
 		*ap->a_ncookies = ncookies;
 		*ap->a_cookies = cookies;
 	} else {
 		ncookies = 0;
 		cookies = NULL;
 	}
 #define DIRCOOKEDSIZE FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + MAXNAMLEN + 1)
 	fiov_init(&cookediov, DIRCOOKEDSIZE);
 
 	err = fuse_internal_readdir(vp, uio, startoff, fufh, &cookediov,
 		&ncookies, cookies);
 
 	fiov_teardown(&cookediov);
 	if (closefufh)
 		fuse_filehandle_close(vp, fufh, curthread, cred);
 
 	if (ap->a_ncookies != NULL) {
 		if (err == 0) {
 			/* ncookies now holds the count of unused slots */
 			*ap->a_ncookies -= ncookies;
 		} else {
 			free(*ap->a_cookies, M_TEMP);
 			*ap->a_ncookies = 0;
 			*ap->a_cookies = NULL;
 		}
 	}
 	/*
 	 * Nothing was transferred: report end-of-directory.  a_eofflag is
 	 * optional (see the check at the top), so guard the store.
 	 */
 	if (ap->a_eofflag != NULL && err == 0 && tresid == uio->uio_resid)
 		*ap->a_eofflag = 1;
 
 	return err;
 }
 
 /*
     struct vnop_readlink_args {
 	struct vnode *a_vp;
 	struct uio *a_uio;
 	struct ucred *a_cred;
     };
 */
 static int
 fuse_vnop_readlink(struct vop_readlink_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct uio *uio = ap->a_uio;
 	struct ucred *cred = ap->a_cred;
 
 	struct fuse_dispatcher fdi;
 	int err;
 
 	if (fuse_isdeadfs(vp)) {
 		return ENXIO;
 	}
 	if (!vnode_islnk(vp)) {
 		return EINVAL;
 	}
 	fdisp_init(&fdi, 0);
 	err = fdisp_simple_putget_vp(&fdi, FUSE_READLINK, vp, curthread, cred);
 	if (err) {
 		goto out;
 	}
 	if (((char *)fdi.answ)[0] == '/' &&
 	    fuse_get_mpdata(vnode_mount(vp))->dataflags & FSESS_PUSH_SYMLINKS_IN) {
 		char *mpth = vnode_mount(vp)->mnt_stat.f_mntonname;
 
 		err = uiomove(mpth, strlen(mpth), uio);
 	}
 	if (!err) {
 		err = uiomove(fdi.answ, fdi.iosize, uio);
 	}
 out:
 	fdisp_destroy(&fdi);
 	return err;
 }
 
 /*
     struct vnop_reclaim_args {
 	struct vnode *a_vp;
 	struct thread *a_td;
     };
 */
 /*
  * Tear down the FUSE state of a vnode that is being reclaimed: close any
  * file handle still open (logging each one), send FUSE_FORGET for the
  * outstanding lookup count unless the file system is dead, detach the
  * vnode from its parent, the name cache and the vfs hash, and destroy the
  * per-vnode data.  Panics if the vnode has no FUSE data.  Always returns 0.
  */
 static int
 fuse_vnop_reclaim(struct vop_reclaim_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct thread *td = ap->a_td;
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 	struct fuse_filehandle *handle, *next_handle;
 
 	if (fvdat == NULL)
 		panic("FUSE: no vnode data during recycling");
 
 	LIST_FOREACH_SAFE(handle, &fvdat->handles, next, next_handle) {
 		printf("FUSE: vnode being reclaimed with open fufh "
 			"(type=%#x)", handle->fufh_type);
 		fuse_filehandle_close(vp, handle, td, NULL);
 	}
 
 	if (!fuse_isdeadfs(vp)) {
 		if (fvdat->nlookup) {
 			fuse_internal_forget_send(vnode_mount(vp), td, NULL,
 			    VTOI(vp), fvdat->nlookup);
 		}
 	}
 
 	fuse_vnode_setparent(vp, NULL);
 	cache_purge(vp);
 	vfs_hash_remove(vp);
 	vnode_destroy_vobject(vp);
 	fuse_vnode_destroy(vp);
 
 	return 0;
 }
 
 /*
     struct vnop_remove_args {
 	struct vnode *a_dvp;
 	struct vnode *a_vp;
 	struct componentname *a_cnp;
     };
 */
 static int
 fuse_vnop_remove(struct vop_remove_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode *vp = ap->a_vp;
 	struct componentname *cnp = ap->a_cnp;
 
 	int err;
 
 	if (fuse_isdeadfs(vp)) {
 		return ENXIO;
 	}
 	if (vnode_isdir(vp)) {
 		return EPERM;
 	}
 	cache_purge(vp);
 
 	err = fuse_internal_remove(dvp, vp, cnp, FUSE_UNLINK);
 
 	if (err == 0) {
 		fuse_internal_vnode_disappear(vp);
 		/* 
 		 * Purge the parent's attribute cache because the daemon
 		 * should've updated its mtime and ctime
 		 */
 		fuse_vnode_clear_attr_cache(dvp);
 	}
 	return err;
 }
 
 /*
     struct vnop_rename_args {
 	struct vnode *a_fdvp;
 	struct vnode *a_fvp;
 	struct componentname *a_fcnp;
 	struct vnode *a_tdvp;
 	struct vnode *a_tvp;
 	struct componentname *a_tcnp;
     };
 */
 /*
  * Rename a_fvp (named a_fcnp in directory a_fdvp) to a_tcnp in directory
  * a_tdvp, replacing a_tvp when it is non-NULL.
  *
  * Returns ENXIO on a dead file system, EXDEV when the vnodes do not all
  * belong to the same mount, an access error when a directory is moved to a
  * new parent without write permission (default_permissions mounts only),
  * or the result of fuse_internal_rename().
  *
  * Every path that reaches "out:" releases all four vnodes.
  * NOTE(review): the dead-file-system early return below does not pass
  * through "out:" and therefore releases nothing -- confirm against the
  * VOP_RENAME contract.
  */
 static int
 fuse_vnop_rename(struct vop_rename_args *ap)
 {
 	struct vnode *fdvp = ap->a_fdvp;
 	struct vnode *fvp = ap->a_fvp;
 	struct componentname *fcnp = ap->a_fcnp;
 	struct vnode *tdvp = ap->a_tdvp;
 	struct vnode *tvp = ap->a_tvp;
 	struct componentname *tcnp = ap->a_tcnp;
 	struct fuse_data *data;
 	bool newparent = fdvp != tdvp;
 	bool isdir = fvp->v_type == VDIR;
 	int err = 0;
 
 	if (fuse_isdeadfs(fdvp)) {
 		return ENXIO;
 	}
 	/* Source, target directory and target (if any) must share a mount */
 	if (fvp->v_mount != tdvp->v_mount ||
 	    (tvp && fvp->v_mount != tvp->v_mount)) {
 		SDT_PROBE2(fusefs, , vnops, trace, 1, "cross-device rename");
 		err = EXDEV;
 		goto out;
 	}
 	cache_purge(fvp);
 
 	/*
 	 * FUSE library is expected to check if target directory is not
 	 * under the source directory in the file system tree.
 	 * Linux performs this check at VFS level.
 	 */
 	/* 
 	 * If source is a directory, and it will get a new parent, user must
 	 * have write permission to it, so ".." can be modified.
 	 */
 	data = fuse_get_mpdata(vnode_mount(tdvp));
 	if (data->dataflags & FSESS_DEFAULT_PERMISSIONS && isdir && newparent) {
 		err = fuse_internal_access(fvp, VWRITE,
 			tcnp->cn_thread, tcnp->cn_cred);
 		if (err)
 			goto out;
 	}
 	/* The request and the parent bookkeeping run under rename_lock */
 	sx_xlock(&data->rename_lock);
 	err = fuse_internal_rename(fdvp, fcnp, tdvp, tcnp);
 	if (err == 0) {
 		if (tdvp != fdvp)
 			fuse_vnode_setparent(fvp, tdvp);
 		if (tvp != NULL)
 			fuse_vnode_setparent(tvp, NULL);
 	}
 	sx_unlock(&data->rename_lock);
 
 	/* Name cache invalidation; performed whether or not the rename failed */
 	if (tvp != NULL && tvp != fvp) {
 		cache_purge(tvp);
 	}
 	if (vnode_isdir(fvp)) {
 		if ((tvp != NULL) && vnode_isdir(tvp)) {
 			cache_purge(tdvp);
 		}
 		cache_purge(fdvp);
 	}
 out:
 	/* Release the vnodes: vput for tdvp/tvp, vrele for fdvp/fvp */
 	if (tdvp == tvp) {
 		vrele(tdvp);
 	} else {
 		vput(tdvp);
 	}
 	if (tvp != NULL) {
 		vput(tvp);
 	}
 	vrele(fdvp);
 	vrele(fvp);
 
 	return err;
 }
 
 /*
     struct vnop_rmdir_args {
 	    struct vnode *a_dvp;
 	    struct vnode *a_vp;
 	    struct componentname *a_cnp;
     } *ap;
 */
 static int
 fuse_vnop_rmdir(struct vop_rmdir_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode *vp = ap->a_vp;
 
 	int err;
 
 	if (fuse_isdeadfs(vp)) {
 		return ENXIO;
 	}
 	if (VTOFUD(vp) == VTOFUD(dvp)) {
 		return EINVAL;
 	}
 	err = fuse_internal_remove(dvp, vp, ap->a_cnp, FUSE_RMDIR);
 
 	if (err == 0) {
 		fuse_internal_vnode_disappear(vp);
 		/* 
 		 * Purge the parent's attribute cache because the daemon
 		 * should've updated its mtime and ctime
 		 */
 		fuse_vnode_clear_attr_cache(dvp);
 	}
 	return err;
 }
 
 /*
     struct vnop_setattr_args {
 	struct vnode *a_vp;
 	struct vattr *a_vap;
 	struct ucred *a_cred;
 	struct thread *a_td;
     };
 */
 static int
 fuse_vnop_setattr(struct vop_setattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vattr *vap = ap->a_vap;
 	struct ucred *cred = ap->a_cred;
 	struct thread *td = curthread;
 	struct mount *mp;
 	struct fuse_data *data;
 	struct vattr old_va;
 	int dataflags;
 	int err = 0, err2;
 	accmode_t accmode = 0;
 	bool checkperm;
 	bool drop_suid = false;
 	gid_t cr_gid;
 
 	mp = vnode_mount(vp);
 	data = fuse_get_mpdata(mp);
 	dataflags = data->dataflags;
 	checkperm = dataflags & FSESS_DEFAULT_PERMISSIONS;
 	if (cred->cr_ngroups > 0)
 		cr_gid = cred->cr_groups[0];
 	else
 		cr_gid = 0;
 
 	if (fuse_isdeadfs(vp)) {
 		return ENXIO;
 	}
 
 	if (vap->va_uid != (uid_t)VNOVAL) {
 		if (checkperm) {
 			/* Only root may change a file's owner */
 			err = priv_check_cred(cred, PRIV_VFS_CHOWN);
 			if (err) {
 				/* As a special case, allow the null chown */
 				err2 = fuse_internal_getattr(vp, &old_va, cred,
 					td);
 				if (err2)
 					return (err2);
 				if (vap->va_uid != old_va.va_uid)
 					return err;
 				else
 					accmode |= VADMIN;
 				drop_suid = true;
 			} else
 				accmode |= VADMIN;
 		} else
 			accmode |= VADMIN;
 	}
 	if (vap->va_gid != (gid_t)VNOVAL) {
 		if (checkperm && priv_check_cred(cred, PRIV_VFS_CHOWN))
 			drop_suid = true;
 		if (checkperm && !groupmember(vap->va_gid, cred))
 		{
 			/*
 			 * Non-root users may only chgrp to one of their own
 			 * groups 
 			 */
 			err = priv_check_cred(cred, PRIV_VFS_CHOWN);
 			if (err) {
 				/* As a special case, allow the null chgrp */
 				err2 = fuse_internal_getattr(vp, &old_va, cred,
 					td);
 				if (err2)
 					return (err2);
 				if (vap->va_gid != old_va.va_gid)
 					return err;
 				accmode |= VADMIN;
 			} else
 				accmode |= VADMIN;
 		} else
 			accmode |= VADMIN;
 	}
 	if (vap->va_size != VNOVAL) {
 		switch (vp->v_type) {
 		case VDIR:
 			return (EISDIR);
 		case VLNK:
 		case VREG:
 			if (vfs_isrdonly(mp))
 				return (EROFS);
 			break;
 		default:
 			/*
 			 * According to POSIX, the result is unspecified
 			 * for file types other than regular files,
 			 * directories and shared memory objects.  We
 			 * don't support shared memory objects in the file
 			 * system, and have dubious support for truncating
 			 * symlinks.  Just ignore the request in other cases.
 			 */
 			return (0);
 		}
 		/* Don't set accmode.  Permission to trunc is checked upstack */
 	}
 	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
 		if (vap->va_vaflags & VA_UTIMES_NULL)
 			accmode |= VWRITE;
 		else
 			accmode |= VADMIN;
 	}
 	if (drop_suid) {
 		if (vap->va_mode != (mode_t)VNOVAL)
 			vap->va_mode &= ~(S_ISUID | S_ISGID);
 		else {
 			err = fuse_internal_getattr(vp, &old_va, cred, td);
 			if (err)
 				return (err);
 			vap->va_mode = old_va.va_mode & ~(S_ISUID | S_ISGID);
 		}
 	}
 	if (vap->va_mode != (mode_t)VNOVAL) {
 		/* Only root may set the sticky bit on non-directories */
 		if (checkperm && vp->v_type != VDIR && (vap->va_mode & S_ISTXT)
 		    && priv_check_cred(cred, PRIV_VFS_STICKYFILE))
 			return EFTYPE;
 		if (checkperm && (vap->va_mode & S_ISGID)) {
 			err = fuse_internal_getattr(vp, &old_va, cred, td);
 			if (err)
 				return (err);
 			if (!groupmember(old_va.va_gid, cred)) {
 				err = priv_check_cred(cred, PRIV_VFS_SETGID);
 				if (err)
 					return (err);
 			}
 		}
 		accmode |= VADMIN;
 	}
 
 	if (vfs_isrdonly(mp))
 		return EROFS;
 
 	err = fuse_internal_access(vp, accmode, td, cred);
 	if (err)
 		return err;
 	else
 		return fuse_internal_setattr(vp, vap, td, cred);
 }
 
 /*
     struct vnop_strategy_args {
 	struct vnode *a_vp;
 	struct buf *a_bp;
     };
 */
 /*
  * Perform the I/O described by a_bp.  With a missing vnode or a dead file
  * system the buffer is completed immediately with ENXIO; otherwise the
  * work is delegated to fuse_io_strategy().  Always returns 0.
  */
 static int
 fuse_vnop_strategy(struct vop_strategy_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct buf *bp = ap->a_bp;
 
 	if (vp != NULL && !fuse_isdeadfs(vp)) {
 		/*
 		 * VOP_STRATEGY always returns zero and signals error via
 		 * bp->b_ioflags.  fuse_io_strategy sets bp's error fields
 		 */
 		(void)fuse_io_strategy(vp, bp);
 		return 0;
 	}
 
 	/* Fail the buffer without touching the daemon */
 	bp->b_ioflags |= BIO_ERROR;
 	bp->b_error = ENXIO;
 	bufdone(bp);
 
 	return 0;
 }
 
 
 /*
     struct vnop_symlink_args {
 	struct vnode *a_dvp;
 	struct vnode **a_vpp;
 	struct componentname *a_cnp;
 	struct vattr *a_vap;
 	char *a_target;
     };
 */
 static int
 fuse_vnop_symlink(struct vop_symlink_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode **vpp = ap->a_vpp;
 	struct componentname *cnp = ap->a_cnp;
 	const char *target = ap->a_target;
 
 	struct fuse_dispatcher fdi;
 
 	int err;
 	size_t len;
 
 	if (fuse_isdeadfs(dvp)) {
 		return ENXIO;
 	}
 	/*
 	 * Unlike the other creator type calls, here we have to create a message
 	 * where the name of the new entry comes first, and the data describing
 	 * the entry comes second.
 	 * Hence we can't rely on our handy fuse_internal_newentry() routine,
 	 * but put together the message manually and just call the core part.
 	 */
 
 	len = strlen(target) + 1;
 	fdisp_init(&fdi, len + cnp->cn_namelen + 1);
 	fdisp_make_vp(&fdi, FUSE_SYMLINK, dvp, curthread, NULL);
 
 	memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen);
 	((char *)fdi.indata)[cnp->cn_namelen] = '\0';
 	memcpy((char *)fdi.indata + cnp->cn_namelen + 1, target, len);
 
 	err = fuse_internal_newentry_core(dvp, vpp, cnp, VLNK, &fdi);
 	fdisp_destroy(&fdi);
 	return err;
 }
 
 /*
     struct vnop_write_args {
 	struct vnode *a_vp;
 	struct uio *a_uio;
 	int  a_ioflag;
 	struct ucred *a_cred;
     };
 */
 /*
  * Write a_uio to a_vp via fuse_io_dispatch().  IO_DIRECT is forced when
  * the vnode carries FN_DIRECTIO.  Returns ENXIO on a dead file system.
  */
 static int
 fuse_vnop_write(struct vop_write_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	pid_t pid = curthread->td_proc->p_pid;
 	int ioflag;
 
 	if (fuse_isdeadfs(vp))
 		return ENXIO;
 
 	ioflag = ap->a_ioflag;
 	if ((VTOFUD(vp)->flag & FN_DIRECTIO) != 0)
 		ioflag |= IO_DIRECT;
 
 	return fuse_io_dispatch(vp, ap->a_uio, ioflag, false, ap->a_cred, pid);
 }
 
 /*
  * Probe fired by fuse_vnop_getpages() when the read failed without
  * transferring any data; arg0 is the error code.
  */
 SDT_PROBE_DEFINE1(fusefs, , vnops, vnop_getpages_error, "int");
 /*
     struct vnop_getpages_args {
 	struct vnode *a_vp;
 	vm_page_t *a_m;
 	int a_count;
 	int a_reqpage;
     };
 */
 /*
  * VOP_GETPAGES handler: fill the pages in ap->a_m with file data.
  *
  * The pages are temporarily mapped into kernel virtual address space and
  * filled by a single IO_DIRECT read issued through fuse_io_dispatch().
  * Afterwards each page is marked fully valid, partially valid, or left
  * untouched depending on how many bytes the read actually returned.
  *
  * Returns VM_PAGER_ERROR if fsess_opt_mmap() is false for the mount, or if
  * the read failed without transferring any data; VM_PAGER_OK otherwise.
  * On the VM_PAGER_OK paths *ap->a_rbehind and *ap->a_rahead (when non-NULL)
  * are set to 0.
  */
 static int
 fuse_vnop_getpages(struct vop_getpages_args *ap)
 {
 	int i, error, nextoff, size, toff, count, npages;
 	struct uio uio;
 	struct iovec iov;
 	vm_offset_t kva;
 	struct buf *bp;
 	struct vnode *vp;
 	struct thread *td;
 	struct ucred *cred;
 	vm_page_t *pages;
 	pid_t pid = curthread->td_proc->p_pid;
 
 	vp = ap->a_vp;
 	KASSERT(vp->v_object, ("objectless vp passed to getpages"));
 	td = curthread;			/* XXX */
 	cred = curthread->td_ucred;	/* XXX */
 	pages = ap->a_m;
 	npages = ap->a_count;
 
 	if (!fsess_opt_mmap(vnode_mount(vp))) {
 		SDT_PROBE2(fusefs, , vnops, trace, 1,
 			"called on non-cacheable vnode??\n");
 		return (VM_PAGER_ERROR);
 	}
 
 	/*
 	 * If the last page is partially valid, just return it and allow
 	 * the pager to zero-out the blanks.  Partially valid pages can
 	 * only occur at the file EOF.
 	 *
 	 * XXXGL: is that true for FUSE, which is a local filesystem,
 	 * but still somewhat disconnected from the kernel?
 	 */
 	VM_OBJECT_WLOCK(vp->v_object);
 	/*
 	 * Drop an already-valid last page from the request; if it was the
 	 * only page there is nothing left to read ("out" unlocks the object).
 	 */
 	if (pages[npages - 1]->valid != 0 && --npages == 0)
 		goto out;
 	VM_OBJECT_WUNLOCK(vp->v_object);
 
 	/*
 	 * We use only the kva address for the buffer, but this is extremely
 	 * convenient and fast.
 	 */
 	bp = uma_zalloc(fuse_pbuf_zone, M_WAITOK);
 
 	kva = (vm_offset_t)bp->b_data;
 	pmap_qenter(kva, pages, npages);
 	VM_CNT_INC(v_vnodein);
 	VM_CNT_ADD(v_vnodepgsin, npages);
 
 	/* Describe one kernel-space read covering all remaining pages */
 	count = npages << PAGE_SHIFT;
 	iov.iov_base = (caddr_t)kva;
 	iov.iov_len = count;
 	uio.uio_iov = &iov;
 	uio.uio_iovcnt = 1;
 	uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
 	uio.uio_resid = count;
 	uio.uio_segflg = UIO_SYSSPACE;
 	uio.uio_rw = UIO_READ;
 	uio.uio_td = td;
 
 	error = fuse_io_dispatch(vp, &uio, IO_DIRECT, true, cred, pid);
 	pmap_qremove(kva, npages);
 
 	uma_zfree(fuse_pbuf_zone, bp);
 
 	/* Only a read that failed AND moved no data at all is a pager error */
 	if (error && (uio.uio_resid == count)) {
 		SDT_PROBE1(fusefs, , vnops, vnop_getpages_error, error);
 		return VM_PAGER_ERROR;
 	}
 	/*
 	 * Calculate the number of bytes read and validate only that number
 	 * of bytes.  Note that due to pending writes, size may be 0.  This
 	 * does not mean that the remaining data is invalid!
 	 */
 
 	size = count - uio.uio_resid;
 	VM_OBJECT_WLOCK(vp->v_object);
 	fuse_vm_page_lock_queues();
 	/* toff/nextoff are the byte offsets of page i's start/end in the read */
 	for (i = 0, toff = 0; i < npages; i++, toff = nextoff) {
 		vm_page_t m;
 
 		nextoff = toff + PAGE_SIZE;
 		m = pages[i];
 
 		if (nextoff <= size) {
 			/*
 			 * Read operation filled an entire page
 			 */
 			m->valid = VM_PAGE_BITS_ALL;
 			KASSERT(m->dirty == 0,
 			    ("fuse_getpages: page %p is dirty", m));
 		} else if (size > toff) {
 			/*
 			 * Read operation filled a partial page.
 			 */
 			m->valid = 0;
 			vm_page_set_valid_range(m, 0, size - toff);
 			KASSERT(m->dirty == 0,
 			    ("fuse_getpages: page %p is dirty", m));
 		} else {
 			/*
 			 * Read operation was short.  If no error occurred
 			 * we may have hit a zero-fill section.   We simply
 			 * leave valid set to 0.
 			 */
 			;
 		}
 	}
 	fuse_vm_page_unlock_queues();
 out:
 	VM_OBJECT_WUNLOCK(vp->v_object);
 	/* This implementation never reads behind or ahead of the request */
 	if (ap->a_rbehind)
 		*ap->a_rbehind = 0;
 	if (ap->a_rahead)
 		*ap->a_rahead = 0;
 	return (VM_PAGER_OK);
 }
 
 /*
     struct vnop_putpages_args {
 	struct vnode *a_vp;
 	vm_page_t *a_m;
 	int a_count;
 	int a_sync;
 	int *a_rtvals;
 	vm_ooffset_t a_offset;
     };
 */
 /*
  * VOP_PUTPAGES handler: write the pages in ap->a_m back to the file.
  *
  * The pages are temporarily mapped into kernel virtual address space and
  * written by a single IO_DIRECT write issued through fuse_io_dispatch().
  * The write is clipped so that it does not extend past the size recorded in
  * the vnode's VM object.
  *
  * Every entry of ap->a_rtvals starts out as VM_PAGER_AGAIN.  If the write
  * reports no error, the pages covered (wholly or partly) by the bytes
  * actually written are marked VM_PAGER_OK and undirtied.  The return value
  * is the status of the first page, rtvals[0].
  */
 static int
 fuse_vnop_putpages(struct vop_putpages_args *ap)
 {
 	struct uio uio;
 	struct iovec iov;
 	vm_offset_t kva;
 	struct buf *bp;
 	int i, error, npages, count;
 	off_t offset;
 	int *rtvals;
 	struct vnode *vp;
 	struct thread *td;
 	struct ucred *cred;
 	vm_page_t *pages;
 	vm_ooffset_t fsize;
 	pid_t pid = curthread->td_proc->p_pid;
 
 	vp = ap->a_vp;
 	KASSERT(vp->v_object, ("objectless vp passed to putpages"));
 	fsize = vp->v_object->un_pager.vnp.vnp_size;
 	td = curthread;			/* XXX */
 	cred = curthread->td_ucred;	/* XXX */
 	pages = ap->a_m;
 	count = ap->a_count;	/* byte count of the request */
 	rtvals = ap->a_rtvals;
 	npages = btoc(count);
 	offset = IDX_TO_OFF(pages[0]->pindex);
 
 	/* Unlike getpages, this case is only traced; the write still proceeds */
 	if (!fsess_opt_mmap(vnode_mount(vp))) {
 		SDT_PROBE2(fusefs, , vnops, trace, 1,
 			"called on non-cacheable vnode??\n");
 	}
 	for (i = 0; i < npages; i++)
 		rtvals[i] = VM_PAGER_AGAIN;
 
 	/*
 	 * When putting pages, do not extend file past EOF.
 	 */
 
 	if (offset + count > fsize) {
 		count = fsize - offset;
 		if (count < 0)
 			count = 0;
 	}
 	/*
 	 * We use only the kva address for the buffer, but this is extremely
 	 * convenient and fast.
 	 */
 	bp = uma_zalloc(fuse_pbuf_zone, M_WAITOK);
 
 	kva = (vm_offset_t)bp->b_data;
 	pmap_qenter(kva, pages, npages);
 	VM_CNT_INC(v_vnodeout);
 	/*
 	 * NOTE(review): count is a byte count here, whereas
 	 * fuse_vnop_getpages() adds a page count to v_vnodepgsin -- confirm
 	 * which unit this counter is meant to use.
 	 */
 	VM_CNT_ADD(v_vnodepgsout, count);
 
 	/* Describe one kernel-space write covering the (clipped) byte range */
 	iov.iov_base = (caddr_t)kva;
 	iov.iov_len = count;
 	uio.uio_iov = &iov;
 	uio.uio_iovcnt = 1;
 	uio.uio_offset = offset;
 	uio.uio_resid = count;
 	uio.uio_segflg = UIO_SYSSPACE;
 	uio.uio_rw = UIO_WRITE;
 	uio.uio_td = td;
 
 	error = fuse_io_dispatch(vp, &uio, IO_DIRECT, true, cred, pid);
 
 	pmap_qremove(kva, npages);
 	uma_zfree(fuse_pbuf_zone, bp);
 
 	if (!error) {
 		/* Number of pages touched by the bytes written, rounded up */
 		int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE;
 
 		for (i = 0; i < nwritten; i++) {
 			rtvals[i] = VM_PAGER_OK;
 			VM_OBJECT_WLOCK(pages[i]->object);
 			vm_page_undirty(pages[i]);
 			VM_OBJECT_WUNLOCK(pages[i]->object);
 		}
 	}
 	return rtvals[0];
 }
 
 /*
  * Character placed between the namespace prefix and the attribute name in
  * the xattr names exchanged with the FUSE daemon, e.g. "user.attr_name".
  */
 static const char extattr_namespace_separator = '.';
 
 /*
     struct vop_getextattr_args {
 	struct vop_generic_args a_gen;
 	struct vnode *a_vp;
 	int a_attrnamespace;
 	const char *a_name;
 	struct uio *a_uio;
 	size_t *a_size;
 	struct ucred *a_cred;
 	struct thread *a_td;
     };
 */
 /*
  * VOP_GETEXTATTR handler: send FUSE_GETXATTR for
  * "<namespace><separator><name>".
  *
  * With a NULL ap->a_uio the request carries size 0, which asks the daemon
  * only for the attribute's size; otherwise it carries uio_resid and the
  * answer is copied out through the uio.  ap->a_size, when non-NULL, is
  * filled in from the answer.
  *
  * Returns 0 on success, ENXIO for a dead file system, EOPNOTSUPP if the
  * daemon does not implement FUSE_GETXATTR, or another errno.
  */
 static int
 fuse_vnop_getextattr(struct vop_getextattr_args *ap)
 {
 	struct fuse_dispatcher fdi;
 	struct fuse_getxattr_in *request;
 	struct fuse_getxattr_out *reply;
 	struct vnode *vp = ap->a_vp;
 	struct uio *uio = ap->a_uio;
 	struct mount *mp = vnode_mount(vp);
 	struct thread *td = ap->a_td;
 	struct ucred *cred = ap->a_cred;
 	char *nsname;
 	size_t namelen;
 	int err;

 	if (fuse_isdeadfs(vp))
 		return (ENXIO);
 	if (!fsess_isimpl(mp, FUSE_GETXATTR))
 		return EOPNOTSUPP;

 	err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD);
 	if (err)
 		return err;

 	/* Anything that is not the system namespace is treated as "user" */
 	nsname = (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) ?
 	    EXTATTR_NAMESPACE_SYSTEM_STRING : EXTATTR_NAMESPACE_USER_STRING;

 	/* "<namespace><separator><name>" plus the terminating NUL */
 	namelen = strlen(nsname) + sizeof(extattr_namespace_separator) +
 	    strlen(ap->a_name) + 1;

 	fdisp_init(&fdi, namelen + sizeof(*request));
 	fdisp_make_vp(&fdi, FUSE_GETXATTR, vp, td, cred);

 	/*
 	 * A size of 0 makes the daemon answer with struct fuse_getxattr_out
 	 * (the attribute's size).  A non-zero size makes it answer with that
 	 * much raw data and no fuse_getxattr_out header.
 	 */
 	request = fdi.indata;
 	request->size = (uio != NULL) ? uio->uio_resid : 0;

 	/* The full attribute name follows the fixed-size request header */
 	snprintf((char *)fdi.indata + sizeof(*request), namelen, "%s%c%s",
 	    nsname, extattr_namespace_separator, ap->a_name);

 	err = fdisp_wait_answ(&fdi);
 	if (err == ENOSYS) {
 		/* Remember that the daemon lacks this operation */
 		fsess_set_notimpl(mp, FUSE_GETXATTR);
 		err = EOPNOTSUPP;
 	}
 	if (err == 0) {
 		reply = fdi.answ;
 		if (ap->a_size != NULL)
 			*ap->a_size = reply->size;
 		if (uio != NULL)
 			err = uiomove(fdi.answ, fdi.iosize, uio);
 	}

 	fdisp_destroy(&fdi);
 	return (err);
 }
 
 /*
     struct vop_setextattr_args {
 	struct vop_generic_args a_gen;
 	struct vnode *a_vp;
 	int a_attrnamespace;
 	const char *a_name;
 	struct uio *a_uio;
 	struct ucred *a_cred;
 	struct thread *a_td;
     };
 */
 /*
  * VOP_SETEXTATTR handler: send FUSE_SETXATTR carrying
  * "<namespace><separator><name>" followed by the value read from ap->a_uio.
  *
  * Returns 0 on success, ENXIO for a dead file system, EROFS on a read-only
  * mount, EOPNOTSUPP if the daemon does not implement the operation, EINTR in
  * place of ERESTART, or another errno.
  */
 static int
 fuse_vnop_setextattr(struct vop_setextattr_args *ap)
 {
 	struct fuse_dispatcher fdi;
 	struct fuse_setxattr_in *request;
 	struct vnode *vp = ap->a_vp;
 	struct uio *uio = ap->a_uio;
 	struct mount *mp = vnode_mount(vp);
 	struct thread *td = ap->a_td;
 	struct ucred *cred = ap->a_cred;
 	char *nsname;
 	size_t namelen;
 	int err;

 	if (fuse_isdeadfs(vp))
 		return (ENXIO);
 	if (!fsess_isimpl(mp, FUSE_SETXATTR))
 		return EOPNOTSUPP;
 	if (vfs_isrdonly(mp))
 		return EROFS;

 	/*
 	 * Deleting xattrs must use VOP_DELETEEXTATTR instead.  If we got
 	 * here as a fallback from VOP_DELETEEXTATTR (the daemon does not
 	 * implement FUSE_REMOVEXATTR), report EOPNOTSUPP rather than EINVAL.
 	 */
 	if (ap->a_uio == NULL)
 		return (fsess_isimpl(mp, FUSE_REMOVEXATTR) ? EINVAL :
 		    EOPNOTSUPP);

 	err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td,
 	    VWRITE);
 	if (err)
 		return err;

 	/* Anything that is not the system namespace is treated as "user" */
 	nsname = (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) ?
 	    EXTATTR_NAMESPACE_SYSTEM_STRING : EXTATTR_NAMESPACE_USER_STRING;

 	/* "<namespace><separator><name>" plus the terminating NUL */
 	namelen = strlen(nsname) + sizeof(extattr_namespace_separator) +
 	    strlen(ap->a_name) + 1;

 	/* Request layout: header, NUL-terminated name, then the value */
 	fdisp_init(&fdi, namelen + sizeof(*request) + uio->uio_resid);
 	fdisp_make_vp(&fdi, FUSE_SETXATTR, vp, td, cred);

 	request = fdi.indata;
 	request->size = uio->uio_resid;

 	snprintf((char *)fdi.indata + sizeof(*request), namelen, "%s%c%s",
 	    nsname, extattr_namespace_separator, ap->a_name);

 	err = uiomove((char *)fdi.indata + sizeof(*request) + namelen,
 	    uio->uio_resid, uio);
 	if (err == 0) {
 		err = fdisp_wait_answ(&fdi);
 		switch (err) {
 		case ENOSYS:
 			/* Remember that the daemon lacks this operation */
 			fsess_set_notimpl(mp, FUSE_SETXATTR);
 			err = EOPNOTSUPP;
 			break;
 		case ERESTART:
 			/* Can't restart after calling uiomove */
 			err = EINTR;
 			break;
 		default:
 			break;
 		}
 	}

 	fdisp_destroy(&fdi);
 	return (err);
 }
 
 /*
  * The Linux / FUSE extended attribute list is simply a collection of
  * NUL-terminated strings.  The FreeBSD extended attribute list is a single
  * byte length followed by a non-NUL terminated string.  So, this allows
  * conversion of the Linux / FUSE format to the FreeBSD format.
  * Linux attribute names are reported with the namespace as a prefix (e.g.
  * "user.attribute_name"), but in FreeBSD they are reported without the
  * namespace prefix (e.g. "attribute_name").  So, we're going from:
  *
  * user.attr_name1\0user.attr_name2\0
  *
  * to:
  *
  * <num>attr_name1<num>attr_name2
  *
  * Where "<num>" is a single byte number of characters in the attribute name.
  * Entries that do not start with "<prefix><separator>" are skipped, and
  * conversion stops at the first empty entry or at the end of the input.
  *
  * The input comes from the FUSE daemon and is not trusted: nothing beyond
  * list[list_len - 1] is ever read, and a final entry that lacks its NUL
  * terminator is treated as ending at the end of the buffer.
  *
  * Args:
  * prefix - extattr namespace prefix string
  * list, list_len - input list with namespace prefixes
  * bsd_list, bsd_list_len - output list compatible with bsd vfs.  bsd_list
  *	must have room for at least list_len bytes; the output is never
  *	longer than the input.
  *
  * Returns 0 on success, or ENAMETOOLONG if a matching attribute name is
  * EXTATTR_MAXNAMELEN characters or longer.
  */
 static int
 fuse_xattrlist_convert(char *prefix, const char *list, int list_len,
     char *bsd_list, int *bsd_list_len)
 {
 	const char *entry, *nul;
 	int len, pos, entry_len, remaining, prefix_len, skip;

 	pos = 0;
 	*bsd_list_len = 0;
 	prefix_len = strlen(prefix);
 	/* Bytes in front of the bare attribute name: "<prefix><separator>" */
 	skip = prefix_len + sizeof(extattr_namespace_separator);

 	while (pos < list_len && list[pos] != '\0') {
 		entry = &list[pos];
 		remaining = list_len - pos;

 		/* Bounded strlen(): never look past the end of the list */
 		nul = memchr(entry, '\0', remaining);
 		entry_len = (nul != NULL) ? (int)(nul - entry) : remaining;

 		/*
 		 * Check the length first, so that neither the prefix
 		 * comparison nor the separator lookup can read beyond this
 		 * entry.
 		 */
 		if (entry_len >= skip &&
 		    bcmp(entry, prefix, prefix_len) == 0 &&
 		    entry[prefix_len] == extattr_namespace_separator) {
 			len = entry_len - skip;
 			if (len >= EXTATTR_MAXNAMELEN)
 				return (ENAMETOOLONG);

 			/* One length byte, then the name without a NUL */
 			bsd_list[*bsd_list_len] = len;
 			memcpy(&bsd_list[*bsd_list_len + 1], &entry[skip],
 			    len);

 			*bsd_list_len += len + 1;
 		}

 		/* Step over this entry and its terminator */
 		pos += entry_len + 1;
 	}

 	return (0);
 }
 
 /*
     struct vop_listextattr_args {
 	struct vop_generic_args a_gen;
 	struct vnode *a_vp;
 	int a_attrnamespace;
 	struct uio *a_uio;
 	size_t *a_size;
 	struct ucred *a_cred;
 	struct thread *a_td;
     };
 */
 /*
  * VOP_LISTEXTATTR handler.
  *
  * FUSE_LISTXATTR is issued twice: first with size 0 to retrieve the size of
  * the daemon's Linux / FUSE style list, then again to retrieve the list
  * itself.  The answer is converted by fuse_xattrlist_convert() and then
  * reported through ap->a_size and/or copied out through ap->a_uio.
  *
  * Returns 0 on success, ENXIO for a dead file system, EOPNOTSUPP if the
  * daemon does not implement FUSE_LISTXATTR, or another errno.
  */
 static int
 fuse_vnop_listextattr(struct vop_listextattr_args *ap)
 {
 	struct fuse_dispatcher fdi;
 	struct fuse_listxattr_in *request;
 	struct fuse_listxattr_out *sizereply;
 	struct vnode *vp = ap->a_vp;
 	struct uio *uio = ap->a_uio;
 	struct mount *mp = vnode_mount(vp);
 	struct thread *td = ap->a_td;
 	struct ucred *cred = ap->a_cred;
 	char *nsname;
 	char *raw_list;
 	char *converted = NULL;
 	size_t nslen;
 	int raw_len, converted_len;
 	int err;

 	if (fuse_isdeadfs(vp))
 		return (ENXIO);
 	if (!fsess_isimpl(mp, FUSE_LISTXATTR))
 		return EOPNOTSUPP;

 	err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD);
 	if (err)
 		return err;

 	/* Anything that is not the system namespace is treated as "user" */
 	nsname = (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) ?
 	    EXTATTR_NAMESPACE_SYSTEM_STRING : EXTATTR_NAMESPACE_USER_STRING;

 	/* "<namespace><separator>" plus the terminating NUL */
 	nslen = strlen(nsname) + sizeof(extattr_namespace_separator) + 1;

 	fdisp_init(&fdi, nslen + sizeof(*request));
 	fdisp_make_vp(&fdi, FUSE_LISTXATTR, vp, td, cred);

 	/* First pass: retrieve the Linux / FUSE compatible list size */
 	request = fdi.indata;
 	request->size = 0;
 	snprintf((char *)fdi.indata + sizeof(*request), nslen, "%s%c", nsname,
 	    extattr_namespace_separator);

 	err = fdisp_wait_answ(&fdi);
 	if (err == ENOSYS) {
 		/* Remember that the daemon lacks this operation */
 		fsess_set_notimpl(mp, FUSE_LISTXATTR);
 		err = EOPNOTSUPP;
 	}
 	if (err != 0)
 		goto done;

 	sizereply = fdi.answ;
 	raw_len = sizereply->size;
 	if (raw_len == 0) {
 		/* Empty list: nothing to fetch or convert */
 		if (ap->a_size != NULL)
 			*ap->a_size = 0;
 		goto done;
 	}

 	/* Second pass: retrieve the Linux / FUSE compatible list values */
 	fdisp_refresh_vp(&fdi, FUSE_LISTXATTR, vp, td, cred);
 	request = fdi.indata;
 	request->size = raw_len + sizeof(*sizereply);
 	snprintf((char *)fdi.indata + sizeof(*request), nslen, "%s%c", nsname,
 	    extattr_namespace_separator);

 	err = fdisp_wait_answ(&fdi);
 	if (err != 0)
 		goto done;

 	raw_list = fdi.answ;
 	raw_len = fdi.iosize;

 	/*
 	 * The Linux / FUSE attribute list format isn't the same as
 	 * FreeBSD's, so build a FreeBSD-format copy before handing anything
 	 * to the caller.
 	 */
 	converted = malloc(raw_len, M_TEMP, M_WAITOK);
 	err = fuse_xattrlist_convert(nsname, raw_list, raw_len, converted,
 	    &converted_len);
 	if (err != 0)
 		goto done;

 	if (ap->a_size != NULL)
 		*ap->a_size = converted_len;
 	if (uio != NULL)
 		err = uiomove(converted, converted_len, uio);

 done:
 	/* converted is still NULL if we bailed out before allocating it */
 	free(converted, M_TEMP);
 	fdisp_destroy(&fdi);
 	return (err);
 }
 
 /*
     struct vop_deleteextattr_args {
 	struct vop_generic_args a_gen;
 	struct vnode *a_vp;
 	int a_attrnamespace;
 	const char *a_name;
 	struct ucred *a_cred;
 	struct thread *a_td;
     };
 */
 /*
  * VOP_DELETEEXTATTR handler: send FUSE_REMOVEXATTR for
  * "<namespace><separator><name>".
  *
  * Returns 0 on success, ENXIO for a dead file system, EROFS on a read-only
  * mount, EOPNOTSUPP if the daemon does not implement the operation, or
  * another errno.
  */
 static int
 fuse_vnop_deleteextattr(struct vop_deleteextattr_args *ap)
 {
 	struct fuse_dispatcher fdi;
 	struct vnode *vp = ap->a_vp;
 	struct mount *mp = vnode_mount(vp);
 	struct thread *td = ap->a_td;
 	struct ucred *cred = ap->a_cred;
 	char *nsname;
 	size_t namelen;
 	int err;

 	if (fuse_isdeadfs(vp))
 		return (ENXIO);
 	if (!fsess_isimpl(mp, FUSE_REMOVEXATTR))
 		return EOPNOTSUPP;
 	if (vfs_isrdonly(mp))
 		return EROFS;

 	err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td,
 	    VWRITE);
 	if (err)
 		return err;

 	/* Anything that is not the system namespace is treated as "user" */
 	nsname = (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) ?
 	    EXTATTR_NAMESPACE_SYSTEM_STRING : EXTATTR_NAMESPACE_USER_STRING;

 	/* "<namespace><separator><name>" plus the terminating NUL */
 	namelen = strlen(nsname) + sizeof(extattr_namespace_separator) +
 	    strlen(ap->a_name) + 1;

 	/* The request body is nothing but the NUL-terminated full name */
 	fdisp_init(&fdi, namelen);
 	fdisp_make_vp(&fdi, FUSE_REMOVEXATTR, vp, td, cred);
 	snprintf((char *)fdi.indata, namelen, "%s%c%s", nsname,
 	    extattr_namespace_separator, ap->a_name);

 	err = fdisp_wait_answ(&fdi);
 	if (err == ENOSYS) {
 		/* Remember that the daemon lacks this operation */
 		fsess_set_notimpl(mp, FUSE_REMOVEXATTR);
 		err = EOPNOTSUPP;
 	}

 	fdisp_destroy(&fdi);
 	return (err);
 }
 
 /*
     struct vnop_print_args {
 	struct vnode *a_vp;
     };
 */
 /*
  * VOP_PRINT handler: print the vnode's FUSE node id, parent node id, lookup
  * count and flags.  Always returns 0.
  */
 static int
 fuse_vnop_print(struct vop_print_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 	uintmax_t nodeid, parent, nlookup;

 	/* Widen everything so that a single %ju conversion fits each value */
 	nodeid = (uintmax_t)VTOILLU(vp);
 	parent = (uintmax_t)fvdat->parent_nid;
 	nlookup = (uintmax_t)fvdat->nlookup;

 	printf("nodeid: %ju, parent nodeid: %ju, nlookup: %ju, flag: %#x\n",
 	    nodeid, parent, nlookup, fvdat->flag);

 	return 0;
 }
 	
 /*
  * Get an NFS filehandle for a FUSE file.
  *
  * This will only work for FUSE file systems that guarantee the uniqueness of
  * nodeid:generation, which most don't.
  */
 /*
 vop_vptofh {
 	IN struct vnode *a_vp;
 	IN struct fid *a_fhp;
 };
 */
 /*
  * VOP_VPTOFH handler: fill in ap->a_fhp, viewed as a struct fuse_fid, with
  * the vnode's node id and generation.
  *
  * Returns EOPNOTSUPP unless the mount has FSESS_EXPORT_SUPPORT set, any
  * error from fuse_internal_getattr(), EOVERFLOW if the generation does not
  * fit in 32 bits, and 0 otherwise.
  */
 static int
 fuse_vnop_vptofh(struct vop_vptofh_args *ap)
 {
 	_Static_assert(sizeof(struct fuse_fid) <= sizeof(struct fid),
 		"FUSE fid type is too big");
 	struct vnode *vp = ap->a_vp;
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 	struct fuse_fid *fhp = (struct fuse_fid *)(ap->a_fhp);
 	struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp));
 	struct vattr va;
 	int err;

 	if ((data->dataflags & FSESS_EXPORT_SUPPORT) == 0)
 		return EOPNOTSUPP;

 	/* Only the status is used; the attributes themselves are discarded */
 	err = fuse_internal_getattr(vp, &va, curthread->td_ucred, curthread);
 	if (err)
 		return err;

 	fhp->len = sizeof(struct fuse_fid);
 	fhp->nid = fvdat->nid;

 	/* A generation too large for 32 bits cannot be stored in the handle */
 	if (fvdat->generation > UINT32_MAX)
 		return EOVERFLOW;
 	fhp->gen = fvdat->generation;

 	return (0);
 }
 
 
Index: projects/fuse2/tests/sys/fs/fusefs/Makefile
===================================================================
--- projects/fuse2/tests/sys/fs/fusefs/Makefile	(revision 349237)
+++ projects/fuse2/tests/sys/fs/fusefs/Makefile	(revision 349238)
@@ -1,78 +1,79 @@
 # $FreeBSD$
 
 PACKAGE=	tests
 
 TESTSDIR=	${TESTSBASE}/sys/fs/fusefs
 
 # We could simply link all of these files into a single executable.  But since
 # Kyua treats googletest programs as plain tests, it's better to separate them
 # out, so we get more granular reporting.
 GTESTS+=	access
 GTESTS+=	allow_other
+GTESTS+=	bmap
 GTESTS+=	create
 GTESTS+=	default_permissions
 GTESTS+=	default_permissions_privileged
 GTESTS+=	destroy
 GTESTS+=	dev_fuse_poll
 GTESTS+=	fifo
 GTESTS+=	flush
 GTESTS+=	forget
 GTESTS+=	fsync
 GTESTS+=	fsyncdir
 GTESTS+=	getattr
 GTESTS+=	interrupt
 GTESTS+=	io
 GTESTS+=	link
 GTESTS+=	locks
 GTESTS+=	lookup
 GTESTS+=	mkdir
 GTESTS+=	mknod
 GTESTS+=	mount
 GTESTS+=	nfs
 GTESTS+=	notify
 GTESTS+=	open
 GTESTS+=	opendir
 GTESTS+=	read
 GTESTS+=	readdir
 GTESTS+=	readlink
 GTESTS+=	release
 GTESTS+=	releasedir
 GTESTS+=	rename
 GTESTS+=	rmdir
 GTESTS+=	setattr
 GTESTS+=	statfs
 GTESTS+=	symlink
 GTESTS+=	unlink
 GTESTS+=	write
 GTESTS+=	xattr
 
 # Every test program is built from its own <name>.cc plus the sources shared
 # by all of them: getmntopts.c, the mock file system and the test utilities.
 .for p in ${GTESTS}
 SRCS.$p+=	${p}.cc
 SRCS.$p+=	getmntopts.c
 SRCS.$p+=	mockfs.cc
 SRCS.$p+=	utils.cc
 .endfor
 
 # Tests that must run as a particular kind of user.
 TEST_METADATA.default_permissions+=	required_user="unprivileged"
 TEST_METADATA.default_permissions_privileged+=	required_user="root"
 TEST_METADATA.mknod+=	required_user="root"
 TEST_METADATA.nfs+=	required_user="root"
 
 # TODO: drastically increase timeout after test development is mostly complete
 TEST_METADATA+= timeout=10
 
 # Extra include directories, plus mount's source directory on the source
 # search path (presumably so that getmntopts.c can be found there).
 FUSEFS=		${SRCTOP}/sys/fs/fuse
 MOUNT=		${SRCTOP}/sbin/mount
 CXXFLAGS+=	-I${SRCTOP}/tests
 CXXFLAGS+=	-I${FUSEFS}
 CXXFLAGS+=	-I${MOUNT}
 .PATH:		${MOUNT}
 CXXSTD=		c++14
 
 LIBADD+=	pthread
 LIBADD+=	gmock gtest
 LIBADD+=	util
 
 WARNS?=	6
 
 .include <bsd.test.mk>
Index: projects/fuse2/tests/sys/fs/fusefs/bmap.cc
===================================================================
--- projects/fuse2/tests/sys/fs/fusefs/bmap.cc	(nonexistent)
+++ projects/fuse2/tests/sys/fs/fusefs/bmap.cc	(revision 349238)
@@ -0,0 +1,159 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2019 The FreeBSD Foundation
+ *
+ * This software was developed by BFF Storage Systems, LLC under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+extern "C" {
+#include <sys/param.h>
+#include <sys/ioctl.h>
+#include <sys/filio.h>
+
+#include <fcntl.h>
+}
+
+#include "mockfs.hh"
+#include "utils.hh"
+
+using namespace testing;
+
+/* The test file's path as opened by the tests, and the name they look up */
+const static char FULLPATH[] = "mountpoint/foo";
+const static char RELPATH[] = "foo";
+
+/* Fixture for the FUSE_BMAP tests; runs with m_maxreadahead at UINT32_MAX */
+class Bmap: public FuseTest {
+public:
+virtual void SetUp() {
+	m_maxreadahead = UINT32_MAX;
+	FuseTest::SetUp();
+}
+
+/*
+ * Expect exactly one FUSE_BMAP request for logical block lbn of inode ino
+ * with the given block size, and answer it with physical block pbn.
+ */
+void expect_bmap(uint64_t ino, uint64_t lbn, uint32_t blocksize, uint64_t pbn)
+{
+	auto is_our_bmap = [=](auto in) {
+		return (in.header.opcode == FUSE_BMAP &&
+			in.header.nodeid == ino &&
+			in.body.bmap.block == lbn &&
+			in.body.bmap.blocksize == blocksize);
+	};
+	auto reply = [=](auto i __unused, auto& out) {
+		SET_OUT_HEADER_LEN(out, bmap);
+		out.body.bmap.block = pbn;
+	};
+
+	EXPECT_CALL(*m_mock, process(ResultOf(is_our_bmap, Eq(true)), _))
+		.WillOnce(Invoke(ReturnImmediate(reply)));
+}
+
+/* Expect a lookup of a regular file (mode 0644) with the given size */
+void expect_lookup(const char *relpath, uint64_t ino, off_t size)
+{
+	const mode_t mode = S_IFREG | 0644;
+
+	FuseTest::expect_lookup(relpath, ino, mode, size, 1, UINT64_MAX);
+}
+};
+
+/*
+ * FIOBMAP2 on a file whose daemon implements FUSE_BMAP should report the
+ * physical block number that the daemon returned.
+ * XXX The FUSE protocol does not include the runp and runb variables, so those
+ * must be guessed in-kernel.
+ */
+TEST_F(Bmap, bmap)
+{
+	const off_t filesize = 1 << 20;
+	const ino_t ino = 42;
+	const int64_t lbn = 10;
+	const int64_t pbn = 12345;
+	struct fiobmap2_arg arg;
+	int fd;
+
+	expect_lookup(RELPATH, ino, filesize);
+	expect_open(ino, 0, 1);
+	expect_bmap(ino, lbn, m_maxbcachebuf, pbn);
+
+	fd = open(FULLPATH, O_RDWR);
+	ASSERT_LE(0, fd) << strerror(errno);
+
+	/* Start from -1 so we can tell that the ioctl set both run lengths */
+	arg.runb = -1;
+	arg.runp = -1;
+	arg.bn = lbn;
+	ASSERT_EQ(0, ioctl(fd, FIOBMAP2, &arg)) << strerror(errno);
+
+	EXPECT_EQ(arg.bn, pbn);
+	EXPECT_EQ(arg.runp, MAXPHYS / m_maxbcachebuf - 1);
+	EXPECT_EQ(arg.runb, MAXPHYS / m_maxbcachebuf - 1);
+}
+
+/*
+ * If the daemon does not implement FUSE_BMAP (it answers ENOSYS), fusefs
+ * should return sensible defaults.  The daemon is expected to see only a
+ * single FUSE_BMAP request even though FIOBMAP2 is issued three times.
+ */
+TEST_F(Bmap, default_)
+{
+	const off_t filesize = 1 << 20;
+	const ino_t ino = 42;
+	struct fiobmap2_arg arg;
+	int64_t lbn;
+	int fd;
+
+	expect_lookup(RELPATH, ino, filesize);
+	expect_open(ino, 0, 1);
+	EXPECT_CALL(*m_mock, process(
+		ResultOf([=](auto in) {
+			return (in.header.opcode == FUSE_BMAP);
+		}, Eq(true)),
+		_)
+	).WillOnce(Invoke(ReturnErrno(ENOSYS)));
+
+	fd = open(FULLPATH, O_RDWR);
+	ASSERT_LE(0, fd) << strerror(errno);
+
+	/* First block of the file: nothing precedes it */
+	lbn = 0;
+	arg.runb = -1;
+	arg.runp = -1;
+	arg.bn = lbn;
+	ASSERT_EQ(0, ioctl(fd, FIOBMAP2, &arg)) << strerror(errno);
+	EXPECT_EQ(arg.bn, 0);
+	EXPECT_EQ(arg.runp, MAXPHYS / m_maxbcachebuf - 1);
+	EXPECT_EQ(arg.runb, 0);
+
+	/* A block in the middle of the file: full runs on both sides */
+	lbn = filesize / m_maxbcachebuf / 2;
+	arg.runb = -1;
+	arg.runp = -1;
+	arg.bn = lbn;
+	ASSERT_EQ(0, ioctl(fd, FIOBMAP2, &arg)) << strerror(errno);
+	EXPECT_EQ(arg.bn, lbn * m_maxbcachebuf / DEV_BSIZE);
+	EXPECT_EQ(arg.runp, MAXPHYS / m_maxbcachebuf - 1);
+	EXPECT_EQ(arg.runb, MAXPHYS / m_maxbcachebuf - 1);
+
+	/* Last block of the file: nothing follows it */
+	lbn = filesize / m_maxbcachebuf - 1;
+	arg.runb = -1;
+	arg.runp = -1;
+	arg.bn = lbn;
+	ASSERT_EQ(0, ioctl(fd, FIOBMAP2, &arg)) << strerror(errno);
+	EXPECT_EQ(arg.bn, lbn * m_maxbcachebuf / DEV_BSIZE);
+	EXPECT_EQ(arg.runp, 0);
+	EXPECT_EQ(arg.runb, MAXPHYS / m_maxbcachebuf - 1);
+}

Property changes on: projects/fuse2/tests/sys/fs/fusefs/bmap.cc
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: projects/fuse2/tests/sys/fs/fusefs/mockfs.cc
===================================================================
--- projects/fuse2/tests/sys/fs/fusefs/mockfs.cc	(revision 349237)
+++ projects/fuse2/tests/sys/fs/fusefs/mockfs.cc	(revision 349238)
@@ -1,693 +1,697 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2019 The FreeBSD Foundation
  *
  * This software was developed by BFF Storage Systems, LLC under sponsorship
  * from the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 extern "C" {
 #include <sys/param.h>
 
 #include <sys/mount.h>
 #include <sys/select.h>
 #include <sys/stat.h>
 #include <sys/uio.h>
 #include <sys/user.h>
 
 #include <fcntl.h>
 #include <libutil.h>
 #include <poll.h>
 #include <pthread.h>
 #include <signal.h>
 #include <stdlib.h>
 #include <unistd.h>
 
 #include "mntopts.h"	// for build_iovec
 }
 
 #include <cinttypes>
 
 #include <gtest/gtest.h>
 
 #include "mockfs.hh"
 
 using namespace testing;
 
 /*
  * Debug output level: 0 silences debug_response(); values above 1 add extra
  * fields to the lines printed by debug_request().
  */
 int verbosity = 0;
 
 /*
  * Translate a FUSE opcode into a printable name.  Opcodes beyond the end of
  * the table yield "Unknown (opcode > max)".
  */
 const char* opcode2opname(uint32_t opcode)
 {
 	/* Indexed by opcode; unassigned opcodes get a placeholder entry */
 	static const char* const opnames[] = {
 		"Unknown (opcode 0)",
 		"LOOKUP",
 		"FORGET",
 		"GETATTR",
 		"SETATTR",
 		"READLINK",
 		"SYMLINK",
 		"Unknown (opcode 7)",
 		"MKNOD",
 		"MKDIR",
 		"UNLINK",
 		"RMDIR",
 		"RENAME",
 		"LINK",
 		"OPEN",
 		"READ",
 		"WRITE",
 		"STATFS",
 		"RELEASE",
 		"Unknown (opcode 19)",
 		"FSYNC",
 		"SETXATTR",
 		"GETXATTR",
 		"LISTXATTR",
 		"REMOVEXATTR",
 		"FLUSH",
 		"INIT",
 		"OPENDIR",
 		"READDIR",
 		"RELEASEDIR",
 		"FSYNCDIR",
 		"GETLK",
 		"SETLK",
 		"SETLKW",
 		"ACCESS",
 		"CREATE",
 		"INTERRUPT",
 		"BMAP",
 		"DESTROY"
 	};
 	const uint32_t num_ops = sizeof(opnames) / sizeof(opnames[0]);

 	if (opcode < num_ops)
 		return (opnames[opcode]);
 	return ("Unknown (opcode > max)");
 }
 
 /*
  * Build a mocker that answers a request with a header-only response whose
  * error field is the negated errno value.
  */
 ProcessMockerT
 ReturnErrno(int error)
 {
 	return([=](auto in, auto &out) {
 		std::unique_ptr<mockfs_buf_out> reply(new mockfs_buf_out);

 		reply->header.len = sizeof(reply->header);
 		reply->header.error = -error;
 		reply->header.unique = in.header.unique;
 		out.push_back(std::move(reply));
 	});
 }
 
 /*
  * Helper for LOOKUP that returns a negative cache entry: a successful
  * response whose nodeid is 0, valid for *entry_valid.
  *
  * Only the pointer is captured; the timespec is read when the mocker runs,
  * so it must still be alive at that point.
  */
 ProcessMockerT
 ReturnNegativeCache(const struct timespec *entry_valid)
 {
 	return([=](auto in, auto &out) {
 		std::unique_ptr<mockfs_buf_out> reply(new mockfs_buf_out);

 		reply->header.unique = in.header.unique;
 		reply->header.error = 0;
 		/* A nodeid of 0 means ENOENT, and cache it */
 		reply->body.entry.nodeid = 0;
 		reply->body.entry.entry_valid = entry_valid->tv_sec;
 		reply->body.entry.entry_valid_nsec = entry_valid->tv_nsec;
 		SET_OUT_HEADER_LEN(*reply, entry);
 		out.push_back(std::move(reply));
 	});
 }
 
 /*
  * Build a mocker that answers a request with a single response: the
  * response's unique id is copied from the request, then f fills in the rest.
  */
 ProcessMockerT
 ReturnImmediate(std::function<void(const mockfs_buf_in& in,
 				   struct mockfs_buf_out &out)> f)
 {
 	return([=](auto& in, auto &out) {
 		std::unique_ptr<mockfs_buf_out> reply(new mockfs_buf_out);

 		reply->header.unique = in.header.unique;
 		f(in, *reply);
 		out.push_back(std::move(reply));
 	});
 }
 
 /*
  * Deliberately empty signal handler: it doesn't do anything except interrupt
  * the daemon's read(2) call.
  */
 void sigint_handler(int __unused sig)
 {
 }
 
 void MockFS::debug_request(const mockfs_buf_in &in)
 {
 	printf("%-11s ino=%2" PRIu64, opcode2opname(in.header.opcode),
 		in.header.nodeid);
 	if (verbosity > 1) {
 		printf(" uid=%5u gid=%5u pid=%5u unique=%" PRIu64 " len=%u",
 			in.header.uid, in.header.gid, in.header.pid,
 			in.header.unique, in.header.len);
 	}
 	switch (in.header.opcode) {
 		const char *name, *value;
 
 		case FUSE_ACCESS:
 			printf(" mask=%#x", in.body.access.mask);
 			break;
+		case FUSE_BMAP:
+			printf(" block=%#lx blocksize=%#x", in.body.bmap.block,
+				in.body.bmap.blocksize);
+			break;
 		case FUSE_CREATE:
 			if (m_kernel_minor_version >= 12)
 				name = (const char*)in.body.bytes +
 					sizeof(fuse_create_in);
 			else
 				name = (const char*)in.body.bytes +
 					sizeof(fuse_open_in);
 			printf(" flags=%#x name=%s",
 				in.body.open.flags, name);
 			break;
 		case FUSE_FLUSH:
 			printf(" fh=%#" PRIx64 " lock_owner=%" PRIu64,
 				in.body.flush.fh,
 				in.body.flush.lock_owner);
 			break;
 		case FUSE_FORGET:
 			printf(" nlookup=%" PRIu64, in.body.forget.nlookup);
 			break;
 		case FUSE_FSYNC:
 			printf(" flags=%#x", in.body.fsync.fsync_flags);
 			break;
 		case FUSE_FSYNCDIR:
 			printf(" flags=%#x", in.body.fsyncdir.fsync_flags);
 			break;
 		case FUSE_INTERRUPT:
 			printf(" unique=%" PRIu64, in.body.interrupt.unique);
 			break;
 		case FUSE_LINK:
 			printf(" oldnodeid=%" PRIu64, in.body.link.oldnodeid);
 			break;
 		case FUSE_LOOKUP:
 			printf(" %s", in.body.lookup);
 			break;
 		case FUSE_MKDIR:
 			name = (const char*)in.body.bytes +
 				sizeof(fuse_mkdir_in);
 			printf(" name=%s mode=%#o umask=%#o", name,
 				in.body.mkdir.mode, in.body.mkdir.umask);
 			break;
 		case FUSE_MKNOD:
 			if (m_kernel_minor_version >= 12)
 				name = (const char*)in.body.bytes +
 					sizeof(fuse_mknod_in);
 			else
 				name = (const char*)in.body.bytes +
 					FUSE_COMPAT_MKNOD_IN_SIZE;
 			printf(" mode=%#o rdev=%x umask=%#o name=%s",
 				in.body.mknod.mode, in.body.mknod.rdev,
 				in.body.mknod.umask, name);
 			break;
 		case FUSE_OPEN:
 			printf(" flags=%#x", in.body.open.flags);
 			break;
 		case FUSE_OPENDIR:
 			printf(" flags=%#x", in.body.opendir.flags);
 			break;
 		case FUSE_READ:
 			printf(" offset=%" PRIu64 " size=%u",
 				in.body.read.offset,
 				in.body.read.size);
 			if (verbosity > 1)
 				printf(" flags=%#x", in.body.read.flags);
 			break;
 		case FUSE_READDIR:
 			printf(" fh=%#" PRIx64 " offset=%" PRIu64 " size=%u",
 				in.body.readdir.fh, in.body.readdir.offset,
 				in.body.readdir.size);
 			break;
 		case FUSE_RELEASE:
 			printf(" fh=%#" PRIx64 " flags=%#x lock_owner=%" PRIu64,
 				in.body.release.fh,
 				in.body.release.flags,
 				in.body.release.lock_owner);
 			break;
 		case FUSE_SETATTR:
 			if (verbosity <= 1) {
 				printf(" valid=%#x", in.body.setattr.valid);
 				break;
 			}
 			if (in.body.setattr.valid & FATTR_MODE)
 				printf(" mode=%#o", in.body.setattr.mode);
 			if (in.body.setattr.valid & FATTR_UID)
 				printf(" uid=%u", in.body.setattr.uid);
 			if (in.body.setattr.valid & FATTR_GID)
 				printf(" gid=%u", in.body.setattr.gid);
 			if (in.body.setattr.valid & FATTR_SIZE)
 				printf(" size=%" PRIu64, in.body.setattr.size);
 			if (in.body.setattr.valid & FATTR_ATIME)
 				printf(" atime=%" PRIu64 ".%u",
 					in.body.setattr.atime,
 					in.body.setattr.atimensec);
 			if (in.body.setattr.valid & FATTR_MTIME)
 				printf(" mtime=%" PRIu64 ".%u",
 					in.body.setattr.mtime,
 					in.body.setattr.mtimensec);
 			if (in.body.setattr.valid & FATTR_FH)
 				printf(" fh=%" PRIu64 "", in.body.setattr.fh);
 			break;
 		case FUSE_SETLK:
 			printf(" fh=%#" PRIx64 " owner=%" PRIu64
 				" type=%u pid=%u",
 				in.body.setlk.fh, in.body.setlk.owner,
 				in.body.setlk.lk.type,
 				in.body.setlk.lk.pid);
 			if (verbosity >= 2) {
 				printf(" range=[%" PRIu64 "-%" PRIu64 "]",
 					in.body.setlk.lk.start,
 					in.body.setlk.lk.end);
 			}
 			break;
 		case FUSE_SETXATTR:
 			/* 
 			 * In theory neither the xattr name and value need be
 			 * ASCII, but in this test suite they always are.
 			 */
 			name = (const char*)in.body.bytes +
 				sizeof(fuse_setxattr_in);
 			value = name + strlen(name) + 1;
 			printf(" %s=%s", name, value);
 			break;
 		case FUSE_WRITE:
 			printf(" fh=%#" PRIx64 " offset=%" PRIu64
 				" size=%u write_flags=%u",
 				in.body.write.fh,
 				in.body.write.offset, in.body.write.size,
 				in.body.write.write_flags);
 			if (verbosity > 1)
 				printf(" flags=%#x", in.body.write.flags);
 			break;
 		default:
 			break;
 	}
 	printf("\n");
 }
 
 /*
  * Print a one-line description of an outgoing FUSE message.
  *
  * Only asynchronous notifications are described; they have no matching
  * request, so this is the only place they show up in the debug output.
  * Nothing is printed when verbosity is 0.
  */
 void MockFS::debug_response(const mockfs_buf_out &out) {
 	if (verbosity == 0)
 		return;
 
 	switch (out.header.error) {
 	case FUSE_NOTIFY_INVAL_INODE:
 		printf("<- INVAL_INODE ino=%" PRIu64 " off=%" PRIi64
 			" len=%" PRIi64 "\n",
 			out.body.inval_inode.ino,
 			out.body.inval_inode.off,
 			out.body.inval_inode.len);
 		break;
 	case FUSE_NOTIFY_INVAL_ENTRY: {
 		/* The entry's name directly follows the fixed-size struct */
 		const char *entry_name = (const char*)out.body.bytes +
 			sizeof(fuse_notify_inval_entry_out);
 
 		printf("<- INVAL_ENTRY parent=%" PRIu64 " %s\n",
 			out.body.inval_entry.parent, entry_name);
 		break;
 	}
 	default:
 		break;
 	}
 }
 
 /*
  * Create a mock file system, mount it on "mountpoint", complete the FUSE_INIT
  * handshake and start the daemon thread.
  *
  * googletest doesn't allow ASSERT_ in constructors, so every failure is
  * reported by throwing std::system_error.
  *
  * @param	max_readahead		value returned to the kernel in the
  * 					FUSE_INIT response
  * @param	allow_other		add the "allow_other" mount option
  * @param	default_permissions	add the "default_permissions" mount
  * 					option
  * @param	push_symlinks_in	add the "push_symlinks_in" mount option
  * @param	ro			add the "ro" mount option
  * @param	pm			how the daemon waits on /dev/fuse
  * @param	flags			mask applied to the kernel's FUSE_INIT
  * 					flags
  * @param	kernel_minor_version	protocol minor version to advertise
  * @param	max_write		max_write value for the FUSE_INIT
  * 					response
  * @param	async			add the "async" mount option
  * @param	noclusterr		add the "noclusterr" mount option
  */
 MockFS::MockFS(int max_readahead, bool allow_other, bool default_permissions,
 	bool push_symlinks_in, bool ro, enum poll_method pm, uint32_t flags,
 	uint32_t kernel_minor_version, uint32_t max_write, bool async,
 	bool noclusterr)
 {
 	struct sigaction sa;
 	struct iovec *iov = NULL;
 	int iovlen = 0;
 	int error;
 	char fdstr[15];
 	const bool trueval = true;
 
 	m_daemon_id = NULL;
 	m_kernel_minor_version = kernel_minor_version;
 	m_maxreadahead = max_readahead;
 	m_maxwrite = max_write;
 	m_nready = -1;
 	m_pm = pm;
 	m_quit = false;
 	if (m_pm == KQ)
 		m_kq = kqueue();
 	else
 		m_kq = -1;
 
 	/*
 	 * Kyua sets pwd to a testcase-unique tempdir; no need to use
 	 * mkdtemp
 	 */
 	if (mkdir("mountpoint" , 0755) && errno != EEXIST)
 		throw(std::system_error(errno, std::system_category(),
 			"Couldn't make mountpoint directory"));
 
 	/* Only the BLOCKING method wants a blocking descriptor */
 	switch (m_pm) {
 	case BLOCKING:
 		m_fuse_fd = open("/dev/fuse", O_CLOEXEC | O_RDWR);
 		break;
 	default:
 		m_fuse_fd = open("/dev/fuse", O_CLOEXEC | O_RDWR | O_NONBLOCK);
 		break;
 	}
 	if (m_fuse_fd < 0)
 		throw(std::system_error(errno, std::system_category(),
 			"Couldn't open /dev/fuse"));
 
 	m_pid = getpid();
 	m_child_pid = -1;
 
 	build_iovec(&iov, &iovlen, "fstype", __DECONST(void *, "fusefs"), -1);
 	build_iovec(&iov, &iovlen, "fspath",
 		    __DECONST(void *, "mountpoint"), -1);
 	build_iovec(&iov, &iovlen, "from", __DECONST(void *, "/dev/fuse"), -1);
 	/* Bounded formatting; fdstr is large enough for any int */
 	snprintf(fdstr, sizeof(fdstr), "%d", m_fuse_fd);
 	build_iovec(&iov, &iovlen, "fd", fdstr, -1);
 	if (allow_other) {
 		build_iovec(&iov, &iovlen, "allow_other",
 			__DECONST(void*, &trueval), sizeof(bool));
 	}
 	if (default_permissions) {
 		build_iovec(&iov, &iovlen, "default_permissions",
 			__DECONST(void*, &trueval), sizeof(bool));
 	}
 	if (push_symlinks_in) {
 		build_iovec(&iov, &iovlen, "push_symlinks_in",
 			__DECONST(void*, &trueval), sizeof(bool));
 	}
 	if (ro) {
 		build_iovec(&iov, &iovlen, "ro",
 			__DECONST(void*, &trueval), sizeof(bool));
 	}
 	if (async) {
 		build_iovec(&iov, &iovlen, "async", __DECONST(void*, &trueval),
 			sizeof(bool));
 	}
 	if (noclusterr) {
 		build_iovec(&iov, &iovlen, "noclusterr",
 			__DECONST(void*, &trueval), sizeof(bool));
 	}
 	if (nmount(iov, iovlen, 0))
 		throw(std::system_error(errno, std::system_category(),
 			"Couldn't mount filesystem"));
 
 	/* Set up the default handler */
 	ON_CALL(*this, process(_, _))
 		.WillByDefault(Invoke(this, &MockFS::process_default));
 
 	init(flags);
 	bzero(&sa, sizeof(sa));
 	sa.sa_handler = sigint_handler;
 	sa.sa_flags = 0;	/* Don't set SA_RESTART! */
 	if (0 != sigaction(SIGUSR1, &sa, NULL))
 		throw(std::system_error(errno, std::system_category(),
 			"Couldn't handle SIGUSR1"));
 	/*
 	 * pthread_create reports failure through its return value and does
 	 * not set errno, so the returned code is what must be thrown.
 	 */
 	error = pthread_create(&m_daemon_id, NULL, service, (void*)this);
 	if (error != 0)
 		throw(std::system_error(error, std::system_category(),
 			"Couldn't start fuse thread"));
 }
 
 /*
  * Tear down the mock file system: stop the daemon, force-unmount and remove
  * the mountpoint, and release the kqueue if one was created.
  */
 MockFS::~MockFS() {
 	/* Signal the daemon and close /dev/fuse before joining the thread */
 	kill_daemon();
 	if (m_daemon_id != NULL) {
 		pthread_join(m_daemon_id, NULL);
 		m_daemon_id = NULL;
 	}
 	::unmount("mountpoint", MNT_FORCE);
 	rmdir("mountpoint");
 	/* m_kq is -1 unless the KQ poll method was selected */
 	if (m_kq >= 0)
 		close(m_kq);
 }
 
 void MockFS::init(uint32_t flags) {
 	std::unique_ptr<mockfs_buf_in> in(new mockfs_buf_in);
 	std::unique_ptr<mockfs_buf_out> out(new mockfs_buf_out);
 
 	read_request(*in);
 	ASSERT_EQ(FUSE_INIT, in->header.opcode);
 
 	out->header.unique = in->header.unique;
 	out->header.error = 0;
 	out->body.init.major = FUSE_KERNEL_VERSION;
 	out->body.init.minor = m_kernel_minor_version;;
 	out->body.init.flags = in->body.init.flags & flags;
 
 	out->body.init.max_write = m_maxwrite;
 
 	out->body.init.max_readahead = m_maxreadahead;
 	SET_OUT_HEADER_LEN(*out, init);
 	write(m_fuse_fd, out.get(), out->header.len);
 }
 
 /*
  * Ask the daemon thread to stop and close the /dev/fuse descriptor.
  *
  * Safe to call more than once: the descriptor is closed only on the first
  * call.  The thread itself is joined by the destructor, not here.
  */
 void MockFS::kill_daemon() {
 	m_quit = true;
 	/* Interrupt any system call the daemon is blocked in */
 	if (m_daemon_id != NULL)
 		pthread_kill(m_daemon_id, SIGUSR1);
 	// Closing the /dev/fuse file descriptor first allows unmount to
 	// succeed even if the daemon doesn't correctly respond to commands
 	// during the unmount sequence.
 	if (m_fuse_fd >= 0) {
 		close(m_fuse_fd);
 		m_fuse_fd = -1;
 	}
 }
 
 void MockFS::loop() {
 	std::vector<std::unique_ptr<mockfs_buf_out>> out;
 
 	std::unique_ptr<mockfs_buf_in> in(new mockfs_buf_in);
 	ASSERT_TRUE(in != NULL);
 	while (!m_quit) {
 		bzero(in.get(), sizeof(*in));
 		read_request(*in);
 		if (m_quit)
 			break;
 		if (verbosity > 0)
 			debug_request(*in);
 		if (pid_ok((pid_t)in->header.pid)) {
 			process(*in, out);
 		} else {
 			/* 
 			 * Reject any requests from unknown processes.  Because
 			 * we actually do mount a filesystem, plenty of
 			 * unrelated system daemons may try to access it.
 			 */
 			if (verbosity > 1)
 				printf("\tREJECTED (wrong pid %d)\n",
 					in->header.pid);
 			process_default(*in, out);
 		}
 		for (auto &it: out)
 			write_response(*it);
 		out.clear();
 	}
 }
 
 /*
  * Build and send a FUSE_NOTIFY_INVAL_ENTRY notification.  The name is copied
  * into the buffer immediately after the fixed-size inval_entry structure.
  * Always returns 0.
  */
 int MockFS::notify_inval_entry(ino_t parent, const char *name, size_t namelen)
 {
 	std::unique_ptr<mockfs_buf_out> msg(new mockfs_buf_out);
 	const size_t fixed_len = sizeof(msg->body.inval_entry);
 	char *name_dst = (char*)&msg->body.bytes + fixed_len;
 
 	msg->header.unique = 0;	/* 0 means asynchronous notification */
 	msg->header.error = FUSE_NOTIFY_INVAL_ENTRY;
 	msg->body.inval_entry.parent = parent;
 	msg->body.inval_entry.namelen = namelen;
 	strlcpy(name_dst, name, sizeof(msg->body.bytes) - fixed_len);
 	msg->header.len = sizeof(msg->header) + fixed_len + namelen;
 	debug_response(*msg);
 	write_response(*msg);
 	return 0;
 }
 
 /*
  * Build and send a FUSE_NOTIFY_INVAL_INODE notification for the given inode
  * and byte range.  Always returns 0.
  */
 int MockFS::notify_inval_inode(ino_t ino, off_t off, ssize_t len)
 {
 	std::unique_ptr<mockfs_buf_out> msg(new mockfs_buf_out);
 
 	/* Payload first, then the header that describes it */
 	msg->body.inval_inode.ino = ino;
 	msg->body.inval_inode.off = off;
 	msg->body.inval_inode.len = len;
 	msg->header.error = FUSE_NOTIFY_INVAL_INODE;
 	msg->header.unique = 0;	/* 0 means asynchronous notification */
 	msg->header.len = sizeof(msg->header) + sizeof(msg->body.inval_inode);
 	debug_response(*msg);
 	write_response(*msg);
 	return 0;
 }
 
 /*
  * Decide whether a request from process pid should be serviced.
  *
  * The test process, its registered child, and any process whose command name
  * begins with "aiod" are accepted; everything else is refused.
  */
 bool MockFS::pid_ok(pid_t pid) {
 	struct kinfo_proc *proc;
 	bool is_aiod;
 
 	if (pid == m_pid || pid == m_child_pid)
 		return (true);
 
 	proc = kinfo_getproc(pid);
 	if (proc == NULL)
 		return (false);
 	/*
 	 * Allow access by the aio daemon processes so that our tests
 	 * can use aio functions
 	 */
 	is_aiod = (strncmp("aiod", proc->ki_comm, 4) == 0);
 	free(proc);
 	return (is_aiod);
 }
 
 void MockFS::process_default(const mockfs_buf_in& in,
 		std::vector<std::unique_ptr<mockfs_buf_out>> &out)
 {
 	std::unique_ptr<mockfs_buf_out> out0(new mockfs_buf_out);
 	out0->header.unique = in.header.unique;
 	out0->header.error = -EOPNOTSUPP;
 	out0->header.len = sizeof(out0->header);
 	out.push_back(std::move(out0));
 }
 
 /*
  * Read a single request from /dev/fuse into in.
  *
  * Unless the poll method is BLOCKING, first wait for the device to become
  * readable using the configured method.  The wait is retried every
  * timeout_ms milliseconds so that a shutdown request (m_quit) is noticed;
  * in that case the function returns without reading anything.
  *
  * On a read error the daemon is told to quit and a fatal test failure is
  * recorded.
  */
 void MockFS::read_request(mockfs_buf_in &in) {
 	ssize_t res;
 	int nready = 0;
 	fd_set readfds;
 	pollfd fds[1];
 	struct kevent changes[1];
 	struct kevent events[1];
 	struct timespec timeout_ts;
 	struct timeval timeout_tv;
 	const int timeout_ms = 999;
 	int timeout_int, nfds;
 
 	switch (m_pm) {
 	case BLOCKING:
 		break;
 	case KQ:
 		timeout_ts.tv_sec = 0;
 		timeout_ts.tv_nsec = timeout_ms * 1'000'000;
 		while (nready == 0) {
 			EV_SET(&changes[0], m_fuse_fd, EVFILT_READ, EV_ADD, 0,
 				0, 0);
 			nready = kevent(m_kq, &changes[0], 1, &events[0], 1,
 				&timeout_ts);
 			if (m_quit)
 				return;
 		}
 		ASSERT_LE(0, nready) << strerror(errno);
 		ASSERT_EQ(events[0].ident, (uintptr_t)m_fuse_fd);
 		if (events[0].flags & EV_ERROR)
 			FAIL() << strerror(events[0].data);
 		else if (events[0].flags & EV_EOF)
 			FAIL() << strerror(events[0].fflags);
 		m_nready = events[0].data;
 		break;
 	case POLL:
 		timeout_int = timeout_ms;
 		fds[0].fd = m_fuse_fd;
 		fds[0].events = POLLIN;
 		while (nready == 0) {
 			nready = poll(fds, 1, timeout_int);
 			if (m_quit)
 				return;
 		}
 		ASSERT_LE(0, nready) << strerror(errno);
 		ASSERT_TRUE(fds[0].revents & POLLIN);
 		break;
 	case SELECT:
 		timeout_tv.tv_sec = 0;
 		timeout_tv.tv_usec = timeout_ms * 1'000;
 		nfds = m_fuse_fd + 1;
 		while (nready == 0) {
 			FD_ZERO(&readfds);
 			FD_SET(m_fuse_fd, &readfds);
 			nready = select(nfds, &readfds, NULL, NULL,
 				&timeout_tv);
 			if (m_quit)
 				return;
 		}
 		ASSERT_LE(0, nready) << strerror(errno);
 		ASSERT_TRUE(FD_ISSET(m_fuse_fd, &readfds));
 		break;
 	default:
 		FAIL() << "not yet implemented";
 	}
 	res = read(m_fuse_fd, &in, sizeof(in));
 
 	if (res < 0 && !m_quit) {
 		/*
 		 * FAIL() returns from this function, so the quit flag must be
 		 * raised before it or the daemon loop would keep running.
 		 */
 		m_quit = true;
 		FAIL() << "read: " << strerror(errno);
 	}
 	ASSERT_TRUE(res >= static_cast<ssize_t>(sizeof(in.header)) || m_quit);
 }
 
 /*
  * Write one response (out.header.len bytes) to /dev/fuse.
  *
  * For the POLL and SELECT methods, first wait without a timeout until the
  * device reports that it is writable.  BLOCKING and KQ write immediately.
  */
 void MockFS::write_response(const mockfs_buf_out &out) {
 	fd_set writefds;
 	pollfd fds[1];
 	int nready, nfds;
 	ssize_t r;
 
 	switch (m_pm) {
 	case BLOCKING:
 	case KQ:	/* EVFILT_WRITE is not supported */
 		break;
 	case POLL:
 		fds[0].fd = m_fuse_fd;
 		fds[0].events = POLLOUT;
 		nready = poll(fds, 1, INFTIM);
 		ASSERT_LE(0, nready) << strerror(errno);
 		ASSERT_EQ(1, nready) << "NULL timeout expired?";
 		ASSERT_TRUE(fds[0].revents & POLLOUT);
 		break;
 	case SELECT:
 		FD_ZERO(&writefds);
 		FD_SET(m_fuse_fd, &writefds);
 		nfds = m_fuse_fd + 1;
 		nready = select(nfds, NULL, &writefds, NULL, NULL);
 		ASSERT_LE(0, nready) << strerror(errno);
 		ASSERT_EQ(1, nready) << "NULL timeout expired?";
 		ASSERT_TRUE(FD_ISSET(m_fuse_fd, &writefds));
 		break;
 	default:
 		FAIL() << "not yet implemented";
 	}
 	r = write(m_fuse_fd, &out, out.header.len);
 	/* A failed write is tolerated only when errno is EAGAIN */
 	ASSERT_TRUE(r > 0 || errno == EAGAIN) << strerror(errno);
 }
 
 void* MockFS::service(void *pthr_data) {
 	MockFS *mock_fs = (MockFS*)pthr_data;
 
 	mock_fs->loop();
 
 	return (NULL);
 }
 
 /* Unmount "mountpoint" without MNT_FORCE; the result is not checked */
 void MockFS::unmount() {
 	::unmount("mountpoint", 0);
 }
Index: projects/fuse2/tests/sys/fs/fusefs/mockfs.hh
===================================================================
--- projects/fuse2/tests/sys/fs/fusefs/mockfs.hh	(revision 349237)
+++ projects/fuse2/tests/sys/fs/fusefs/mockfs.hh	(revision 349238)
@@ -1,361 +1,363 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2019 The FreeBSD Foundation
  *
  * This software was developed by BFF Storage Systems, LLC under sponsorship
  * from the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 extern "C" {
 #include <sys/types.h>
 
 #include <pthread.h>
 
 #include "fuse_kernel.h"
 }
 
 #include <gmock/gmock.h>
 
 #define TIME_T_MAX (std::numeric_limits<time_t>::max())
 
 /* 
  * A pseudo-fuse errno used to indicate that a fuse operation should have no
  * response, at least not immediately
  */
 #define FUSE_NORESPONSE 9999
 
 /*
  * Set (out).header.len to the size of the header plus the size of the named
  * member of (out).body.
  *
  * NOTE(review): the expansion is a bare brace block, not do { } while (0),
  * so "SET_OUT_HEADER_LEN(...);" cannot be the body of an if that is followed
  * by an else.
  */
 #define SET_OUT_HEADER_LEN(out, variant) { \
 	(out).header.len = (sizeof((out).header) + \
 			    sizeof((out).body.variant)); \
 }
 
 /*
  * Create an expectation on FUSE_LOOKUP and return it so the caller can set
  * actions.
  *
  * The expectation matches requests whose opcode is FUSE_LOOKUP, whose nodeid
  * equals parent, and whose body compares equal to path with strcmp.  It is
  * set on *m_mock, so that name must be in scope where the macro is used.
  *
  * This must be a macro instead of a method because EXPECT_CALL returns a type
  * with a deleted constructor.
  */
 #define EXPECT_LOOKUP(parent, path)					\
 	EXPECT_CALL(*m_mock, process(					\
 		ResultOf([=](auto in) {					\
 			return (in.header.opcode == FUSE_LOOKUP &&	\
 				in.header.nodeid == (parent) &&	\
 				strcmp(in.body.lookup, (path)) == 0);	\
 		}, Eq(true)),						\
 		_)							\
 	)
 
 extern int verbosity;
 
 /*
  * This struct isn't defined by fuse_kernel.h or libfuse, but it should be.
  * It is a fuse_entry_out immediately followed by a fuse_open_out.
  */
 struct fuse_create_out {
 	struct fuse_entry_out	entry;
 	struct fuse_open_out	open;
 };
 
 /*
  * Protocol 7.8 version of struct fuse_attr
  *
  * NOTE(review): presumably differs from the current struct fuse_attr only by
  * lacking fields added in later protocol versions -- confirm against
  * fuse_kernel.h.
  */
 struct fuse_attr_7_8
 {
 	__u64	ino;
 	__u64	size;
 	__u64	blocks;
 	__u64	atime;
 	__u64	mtime;
 	__u64	ctime;
 	__u32	atimensec;
 	__u32	mtimensec;
 	__u32	ctimensec;
 	__u32	mode;
 	__u32	nlink;
 	__u32	uid;
 	__u32	gid;
 	__u32	rdev;
 };
 
 /*
  * Protocol 7.8 version of struct fuse_attr_out: the validity fields followed
  * by a 7.8-format attribute structure.
  */
 struct fuse_attr_out_7_8
 {
 	__u64	attr_valid;
 	__u32	attr_valid_nsec;
 	__u32	dummy;
 	struct fuse_attr_7_8 attr;
 };
 
 /*
  * Protocol 7.8 version of struct fuse_entry_out; it embeds the 7.8-format
  * attribute structure.
  */
 struct fuse_entry_out_7_8 {
 	__u64	nodeid;		/* Inode ID */
 	__u64	generation;	/* Inode generation: nodeid:gen must
 				   be unique for the fs's lifetime */
 	__u64	entry_valid;	/* Cache timeout for the name */
 	__u64	attr_valid;	/* Cache timeout for the attributes */
 	__u32	entry_valid_nsec;
 	__u32	attr_valid_nsec;
 	struct fuse_attr_7_8 attr;
 };
 
 /*
  * Output struct for FUSE_CREATE for protocol 7.8 servers: a 7.8-format entry
  * followed by a fuse_open_out.
  */
 struct fuse_create_out_7_8 {
 	struct fuse_entry_out_7_8	entry;
 	struct fuse_open_out	open;
 };
 
 /*
  * Body of a request read from /dev/fuse.  Every member overlays the start of
  * the payload; `bytes` gives raw access and fixes the size of the union.
  * The char[0] members are for requests whose body is just a string.
  */
 union fuse_payloads_in {
 	fuse_access_in	access;
+	fuse_bmap_in	bmap;
 	/* value is from fuse_kern_chan.c in fusefs-libs */
 	uint8_t		bytes[0x21000 - sizeof(struct fuse_in_header)];
 	fuse_create_in	create;
 	fuse_flush_in	flush;
 	fuse_fsync_in	fsync;
 	fuse_fsync_in	fsyncdir;
 	fuse_forget_in	forget;
 	fuse_interrupt_in interrupt;
 	fuse_lk_in	getlk;
 	fuse_getxattr_in getxattr;
 	fuse_init_in	init;
 	fuse_link_in	link;
 	fuse_listxattr_in listxattr;
 	char		lookup[0];
 	fuse_mkdir_in	mkdir;
 	fuse_mknod_in	mknod;
 	fuse_open_in	open;
 	fuse_open_in	opendir;
 	fuse_read_in	read;
 	fuse_read_in	readdir;
 	fuse_release_in	release;
 	fuse_release_in	releasedir;
 	fuse_rename_in	rename;
 	char		rmdir[0];
 	fuse_setattr_in	setattr;
 	fuse_setxattr_in setxattr;
 	fuse_lk_in	setlk;
 	fuse_lk_in	setlkw;
 	char		unlink[0];
 	fuse_write_in	write;
 };
 
 /* A complete request: the fixed header followed by the request body */
 struct mockfs_buf_in {
 	fuse_in_header		header;
 	union fuse_payloads_in	body;
 };
 
 /*
  * Body of a response or notification written to /dev/fuse.  Every member
  * overlays the start of the payload; `bytes` gives raw access.
  */
 union fuse_payloads_out {
 	fuse_attr_out		attr;
 	fuse_attr_out_7_8	attr_7_8;
+	fuse_bmap_out		bmap;
 	fuse_create_out		create;
 	fuse_create_out_7_8	create_7_8;
 	/*
 	 * The protocol places no limits on the size of bytes.  Choose
 	 * a size big enough for anything we'll test.
 	 */
 	uint8_t			bytes[0x20000];
 	fuse_entry_out		entry;
 	fuse_entry_out_7_8	entry_7_8;
 	fuse_lk_out		getlk;
 	fuse_getxattr_out	getxattr;
 	fuse_init_out		init;
 	/* The inval_entry structure should be followed by the entry's name */
 	fuse_notify_inval_entry_out	inval_entry;
 	fuse_notify_inval_inode_out	inval_inode;
 	fuse_listxattr_out	listxattr;
 	fuse_open_out		open;
 	fuse_statfs_out		statfs;
 	/*
 	 * The protocol places no limits on the length of the string.  This is
 	 * merely convenient for testing.
 	 */
 	char			str[80];
 	fuse_write_out		write;
 };
 
 /*
  * A complete response: the fixed header followed by the response body.
  * A newly constructed buffer is all zeros.
  */
 struct mockfs_buf_out {
 	fuse_out_header		header;
 	union fuse_payloads_out	body;
 
 	/* Default constructor: zero everything */
 	mockfs_buf_out() {
 		memset(this, 0, sizeof(*this));
 	}
 };
 
 /* A function that can be invoked in place of MockFS::process */
 typedef std::function<void (const mockfs_buf_in& in,
 			    std::vector<std::unique_ptr<mockfs_buf_out>> &out)>
 ProcessMockerT;
 
 /*
  * Helper function used for setting an error expectation for any fuse operation.
  * The operation will return the supplied error
  */
 ProcessMockerT ReturnErrno(int error);
 
 /* Helper function used for returning negative cache entries for LOOKUP */
 ProcessMockerT ReturnNegativeCache(const struct timespec *entry_valid);
 
 /* Helper function used for returning a single immediate response */
 ProcessMockerT ReturnImmediate(
 	std::function<void(const mockfs_buf_in& in,
 			   struct mockfs_buf_out &out)> f);
 
 /* How the daemon should check /dev/fuse for readiness */
 enum poll_method {
 	BLOCKING,	/* don't check; just issue the read or write */
 	SELECT,		/* wait with select(2) */
 	POLL,		/* wait with poll(2) */
 	KQ		/* wait with kevent(2); reads only */
 };
 
 /*
  * Fake FUSE filesystem
  *
  * "Mounts" a filesystem to a temporary directory and services requests
  * according to the programmed expectations.
  *
  * Operates directly on the fusefs(4) kernel API, not the libfuse(3) user api.
  */
 class MockFS {
 	/*
 	 * thread id of the fuse daemon thread
 	 *
 	 * It must run in a separate thread so it doesn't deadlock with the
 	 * client test code.
 	 */
 	pthread_t m_daemon_id;
 
 	/* file descriptor of /dev/fuse control device */
 	int m_fuse_fd;
 	
 	/* The minor version of the kernel API that this mock daemon targets */
 	uint32_t m_kernel_minor_version;
 
 	/* kqueue descriptor, used only by the KQ poll method; otherwise -1 */
 	int m_kq;
 
 	/* The max_readahead file system option */
 	uint32_t m_maxreadahead;
 
 	/* pid of the test process */
 	pid_t m_pid;
 
 	/* Method the daemon should use for I/O to and from /dev/fuse */
 	enum poll_method m_pm;
 
 	/* Print a description of a request or response when verbose */
 	void debug_request(const mockfs_buf_in&);
 	void debug_response(const mockfs_buf_out&);
 
 	/* Initialize a session after mounting */
 	void init(uint32_t flags);
 
 	/* Is pid from a process that might be involved in the test? */
 	bool pid_ok(pid_t pid);
 
 	/* Default request handler */
 	void process_default(const mockfs_buf_in&,
 		std::vector<std::unique_ptr<mockfs_buf_out>>&);
 
 	/* Entry point for the daemon thread */
 	static void* service(void*);
 
 	/* Read, but do not process, a single request from the kernel */
 	void read_request(mockfs_buf_in& in);
 
 	/* Write a single response back to the kernel */
 	void write_response(const mockfs_buf_out &out);
 
 	public:
 	/* pid of child process, for two-process test cases */
 	pid_t m_child_pid;
 
 	/* Maximum size of a FUSE_WRITE write */
 	uint32_t m_maxwrite;
 
 	/* 
 	 * Number of events that were available from /dev/fuse after the last
 	 * kevent call.  Only valid when m_pm = KQ.
 	 */
 	int m_nready;
 
 	/* Tell the daemon to shut down ASAP */
 	bool m_quit;
 
 	/* Create a new mockfs and mount it to a tempdir */
 	MockFS(int max_readahead, bool allow_other,
 		bool default_permissions, bool push_symlinks_in, bool ro,
 		enum poll_method pm, uint32_t flags,
 		uint32_t kernel_minor_version, uint32_t max_write, bool async,
 		bool no_clusterr);
 
 	virtual ~MockFS();
 
 	/* Kill the filesystem daemon without unmounting the filesystem */
 	void kill_daemon();
 
 	/* Process FUSE requests endlessly */
 	void loop();
 
 	/*
 	 * Send an asynchronous notification to invalidate a directory entry.
 	 * Similar to libfuse's fuse_lowlevel_notify_inval_entry
 	 *
 	 * This method will block until the client has responded, so it should
 	 * generally be run in a separate thread from request processing.
 	 *
 	 * @param	parent	Parent directory's inode number
 	 * @param	name	name of dirent to invalidate
 	 * @param	namelen	size of name, including the NUL
 	 */
 	int notify_inval_entry(ino_t parent, const char *name, size_t namelen);
 
 	/*
 	 * Send an asynchronous notification to invalidate an inode's cached
 	 * data and/or attributes.  Similar to libfuse's
 	 * fuse_lowlevel_notify_inval_inode.
 	 *
 	 * This method will block until the client has responded, so it should
 	 * generally be run in a separate thread from request processing.
 	 *
 	 * @param	ino	File's inode number
 	 * @param	off	offset at which to begin invalidation.  A
 	 * 			negative offset means to invalidate attributes
 	 * 			only.
 	 * @param	len	Size of region of data to invalidate.  0 means
 	 * 			to invalidate all cached data.
 	 */
 	int notify_inval_inode(ino_t ino, off_t off, ssize_t len);
 
 	/* 
 	 * Request handler
 	 *
 	 * This method is expected to provide the responses to each FUSE
 	 * operation.  For an immediate response, push one buffer into out.
 	 * For a delayed response, push nothing.  For an immediate response
 	 * plus a delayed response to an earlier operation, push two bufs.
 	 * Test cases must define each response using Googlemock expectations
 	 */
 	MOCK_METHOD2(process, void(const mockfs_buf_in&,
 				std::vector<std::unique_ptr<mockfs_buf_out>>&));
 
 	/* Gracefully unmount */
 	void unmount();
 };
Index: projects/fuse2/tests/sys/fs/fusefs/read.cc
===================================================================
--- projects/fuse2/tests/sys/fs/fusefs/read.cc	(revision 349237)
+++ projects/fuse2/tests/sys/fs/fusefs/read.cc	(revision 349238)
@@ -1,837 +1,801 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2019 The FreeBSD Foundation
  *
  * This software was developed by BFF Storage Systems, LLC under sponsorship
  * from the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 extern "C" {
-#include <sys/types.h>
+#include <sys/param.h>
 #include <sys/mman.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 #include <sys/uio.h>
 
 #include <aio.h>
 #include <fcntl.h>
 #include <semaphore.h>
 #include <unistd.h>
 }
 
 #include "mockfs.hh"
 #include "utils.hh"
 
 using namespace testing;
 
 /* Fixture for read tests against the default protocol version */
 class Read: public FuseTest {
 
 public:
 /* Expect a lookup of relpath that returns a regular file, mode 0644 */
 void expect_lookup(const char *relpath, uint64_t ino, uint64_t size)
 {
 	FuseTest::expect_lookup(relpath, ino, S_IFREG | 0644, size, 1);
 }
 };
 
 /* Fixture for read tests against a daemon that speaks protocol 7.8 */
 class Read_7_8: public FuseTest {
 public:
 virtual void SetUp() {
 	/* Must be set before FuseTest::SetUp creates the mock */
 	m_kernel_minor_version = 8;
 	FuseTest::SetUp();
 }
 
 /* Like Read::expect_lookup, but uses the 7.8 lookup helper */
 void expect_lookup(const char *relpath, uint64_t ino, uint64_t size)
 {
 	FuseTest::expect_lookup_7_8(relpath, ino, S_IFREG | 0644, size, 1);
 }
 };
 
 /*
  * Fixture for tests that use aio(4).  The test is skipped unless the
  * vfs.aio.enable_unsafe sysctl is nonzero.
  */
 class AioRead: public Read {
 public:
 virtual void SetUp() {
 	const char *node = "vfs.aio.enable_unsafe";
 	int val = 0;
 	size_t size = sizeof(val);
 
 	FuseTest::SetUp();
 
 	ASSERT_EQ(0, sysctlbyname(node, &val, &size, NULL, 0))
 		<< strerror(errno);
 	if (!val)
 		GTEST_SKIP() <<
 			"vfs.aio.enable_unsafe must be set for this test";
 }
 };
 
 /* Like AioRead, but the daemon accepts FUSE_ASYNC_READ during FUSE_INIT */
 class AsyncRead: public AioRead {
 	virtual void SetUp() {
 		/* Must be set before the base SetUp creates the mock */
 		m_init_flags = FUSE_ASYNC_READ;
 		AioRead::SetUp();
 	}
 };
 
 /*
  * Fixture for tests that depend on the data cache.  The test is skipped when
  * the vfs.fusefs.data_cache_mode sysctl is 0.
  */
 class ReadCacheable: public Read {
 public:
 virtual void SetUp() {
 	const char *node = "vfs.fusefs.data_cache_mode";
 	int val = 0;
 	size_t size = sizeof(val);
 
 	FuseTest::SetUp();
 
 	ASSERT_EQ(0, sysctlbyname(node, &val, &size, NULL, 0))
 		<< strerror(errno);
 	if (val == 0)
 		GTEST_SKIP() <<
 			"fusefs data caching must be enabled for this test";
 }
 };
 
 /*
  * Parameterized readahead fixture.  The parameter is a
  * (noclusterr, max_readahead) tuple.
  */
 class ReadAhead: public ReadCacheable,
 		 public WithParamInterface<tuple<bool, uint32_t>>
 {
 	virtual void SetUp() {
 		/* Apply the parameters before the base SetUp runs */
 		m_maxreadahead = get<1>(GetParam());
 		m_noclusterr = get<0>(GetParam());
 		ReadCacheable::SetUp();
 	}
 };
 
 /* AIO reads need to set the header's pid field correctly */
 /* https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=236379 */
 TEST_F(AioRead, aio_read)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	const char *CONTENTS = "abcdefgh";
 	uint64_t ino = 42;
 	int fd;
 	ssize_t bufsize = strlen(CONTENTS);
 	char buf[bufsize];
 	/*
 	 * Zero the control block so that the fields not set below don't
 	 * carry stack garbage into the kernel.
 	 */
 	struct aiocb iocb = {};
 	struct aiocb *piocb;
 
 	expect_lookup(RELPATH, ino, bufsize);
 	expect_open(ino, 0, 1);
 	expect_read(ino, 0, bufsize, bufsize, CONTENTS);
 
 	fd = open(FULLPATH, O_RDONLY);
 	ASSERT_LE(0, fd) << strerror(errno);
 
 	iocb.aio_nbytes = bufsize;
 	iocb.aio_fildes = fd;
 	iocb.aio_buf = buf;
 	iocb.aio_offset = 0;
 	iocb.aio_sigevent.sigev_notify = SIGEV_NONE;
 	ASSERT_EQ(0, aio_read(&iocb)) << strerror(errno);
 	ASSERT_EQ(bufsize, aio_waitcomplete(&piocb, NULL)) << strerror(errno);
 	ASSERT_EQ(0, memcmp(buf, CONTENTS, bufsize));
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 
 /* 
  * Without the FUSE_ASYNC_READ mount option, fuse(4) should ensure that there
  * is at most one outstanding read operation per file handle
  */
 TEST_F(AioRead, async_read_disabled)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	uint64_t ino = 42;
 	int fd;
 	ssize_t bufsize = 50;
 	char buf0[bufsize], buf1[bufsize];
 	off_t off0 = 0;
 	off_t off1 = 65536;
 	/*
 	 * Zero the control blocks so that the fields not set below don't
 	 * carry stack garbage into the kernel.
 	 */
 	struct aiocb iocb0 = {};
 	struct aiocb iocb1 = {};
 	volatile sig_atomic_t read_count = 0;
 
 	expect_lookup(RELPATH, ino, 131072);
 	expect_open(ino, 0, 1);
 	EXPECT_CALL(*m_mock, process(
 		ResultOf([=](auto in) {
 			return (in.header.opcode == FUSE_READ &&
 				in.header.nodeid == ino &&
 				in.body.read.fh == FH &&
 				in.body.read.offset == (uint64_t)off0);
 		}, Eq(true)),
 		_)
 	).WillRepeatedly(Invoke([&](auto in __unused, auto &out __unused) {
 		read_count++;
 		/* Filesystem is slow to respond */
 	}));
 	EXPECT_CALL(*m_mock, process(
 		ResultOf([=](auto in) {
 			return (in.header.opcode == FUSE_READ &&
 				in.header.nodeid == ino &&
 				in.body.read.fh == FH &&
 				in.body.read.offset == (uint64_t)off1);
 		}, Eq(true)),
 		_)
 	).WillRepeatedly(Invoke([&](auto in __unused, auto &out __unused) {
 		read_count++;
 		/* Filesystem is slow to respond */
 	}));
 
 	fd = open(FULLPATH, O_RDONLY);
 	ASSERT_LE(0, fd) << strerror(errno);
 
 	/* 
 	 * Submit two AIO read requests, and respond to neither.  If the
 	 * filesystem ever gets the second read request, then we failed to
 	 * limit outstanding reads.
 	 */
 	iocb0.aio_nbytes = bufsize;
 	iocb0.aio_fildes = fd;
 	iocb0.aio_buf = buf0;
 	iocb0.aio_offset = off0;
 	iocb0.aio_sigevent.sigev_notify = SIGEV_NONE;
 	ASSERT_EQ(0, aio_read(&iocb0)) << strerror(errno);
 
 	iocb1.aio_nbytes = bufsize;
 	iocb1.aio_fildes = fd;
 	iocb1.aio_buf = buf1;
 	iocb1.aio_offset = off1;
 	iocb1.aio_sigevent.sigev_notify = SIGEV_NONE;
 	ASSERT_EQ(0, aio_read(&iocb1)) << strerror(errno);
 
 	/* 
 	 * Sleep for awhile to make sure the kernel has had a chance to issue
 	 * the second read, even though the first has not yet returned
 	 */
 	nap();
 	EXPECT_EQ(read_count, 1);
 
 	m_mock->kill_daemon();
 	/* Wait for AIO activity to complete, but ignore errors */
 	(void)aio_waitcomplete(NULL, NULL);
 
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 
 /* 
  * With the FUSE_ASYNC_READ mount option, fuse(4) may issue multiple
  * simultaneous read requests on the same file handle.
  */
 TEST_F(AsyncRead, async_read)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	uint64_t ino = 42;
 	int fd;
 	ssize_t bufsize = 50;
 	char buf0[bufsize], buf1[bufsize];
 	off_t off0 = 0;
 	off_t off1 = 65536;
 	/*
 	 * Zero the control blocks so that the fields not set below don't
 	 * carry stack garbage into the kernel.
 	 */
 	struct aiocb iocb0 = {};
 	struct aiocb iocb1 = {};
 	sem_t sem;
 
 	ASSERT_EQ(0, sem_init(&sem, 0, 0)) << strerror(errno);
 
 	expect_lookup(RELPATH, ino, 131072);
 	expect_open(ino, 0, 1);
 	EXPECT_CALL(*m_mock, process(
 		ResultOf([=](auto in) {
 			return (in.header.opcode == FUSE_READ &&
 				in.header.nodeid == ino &&
 				in.body.read.fh == FH &&
 				in.body.read.offset == (uint64_t)off0);
 		}, Eq(true)),
 		_)
 	).WillOnce(Invoke([&](auto in __unused, auto &out __unused) {
 		sem_post(&sem);
 		/* Filesystem is slow to respond */
 	}));
 	EXPECT_CALL(*m_mock, process(
 		ResultOf([=](auto in) {
 			return (in.header.opcode == FUSE_READ &&
 				in.header.nodeid == ino &&
 				in.body.read.fh == FH &&
 				in.body.read.offset == (uint64_t)off1);
 		}, Eq(true)),
 		_)
 	).WillOnce(Invoke([&](auto in __unused, auto &out __unused) {
 		sem_post(&sem);
 		/* Filesystem is slow to respond */
 	}));
 
 	fd = open(FULLPATH, O_RDONLY);
 	ASSERT_LE(0, fd) << strerror(errno);
 
 	/* 
 	 * Submit two AIO read requests, but respond to neither.  Ensure that
 	 * we received both.
 	 */
 	iocb0.aio_nbytes = bufsize;
 	iocb0.aio_fildes = fd;
 	iocb0.aio_buf = buf0;
 	iocb0.aio_offset = off0;
 	iocb0.aio_sigevent.sigev_notify = SIGEV_NONE;
 	ASSERT_EQ(0, aio_read(&iocb0)) << strerror(errno);
 
 	iocb1.aio_nbytes = bufsize;
 	iocb1.aio_fildes = fd;
 	iocb1.aio_buf = buf1;
 	iocb1.aio_offset = off1;
 	iocb1.aio_sigevent.sigev_notify = SIGEV_NONE;
 	ASSERT_EQ(0, aio_read(&iocb1)) << strerror(errno);
 
 	/* Wait until both reads have reached the daemon */
 	ASSERT_EQ(0, sem_wait(&sem)) << strerror(errno);
 	ASSERT_EQ(0, sem_wait(&sem)) << strerror(errno);
 
 	m_mock->kill_daemon();
 	/* Wait for AIO activity to complete, but ignore errors */
 	(void)aio_waitcomplete(NULL, NULL);
 
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 
 /* A zero-length pread must succeed and return 0 */
 TEST_F(Read, direct_io_read_nothing)
 {
 	const char RELPATH[] = "some_file.txt";
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	char scratch[80];
 	uint64_t offset = 100;
 	uint64_t ino = 42;
 	int fd;
 
 	/* The file is larger than the offset we read at */
 	expect_lookup(RELPATH, ino, offset + 1000);
 	expect_open(ino, FOPEN_DIRECT_IO, 1);
 
 	fd = open(FULLPATH, O_RDONLY);
 	ASSERT_LE(0, fd) << strerror(errno);
 
 	ASSERT_EQ(0, pread(fd, scratch, 0, offset)) << strerror(errno);
 	/* fd is leaked on purpose; release.cc covers close(2) */
 }
 
 /*
  * A file opened with FOPEN_DIRECT_IO bypasses the cache: the pread below
  * must be answered by the daemon with exactly the requested range.
  */
 TEST_F(Read, direct_io_pread)
 {
 	const char RELPATH[] = "some_file.txt";
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char *CONTENTS = "abcdefgh";
 	ssize_t bufsize = strlen(CONTENTS);
 	char data[bufsize];
 	uint64_t offset = 100;
 	uint64_t ino = 42;
 	int fd;
 
 	expect_lookup(RELPATH, ino, offset + bufsize);
 	expect_open(ino, FOPEN_DIRECT_IO, 1);
 	expect_read(ino, offset, bufsize, bufsize, CONTENTS);
 
 	fd = open(FULLPATH, O_RDONLY);
 	ASSERT_LE(0, fd) << strerror(errno);
 
 	ASSERT_EQ(bufsize, pread(fd, data, bufsize, offset)) << strerror(errno);
 	ASSERT_EQ(0, memcmp(data, CONTENTS, bufsize));
 	/* fd is leaked on purpose; release.cc covers close(2) */
 }
 
 /*
  * With direct_io, filesystems are allowed to return less data than is
  * requested.  fuse(4) should return a short read to userland.
  *
  * The daemon is asked for bufsize bytes but replies with only halfbufsize;
  * pread(2) must report halfbufsize and deliver exactly those bytes.
  */
 TEST_F(Read, direct_io_short_read)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	const char *CONTENTS = "abcdefghijklmnop";
 	uint64_t ino = 42;
 	int fd;
 	uint64_t offset = 100;
 	ssize_t bufsize = strlen(CONTENTS);
 	ssize_t halfbufsize = bufsize / 2;
 	char buf[bufsize];
 
 	expect_lookup(RELPATH, ino, offset + bufsize);
 	expect_open(ino, FOPEN_DIRECT_IO, 1);
 	/* Request size is bufsize, reply size is halfbufsize */
 	expect_read(ino, offset, bufsize, halfbufsize, CONTENTS);
 
 	fd = open(FULLPATH, O_RDONLY);
 	ASSERT_LE(0, fd) << strerror(errno);
 
 	ASSERT_EQ(halfbufsize, pread(fd, buf, bufsize, offset))
 		<< strerror(errno);
 	/* Only the first halfbufsize bytes of buf are defined */
 	ASSERT_EQ(0, memcmp(buf, CONTENTS, halfbufsize));
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 
 /*
  * If the daemon answers FUSE_READ with EIO, read(2) should fail with -1 and
  * set errno to EIO.
  */
 TEST_F(Read, eio)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	const char *CONTENTS = "abcdefgh";
 	uint64_t ino = 42;
 	int fd;
 	ssize_t bufsize = strlen(CONTENTS);
 	char buf[bufsize];
 
 	expect_lookup(RELPATH, ino, bufsize);
 	expect_open(ino, 0, 1);
 	/* Match any FUSE_READ, whatever its inode, offset or size */
 	EXPECT_CALL(*m_mock, process(
 		ResultOf([=](auto in) {
 			return (in.header.opcode == FUSE_READ);
 		}, Eq(true)),
 		_)
 	).WillOnce(Invoke(ReturnErrno(EIO)));
 
 	fd = open(FULLPATH, O_RDONLY);
 	ASSERT_LE(0, fd) << strerror(errno);
 
 	ASSERT_EQ(-1, read(fd, buf, bufsize)) << strerror(errno);
 	ASSERT_EQ(EIO, errno);
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 
 /*
  * With the keep_cache option, the kernel may keep its read cache across
  * multiple open(2)s.
  *
  * The file is looked up and opened twice, but only one FUSE_READ expectation
  * is registered: the read on the second descriptor must not reach the daemon.
  */
 TEST_F(ReadCacheable, keep_cache)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	const char *CONTENTS = "abcdefgh";
 	uint64_t ino = 42;
 	int fd0, fd1;
 	ssize_t bufsize = strlen(CONTENTS);
 	char buf[bufsize];
 
 	/* Two lookups and two opens, both replying with FOPEN_KEEP_CACHE */
 	FuseTest::expect_lookup(RELPATH, ino, S_IFREG | 0644, bufsize, 2);
 	expect_open(ino, FOPEN_KEEP_CACHE, 2);
 	expect_read(ino, 0, bufsize, bufsize, CONTENTS);
 
 	fd0 = open(FULLPATH, O_RDONLY);
 	ASSERT_LE(0, fd0) << strerror(errno);
 	ASSERT_EQ(bufsize, read(fd0, buf, bufsize)) << strerror(errno);
 
 	fd1 = open(FULLPATH, O_RDWR);
 	ASSERT_LE(0, fd1) << strerror(errno);
 
 	/*
 	 * This read should be serviced by cache, even though it's on the other
 	 * file descriptor
 	 */
 	ASSERT_EQ(bufsize, read(fd1, buf, bufsize)) << strerror(errno);
 
 	/* Deliberately leak fd0 and fd1. */
 }
 
 /*
  * Without the keep_cache option, the kernel should drop its read caches on
  * every open.
  *
  * A second FUSE_READ expectation is registered after the second open(2), so
  * re-reading offset 0 on the first descriptor must go to the daemon again.
  */
 TEST_F(Read, keep_cache_disabled)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	const char *CONTENTS = "abcdefgh";
 	uint64_t ino = 42;
 	int fd0, fd1;
 	ssize_t bufsize = strlen(CONTENTS);
 	char buf[bufsize];
 
 	/* Two lookups and two opens, neither setting FOPEN_KEEP_CACHE */
 	FuseTest::expect_lookup(RELPATH, ino, S_IFREG | 0644, bufsize, 2);
 	expect_open(ino, 0, 2);
 	expect_read(ino, 0, bufsize, bufsize, CONTENTS);
 
 	fd0 = open(FULLPATH, O_RDONLY);
 	ASSERT_LE(0, fd0) << strerror(errno);
 	ASSERT_EQ(bufsize, read(fd0, buf, bufsize)) << strerror(errno);
 
 	fd1 = open(FULLPATH, O_RDWR);
 	ASSERT_LE(0, fd1) << strerror(errno);
 
 	/*
 	 * This read should not be serviced by cache, even though it's on the
 	 * original file descriptor
 	 */
 	expect_read(ino, 0, bufsize, bufsize, CONTENTS);
 	/* Rewind so the second read covers the same range as the first */
 	ASSERT_EQ(0, lseek(fd0, 0, SEEK_SET)) << strerror(errno);
 	ASSERT_EQ(bufsize, read(fd0, buf, bufsize)) << strerror(errno);
 
 	/* Deliberately leak fd0 and fd1. */
 }
 
 /*
  * Reading a file through a shared, read-only mapping should fetch its contents
  * from the daemon with FUSE_READ.  One page is mapped even though the file is
  * only bufsize bytes long.
  */
 TEST_F(ReadCacheable, mmap)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	const char *CONTENTS = "abcdefgh";
 	uint64_t ino = 42;
 	int fd;
 	ssize_t len;
 	size_t bufsize = strlen(CONTENTS);
 	void *p;
 
 	len = getpagesize();
 
 	expect_lookup(RELPATH, ino, bufsize);
 	expect_open(ino, 0, 1);
 	/* mmap may legitimately try to read more data than is available */
 	EXPECT_CALL(*m_mock, process(
 		ResultOf([=](auto in) {
 			return (in.header.opcode == FUSE_READ &&
 				in.header.nodeid == ino &&
 				in.body.read.fh == Read::FH &&
 				in.body.read.offset == 0 &&
 				in.body.read.size >= bufsize);
 		}, Eq(true)),
 		_)
 	).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) {
 		/* Reply with only bufsize bytes, however many were requested */
 		out.header.len = sizeof(struct fuse_out_header) + bufsize;
 		memmove(out.body.bytes, CONTENTS, bufsize);
 	})));
 
 	fd = open(FULLPATH, O_RDONLY);
 	ASSERT_LE(0, fd) << strerror(errno);
 
 	p = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
 	ASSERT_NE(MAP_FAILED, p) << strerror(errno);
 
 	/* Touching the mapping is what triggers the read */
 	ASSERT_EQ(0, memcmp(p, CONTENTS, bufsize));
 
 	ASSERT_EQ(0, munmap(p, len)) << strerror(errno);
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 
 /*
  * Just as when FOPEN_DIRECT_IO is used, reads with O_DIRECT should bypass
  * cache and go straight to the daemon.
  *
  * The same range is read twice: once normally, and once after O_DIRECT has
  * been set with fcntl(2).  Each read has its own FUSE_READ expectation.
  */
 TEST_F(Read, o_direct)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	const char *CONTENTS = "abcdefgh";
 	uint64_t ino = 42;
 	int fd;
 	ssize_t bufsize = strlen(CONTENTS);
 	char buf[bufsize];
 
 	expect_lookup(RELPATH, ino, bufsize);
 	expect_open(ino, 0, 1);
 	expect_read(ino, 0, bufsize, bufsize, CONTENTS);
 
 	fd = open(FULLPATH, O_RDONLY);
 	ASSERT_LE(0, fd) << strerror(errno);
 
 	// Fill the cache
 	ASSERT_EQ(bufsize, read(fd, buf, bufsize)) << strerror(errno);
 	ASSERT_EQ(0, memcmp(buf, CONTENTS, bufsize));
 
 	// Reads with o_direct should bypass the cache
 	expect_read(ino, 0, bufsize, bufsize, CONTENTS);
 	ASSERT_EQ(0, fcntl(fd, F_SETFL, O_DIRECT)) << strerror(errno);
 	// Rewind so the second read covers the same range as the first
 	ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno);
 	ASSERT_EQ(bufsize, read(fd, buf, bufsize)) << strerror(errno);
 	ASSERT_EQ(0, memcmp(buf, CONTENTS, bufsize));
 	
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 
 /*
  * pread(2) at a nonzero offset should produce a FUSE_READ at that offset and
  * return the daemon's data.
  */
 TEST_F(Read, pread)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	const char *CONTENTS = "abcdefgh";
 	uint64_t ino = 42;
 	int fd;
 	/*
 	 * Set offset to a maxbcachebuf boundary so we'll be sure what offset
 	 * to read from.  Without this, the read might start at a lower offset.
 	 */
 	uint64_t offset = m_maxbcachebuf;
 	ssize_t bufsize = strlen(CONTENTS);
 	char buf[bufsize];
 
 	/* The file ends exactly where the requested range ends */
 	expect_lookup(RELPATH, ino, offset + bufsize);
 	expect_open(ino, 0, 1);
 	expect_read(ino, offset, bufsize, bufsize, CONTENTS);
 
 	fd = open(FULLPATH, O_RDONLY);
 	ASSERT_LE(0, fd) << strerror(errno);
 
 	ASSERT_EQ(bufsize, pread(fd, buf, bufsize, offset)) << strerror(errno);
 	ASSERT_EQ(0, memcmp(buf, CONTENTS, bufsize));
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 
 /*
  * Basic case: read(2) of a whole small file should produce one FUSE_READ at
  * offset 0 and return the daemon's data unchanged.
  */
 TEST_F(Read, read)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	const char *CONTENTS = "abcdefgh";
 	uint64_t ino = 42;
 	int fd;
 	ssize_t bufsize = strlen(CONTENTS);
 	char buf[bufsize];
 
 	expect_lookup(RELPATH, ino, bufsize);
 	expect_open(ino, 0, 1);
 	expect_read(ino, 0, bufsize, bufsize, CONTENTS);
 
 	fd = open(FULLPATH, O_RDONLY);
 	ASSERT_LE(0, fd) << strerror(errno);
 
 	ASSERT_EQ(bufsize, read(fd, buf, bufsize)) << strerror(errno);
 	ASSERT_EQ(0, memcmp(buf, CONTENTS, bufsize));
 
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 
 /*
  * The same scenario as Read.read, run on the Read_7_8 fixture.
  * NOTE(review): presumably that fixture negotiates FUSE protocol 7.8 and
  * overrides expect_lookup accordingly -- confirm against its definition.
  */
 TEST_F(Read_7_8, read)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	const char *CONTENTS = "abcdefgh";
 	uint64_t ino = 42;
 	int fd;
 	ssize_t bufsize = strlen(CONTENTS);
 	char buf[bufsize];
 
 	expect_lookup(RELPATH, ino, bufsize);
 	expect_open(ino, 0, 1);
 	expect_read(ino, 0, bufsize, bufsize, CONTENTS);
 
 	fd = open(FULLPATH, O_RDONLY);
 	ASSERT_LE(0, fd) << strerror(errno);
 
 	ASSERT_EQ(bufsize, read(fd, buf, bufsize)) << strerror(errno);
 	ASSERT_EQ(0, memcmp(buf, CONTENTS, bufsize));
 
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 
 /*
  * If caching is enabled, the kernel should try to read an entire cache block
  * at a time.
  *
  * Two consecutive 8-byte reads are issued, but the only FUSE_READ expected is
  * one of m_maxbcachebuf bytes at offset 0.
  */
 TEST_F(ReadCacheable, cache_block)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	const char *CONTENTS0 = "abcdefghijklmnop";
 	uint64_t ino = 42;
 	int fd;
 	ssize_t bufsize = 8;
 	ssize_t filesize = m_maxbcachebuf * 2;
 	char *contents;
 	char buf[bufsize];
 	/* The bytes the second 8-byte read should return */
 	const char *contents1 = CONTENTS0 + bufsize;
 
 	/* Zero-filled file image with CONTENTS0 at the very start */
 	contents = (char*)calloc(1, filesize);
 	ASSERT_NE(NULL, contents);
 	memmove(contents, CONTENTS0, strlen(CONTENTS0));
 
 	expect_lookup(RELPATH, ino, filesize);
 	expect_open(ino, 0, 1);
 	expect_read(ino, 0, m_maxbcachebuf, m_maxbcachebuf,
 		contents);
 
 	fd = open(FULLPATH, O_RDONLY);
 	ASSERT_LE(0, fd) << strerror(errno);
 
 	ASSERT_EQ(bufsize, read(fd, buf, bufsize)) << strerror(errno);
 	ASSERT_EQ(0, memcmp(buf, CONTENTS0, bufsize));
 
 	/* A subsequent read should be serviced by cache */
 	ASSERT_EQ(bufsize, read(fd, buf, bufsize)) << strerror(errno);
 	ASSERT_EQ(0, memcmp(buf, contents1, bufsize));
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 
 /*
  * Reading with sendfile should work (though it obviously won't be 0-copy).
  *
  * The file is sent into one end of a local socket pair and read back from the
  * other end, where it must match CONTENTS.
  */
 TEST_F(ReadCacheable, sendfile)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	const char *CONTENTS = "abcdefgh";
 	uint64_t ino = 42;
 	int fd;
 	size_t bufsize = strlen(CONTENTS);
 	char buf[bufsize];
 	int sp[2];
 	off_t sbytes;
 
 	expect_lookup(RELPATH, ino, bufsize);
 	expect_open(ino, 0, 1);
 	/* Like mmap, sendfile may request more data than is available */
 	EXPECT_CALL(*m_mock, process(
 		ResultOf([=](auto in) {
 			return (in.header.opcode == FUSE_READ &&
 				in.header.nodeid == ino &&
 				in.body.read.fh == Read::FH &&
 				in.body.read.offset == 0 &&
 				in.body.read.size >= bufsize);
 		}, Eq(true)),
 		_)
 	).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) {
 		/* Reply with only bufsize bytes, however many were requested */
 		out.header.len = sizeof(struct fuse_out_header) + bufsize;
 		memmove(out.body.bytes, CONTENTS, bufsize);
 	})));
 
 	ASSERT_EQ(0, socketpair(PF_LOCAL, SOCK_STREAM, 0, sp))
 		<< strerror(errno);
 	fd = open(FULLPATH, O_RDONLY);
 	ASSERT_LE(0, fd) << strerror(errno);
 
 	/* Send into sp[1], then read the same bytes back out of sp[0] */
 	ASSERT_EQ(0, sendfile(fd, sp[1], 0, bufsize, NULL, &sbytes, 0))
 		<< strerror(errno);
 	ASSERT_EQ(static_cast<ssize_t>(bufsize), read(sp[0], buf, bufsize))
 		<< strerror(errno);
 	ASSERT_EQ(0, memcmp(buf, CONTENTS, bufsize));
 
 	close(sp[1]);
 	close(sp[0]);
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 
 /*
  * sendfile should fail gracefully if fuse declines the read.
  *
  * The test name carries the DISABLED_ prefix; see
  * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=236466
  */
 TEST_F(ReadCacheable, DISABLED_sendfile_eio)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	const char *CONTENTS = "abcdefgh";
 	uint64_t ino = 42;
 	int fd;
 	ssize_t bufsize = strlen(CONTENTS);
 	int sp[2];
 	off_t sbytes;
 
 	expect_lookup(RELPATH, ino, bufsize);
 	expect_open(ino, 0, 1);
 	/* Match any FUSE_READ and answer it with EIO */
 	EXPECT_CALL(*m_mock, process(
 		ResultOf([=](auto in) {
 			return (in.header.opcode == FUSE_READ);
 		}, Eq(true)),
 		_)
 	).WillOnce(Invoke(ReturnErrno(EIO)));
 
 	ASSERT_EQ(0, socketpair(PF_LOCAL, SOCK_STREAM, 0, sp))
 		<< strerror(errno);
 	fd = open(FULLPATH, O_RDONLY);
 	ASSERT_LE(0, fd) << strerror(errno);
 
 	/* Only a nonzero return is required; errno is not checked */
 	ASSERT_NE(0, sendfile(fd, sp[1], 0, bufsize, NULL, &sbytes, 0));
 
 	close(sp[1]);
 	close(sp[0]);
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 
-/* Large reads should be clustered, even across cache block boundaries */
-/* 
- * Disabled because clustered reads requires VOP_BMAP, which fusefs does not
- * yet support
+/*
+ * Sequential reads should use readahead.  And if allowed, large reads should
+ * be clustered.
  */
-TEST_P(ReadAhead, DISABLED_cluster) {
+TEST_P(ReadAhead, readahead) {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
 	uint64_t ino = 42;
-	int fd, maxcontig;
+	int fd, maxcontig, clustersize;
 	ssize_t bufsize = 4 * m_maxbcachebuf;
 	ssize_t filesize = bufsize;
 	uint64_t len;
 	char *rbuf, *contents;
 	off_t offs;
 
 	contents = (char*)malloc(filesize);
 	ASSERT_NE(NULL, contents);
 	memset(contents, 'X', filesize);
 	rbuf = (char*)calloc(1, bufsize);
 
 	expect_lookup(RELPATH, ino, filesize);
 	expect_open(ino, 0, 1);
 	/*
 	 * With m_noclusterr set, a read is expected to span one cache block.
 	 * Otherwise it may grow by the amount in the second test parameter.
 	 * The expected FUSE_READs tile the whole file in such chunks, with the
 	 * last one trimmed to the end of the file.
 	 */
 	maxcontig = m_noclusterr ? m_maxbcachebuf :
 				   m_maxbcachebuf + (int)get<1>(GetParam());
-	for (offs = 0; offs < bufsize; offs += maxcontig) {
-		len = std::min((size_t)maxcontig, (size_t)(filesize - offs));
+	clustersize = MIN(maxcontig, MAXPHYS);
+	for (offs = 0; offs < bufsize; offs += clustersize) {
+		len = std::min((size_t)clustersize, (size_t)(filesize - offs));
 		expect_read(ino, offs, len, len, contents + offs);
 	}
 
 	fd = open(FULLPATH, O_RDONLY);
 	ASSERT_LE(0, fd) << strerror(errno);
 
 	/* Set the internal readahead counter to a "large" value */
 	ASSERT_EQ(0, fcntl(fd, F_READAHEAD, 1'000'000'000)) << strerror(errno);
 
 	ASSERT_EQ(bufsize, read(fd, rbuf, bufsize)) << strerror(errno);
 	ASSERT_EQ(0, memcmp(rbuf, contents, bufsize));
 
 	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 
-/* fuse(4) should honor the filesystem's requested m_readahead parameter */
-TEST_P(ReadAhead, readahead) {
-	const char FULLPATH[] = "mountpoint/some_file.txt";
-	const char RELPATH[] = "some_file.txt";
-	uint64_t ino = 42;
-	int fd, i;
-	ssize_t bufsize = m_maxbcachebuf;
-	ssize_t filesize = m_maxbcachebuf * 6;
-	char *rbuf, *contents;
-
-	contents = (char*)malloc(filesize);
-	ASSERT_NE(NULL, contents);
-	memset(contents, 'X', filesize);
-	rbuf = (char*)calloc(1, bufsize);
-
-	expect_lookup(RELPATH, ino, filesize);
-	expect_open(ino, 0, 1);
-	/* fuse(4) should only read ahead the allowed amount */
-	expect_read(ino, 0, m_maxbcachebuf, m_maxbcachebuf, contents);
-	for (i = 0; i < (int)get<1>(GetParam()) / m_maxbcachebuf; i++) {
-		off_t offs = (i + 1) * m_maxbcachebuf;
-		expect_read(ino, offs, m_maxbcachebuf, m_maxbcachebuf,
-			contents + offs);
-	}
-
-	fd = open(FULLPATH, O_RDONLY);
-	ASSERT_LE(0, fd) << strerror(errno);
-
-	/* Set the internal readahead counter to a "large" value */
-	ASSERT_EQ(0, fcntl(fd, F_READAHEAD, 1'000'000'000)) << strerror(errno);
-
-	ASSERT_EQ(bufsize, read(fd, rbuf, bufsize)) << strerror(errno);
-	ASSERT_EQ(0, memcmp(rbuf, contents, bufsize));
-
-	/* Deliberately leak fd.  close(2) will be tested in release.cc */
-}
-
 /*
  * Parameter sets for the ReadAhead tests.  The test body reads element 1 as
  * the number of bytes added to m_maxbcachebuf when computing maxcontig.
  * NOTE(review): element 0 is presumably the noclusterr setting -- confirm
  * against the ReadAhead fixture.
  */
 INSTANTIATE_TEST_CASE_P(RA, ReadAhead,
 	Values(tuple<bool, int>(false, 0u),
 	       tuple<bool, int>(false, 0x10000),
 	       tuple<bool, int>(false, 0x20000),
 	       tuple<bool, int>(false, 0x30000),
 	       tuple<bool, int>(true, 0u),
-	       tuple<bool, int>(true, 0x10000)));
+	       tuple<bool, int>(true, 0x10000),
+	       tuple<bool, int>(true, 0x20000)));
Index: projects/fuse2/tests/sys/fs/fusefs/utils.cc
===================================================================
--- projects/fuse2/tests/sys/fs/fusefs/utils.cc	(revision 349237)
+++ projects/fuse2/tests/sys/fs/fusefs/utils.cc	(revision 349238)
@@ -1,574 +1,588 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2019 The FreeBSD Foundation
  *
  * This software was developed by BFF Storage Systems, LLC under sponsorship
  * from the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 extern "C" {
 #include <sys/param.h>
 #include <sys/mman.h>
 #include <sys/module.h>
 #include <sys/sysctl.h>
 #include <sys/wait.h>
 
 #include <dirent.h>
 #include <fcntl.h>
 #include <grp.h>
 #include <pwd.h>
 #include <semaphore.h>
 #include <unistd.h>
 }
 
 #include <gtest/gtest.h>
 
 #include "mockfs.hh"
 #include "utils.hh"
 
 using namespace testing;
 
 /*
  * The default max_write is set to this formula in libfuse, though
  * individual filesystems can lower it.  The "- 4096" was added in
  * commit 154ffe2, with the commit message "fix".
  *
  * Because the initializer calls getpagesize(), the value is computed when
  * this file's globals are initialized, not at compile time.
  */
 const uint32_t libfuse_max_write = 32 * getpagesize() + 0x1000 - 4096;
 
 /*
  * Set the default max_write to a distinct value from MAXPHYS to catch bugs
  * that confuse the two.  It is the smaller of libfuse's default and
  * MAXPHYS / 2.
  */
 const uint32_t default_max_write = MIN(libfuse_max_write, MAXPHYS / 2);
 
 
 /*
  * Check that fusefs(4) is accessible and the current user can mount(2).
  *
  * The test is marked skipped via GTEST_SKIP() if /dev/fuse cannot be opened
  * for reading and writing, or if the caller is not root and the vfs.usermount
  * sysctl is 0.
  */
 void check_environment()
 {
 	const char *devnode = "/dev/fuse";
 	const char *usermount_node = "vfs.usermount";
 	int usermount_val = 0;
 	size_t usermount_size = sizeof(usermount_val);
 	if (eaccess(devnode, R_OK | W_OK)) {
 		if (errno == ENOENT) {
 			GTEST_SKIP() << devnode << " does not exist";
 		} else if (errno == EACCES) {
 			GTEST_SKIP() << devnode <<
 			    " is not accessible by the current user";
 		} else {
 			GTEST_SKIP() << strerror(errno);
 		}
 	}
 	/*
 	 * NOTE(review): the return value is not checked.  If the sysctl fails,
 	 * usermount_val keeps its initial 0 and a non-root user is skipped.
 	 */
 	sysctlbyname(usermount_node, &usermount_val, &usermount_size,
 		     NULL, 0);
 	if (geteuid() != 0 && !usermount_val)
 		GTEST_SKIP() << "current user is not allowed to mount";
 }
 
 /*
  * Global googletest environment, registered in main().  Its SetUp is
  * currently empty; the XXX note in FuseTest::SetUp explains why
  * check_environment is not called from here.
  */
 class FuseEnv: public Environment {
 	virtual void SetUp() {
 	}
 };
 
 /*
  * Per-test setup.
  *
  * Skips the test if check_environment() says it cannot run.  Otherwise it
  * caches the vfs.maxbcachebuf sysctl in m_maxbcachebuf, creates the MockFS
  * from the fixture's m_* settings, and installs default expectations that
  * answer FUSE_ACCESS and FUSE_BMAP with ENOSYS any number of times.
  *
  * A std::system_error thrown while doing so fails the test with its what()
  * text.
  */
 void FuseTest::SetUp() {
 	const char *node = "vfs.maxbcachebuf";
 	int val = 0;
 	size_t size = sizeof(val);
 
 	/*
 	 * XXX check_environment should be called from FuseEnv::SetUp, but
 	 * can't due to https://github.com/google/googletest/issues/2189
 	 */
 	check_environment();
 	if (IsSkipped())
 		return;
 
 	ASSERT_EQ(0, sysctlbyname(node, &val, &size, NULL, 0))
 		<< strerror(errno);
 	m_maxbcachebuf = val;
 
 	try {
 		m_mock = new MockFS(m_maxreadahead, m_allow_other,
 			m_default_permissions, m_push_symlinks_in, m_ro,
 			m_pm, m_init_flags, m_kernel_minor_version,
 			m_maxwrite, m_async, m_noclusterr);
 		/* 
 		 * FUSE_ACCESS is called almost universally.  Expecting it in
 		 * each test case would be super-annoying.  Instead, set a
 		 * default expectation for FUSE_ACCESS and return ENOSYS.
 		 *
 		 * Individual test cases can override this expectation since
 		 * googlemock evaluates expectations in LIFO order.
 		 */
 		EXPECT_CALL(*m_mock, process(
 			ResultOf([=](auto in) {
 				return (in.header.opcode == FUSE_ACCESS);
 			}, Eq(true)),
 			_)
 		).Times(AnyNumber())
 		.WillRepeatedly(Invoke(ReturnErrno(ENOSYS)));
+		/*
+		 * FUSE_BMAP is called for most test cases that read data.  Set
+		 * a default expectation and return ENOSYS.
+		 *
+		 * Individual test cases can override this expectation since
+		 * googlemock evaluates expectations in LIFO order.
+		 */
+		EXPECT_CALL(*m_mock, process(
+			ResultOf([=](auto in) {
+				return (in.header.opcode == FUSE_BMAP);
+			}, Eq(true)),
+			_)
+		).Times(AnyNumber())
+		.WillRepeatedly(Invoke(ReturnErrno(ENOSYS)));
 	/* Catch by const reference: no copy, and no slicing of derived types */
 	} catch (const std::system_error &err) {
 		FAIL() << err.what();
 	}
 }
 
 /*
  * Expect one FUSE_ACCESS on ino whose mask equals access_mode, and answer it
  * with ReturnErrno(error).
  */
 void
 FuseTest::expect_access(uint64_t ino, mode_t access_mode, int error)
 {
 	EXPECT_CALL(*m_mock, process(
 		ResultOf([=](auto in) {
 			return (in.header.opcode == FUSE_ACCESS &&
 				in.header.nodeid == ino &&
 				in.body.access.mask == access_mode);
 		}, Eq(true)),
 		_)
 	).WillOnce(Invoke(ReturnErrno(error)));
 }
 
 /*
  * Expect one FUSE_DESTROY.  The handler sets m_mock->m_quit and replies with
  * a bare header carrying -error.
  */
 void
 FuseTest::expect_destroy(int error)
 {
 	EXPECT_CALL(*m_mock, process(
 		ResultOf([=](auto in) {
 			return (in.header.opcode == FUSE_DESTROY);
 		}, Eq(true)),
 		_)
 	).WillOnce(Invoke( ReturnImmediate([&](auto in, auto& out) {
 		m_mock->m_quit = true;
 		/* Header-only reply, echoing the request's unique id */
 		out.header.len = sizeof(out.header);
 		out.header.unique = in.header.unique;
 		/* The error is stored negated in the reply header */
 		out.header.error = -error;
 	})));
 }
 
 /*
  * Expect FUSE_FLUSH on ino exactly `times` times, handling each one with the
  * caller-supplied action r.
  */
 void
 FuseTest::expect_flush(uint64_t ino, int times, ProcessMockerT r)
 {
 	EXPECT_CALL(*m_mock, process(
 		ResultOf([=](auto in) {
 			return (in.header.opcode == FUSE_FLUSH &&
 				in.header.nodeid == ino);
 		}, Eq(true)),
 		_)
 	).Times(times)
 	.WillRepeatedly(Invoke(r));
 }
 
 /*
  * Expect one FUSE_FORGET on ino with the given nlookup count.  If sem is not
  * NULL it is posted when the request arrives, so a test can wait for it.  No
  * reply is written.
  */
 void
 FuseTest::expect_forget(uint64_t ino, uint64_t nlookup, sem_t *sem)
 {
 	EXPECT_CALL(*m_mock, process(
 		ResultOf([=](auto in) {
 			return (in.header.opcode == FUSE_FORGET &&
 				in.header.nodeid == ino &&
 				in.body.forget.nlookup == nlookup);
 		}, Eq(true)),
 		_)
 	).WillOnce(Invoke([=](auto in __unused, auto &out __unused) {
 		if (sem != NULL)
 			sem_post(sem);
 		/* FUSE_FORGET has no response! */
 	}));
 }
 
 /*
  * Expect one FUSE_GETATTR on ino.  The reply describes a regular file with
  * mode 0644 and the given size, and sets attr_valid to UINT64_MAX.
  */
 void FuseTest::expect_getattr(uint64_t ino, uint64_t size)
 {
 	EXPECT_CALL(*m_mock, process(
 		ResultOf([=](auto in) {
 			return (in.header.opcode == FUSE_GETATTR &&
 				in.header.nodeid == ino);
 		}, Eq(true)),
 		_)
 	).WillOnce(Invoke(ReturnImmediate([=](auto i __unused, auto& out) {
 		SET_OUT_HEADER_LEN(out, attr);
 		out.body.attr.attr.ino = ino;	// Must match nodeid
 		out.body.attr.attr.mode = S_IFREG | 0644;
 		out.body.attr.attr.size = size;
 		out.body.attr.attr_valid = UINT64_MAX;
 	})));
 }
 
 /*
  * Expect a lookup of relpath under FUSE_ROOT_ID exactly `times` times.  Each
  * reply is an entry for inode ino with one link and the given mode, size,
  * attr_valid, uid and gid.
  */
 void FuseTest::expect_lookup(const char *relpath, uint64_t ino, mode_t mode,
 	uint64_t size, int times, uint64_t attr_valid, uid_t uid, gid_t gid)
 {
 	EXPECT_LOOKUP(FUSE_ROOT_ID, relpath)
 	.Times(times)
 	.WillRepeatedly(Invoke(
 		ReturnImmediate([=](auto in __unused, auto& out) {
 		SET_OUT_HEADER_LEN(out, entry);
 		out.body.entry.attr.mode = mode;
 		out.body.entry.nodeid = ino;
 		out.body.entry.attr.nlink = 1;
 		out.body.entry.attr_valid = attr_valid;
 		out.body.entry.attr.size = size;
 		out.body.entry.attr.uid = uid;
 		out.body.entry.attr.gid = gid;
 	})));
 }
 
 /*
  * Variant of expect_lookup that sizes the reply with the entry_7_8 layout
  * instead of entry.  The fields filled in are the same.
  * NOTE(review): presumably for daemons speaking FUSE protocol 7.8 -- confirm.
  */
 void FuseTest::expect_lookup_7_8(const char *relpath, uint64_t ino, mode_t mode,
 	uint64_t size, int times, uint64_t attr_valid, uid_t uid, gid_t gid)
 {
 	EXPECT_LOOKUP(FUSE_ROOT_ID, relpath)
 	.Times(times)
 	.WillRepeatedly(Invoke(
 		ReturnImmediate([=](auto in __unused, auto& out) {
 		SET_OUT_HEADER_LEN(out, entry_7_8);
 		out.body.entry.attr.mode = mode;
 		out.body.entry.nodeid = ino;
 		out.body.entry.attr.nlink = 1;
 		out.body.entry.attr_valid = attr_valid;
 		out.body.entry.attr.size = size;
 		out.body.entry.attr.uid = uid;
 		out.body.entry.attr.gid = gid;
 	})));
 }
 
 /*
  * Expect FUSE_OPEN on ino exactly `times` times.  Each reply carries the
  * fixture's file handle FH and sets open_flags to `flags`.
  */
 void FuseTest::expect_open(uint64_t ino, uint32_t flags, int times)
 {
 	EXPECT_CALL(*m_mock, process(
 		ResultOf([=](auto in) {
 			return (in.header.opcode == FUSE_OPEN &&
 				in.header.nodeid == ino);
 		}, Eq(true)),
 		_)
 	).Times(times)
 	.WillRepeatedly(Invoke(
 		ReturnImmediate([=](auto in __unused, auto& out) {
 		/*
 		 * NOTE(review): this assignment looks redundant with the
 		 * SET_OUT_HEADER_LEN call that follows -- confirm against the
 		 * macro's definition.
 		 */
 		out.header.len = sizeof(out.header);
 		SET_OUT_HEADER_LEN(out, open);
 		out.body.open.fh = FH;
 		out.body.open.open_flags = flags;
 	})));
 }
 
 /*
  * Expect one FUSE_OPENDIR on ino and reply with the file handle FH.  Also
  * answers any number of FUSE_STATFS requests with a reply whose only
  * explicitly set field is the header length.
  */
 void FuseTest::expect_opendir(uint64_t ino)
 {
 	/* opendir(3) calls fstatfs */
 	EXPECT_CALL(*m_mock, process(
 		ResultOf([](auto in) {
 			return (in.header.opcode == FUSE_STATFS);
 		}, Eq(true)),
 		_)
 	).WillRepeatedly(Invoke(
 	ReturnImmediate([=](auto i __unused, auto& out) {
 		SET_OUT_HEADER_LEN(out, statfs);
 	})));
 
 	EXPECT_CALL(*m_mock, process(
 		ResultOf([=](auto in) {
 			return (in.header.opcode == FUSE_OPENDIR &&
 				in.header.nodeid == ino);
 		}, Eq(true)),
 		_)
 	).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) {
 		out.header.len = sizeof(out.header);
 		SET_OUT_HEADER_LEN(out, open);
 		out.body.open.fh = FH;
 	})));
 }
 
 /*
  * Expect one FUSE_READ of isize bytes at `offset` on ino, using file handle
  * FH, and reply with the first osize bytes of `contents`.
  *
  * If flags is -1 the request's flags may be either O_RDONLY or O_RDWR;
  * otherwise they must equal `flags` exactly.
  *
  * The expectation retires once it has been satisfied (RetiresOnSaturation),
  * so it no longer matches later requests.
  */
 void FuseTest::expect_read(uint64_t ino, uint64_t offset, uint64_t isize,
 	uint64_t osize, const void *contents, int flags)
 {
 	EXPECT_CALL(*m_mock, process(
 		ResultOf([=](auto in) {
 			/*
 			 * The conditional must be parenthesized: ?: binds more
 			 * loosely than &&, so without the parentheses a request
 			 * failing any earlier test would still be judged on
 			 * its flags alone.
 			 */
 			return (in.header.opcode == FUSE_READ &&
 				in.header.nodeid == ino &&
 				in.body.read.fh == FH &&
 				in.body.read.offset == offset &&
 				in.body.read.size == isize &&
 				(flags == -1 ?
 					(in.body.read.flags == O_RDONLY ||
 					 in.body.read.flags == O_RDWR)
 				: in.body.read.flags == (uint32_t)flags));
 		}, Eq(true)),
 		_)
 	).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) {
 		out.header.len = sizeof(struct fuse_out_header) + osize;
 		memmove(out.body.bytes, contents, osize);
 	}))).RetiresOnSaturation();
 }
 
 /*
  * Answer every FUSE_READDIR on ino at offset `off` (with file handle FH) by
  * packing the entries of `ents` into the reply as fuse_dirent records.
  * Packing stops early, with a message on stdout, at the first entry that
  * would not fit in the size the request asked for.
  */
 void FuseTest::expect_readdir(uint64_t ino, uint64_t off,
 	std::vector<struct dirent> &ents)
 {
 	EXPECT_CALL(*m_mock, process(
 		ResultOf([=](auto in) {
 			return (in.header.opcode == FUSE_READDIR &&
 				in.header.nodeid == ino &&
 				in.body.readdir.fh == FH &&
 				in.body.readdir.offset == off);
 		}, Eq(true)),
 		_)
 	).WillRepeatedly(Invoke(ReturnImmediate([=](auto in, auto& out) {
 		struct fuse_dirent *fde = (struct fuse_dirent*)&(out.body);
 		int i = 0;
 
 		out.header.error = 0;
 		/* Counts payload bytes only; the header is added at the end */
 		out.header.len = 0;
 
 		for (const auto& it: ents) {
 			size_t entlen, entsize;
 
 			fde->ino = it.d_fileno;
 			fde->off = it.d_off;
 			fde->type = it.d_type;
 			fde->namelen = it.d_namlen;
 			/* Copies exactly d_namlen bytes; no NUL is appended */
 			strncpy(fde->name, it.d_name, it.d_namlen);
 			entlen = FUSE_NAME_OFFSET + fde->namelen;
 			entsize = FUSE_DIRENT_SIZE(fde);
 			/*
 			 * The FUSE protocol does not require zeroing out the
 			 * unused portion of the name.  But it's a good
 			 * practice to prevent information disclosure to the
 			 * FUSE client, even though the client is usually the
 			 * kernel
 			 */
 			memset(fde->name + fde->namelen, 0, entsize - entlen);
 			/*
 			 * NOTE(review): the limit is read via in.body.read
 			 * although the matcher uses in.body.readdir --
 			 * presumably the two layouts agree; confirm.
 			 */
 			if (out.header.len + entsize > in.body.read.size) {
 				printf("Overflow in readdir expectation: i=%d\n"
 					, i);
 				break;
 			}
 			out.header.len += entsize;
 			/* Advance by entsize bytes, in intmax_t-sized steps */
 			fde = (struct fuse_dirent*)
 				((intmax_t*)fde + entsize / sizeof(intmax_t));
 			i++;
 		}
 		out.header.len += sizeof(out.header);
 	})));
 
 }
 /*
  * Expect one FUSE_RELEASE on ino for file handle fh, and answer it with
  * ReturnErrno(0).
  */
 void FuseTest::expect_release(uint64_t ino, uint64_t fh)
 {
 	EXPECT_CALL(*m_mock, process(
 		ResultOf([=](auto in) {
 			return (in.header.opcode == FUSE_RELEASE &&
 				in.header.nodeid == ino &&
 				in.body.release.fh == fh);
 		}, Eq(true)),
 		_)
 	).WillOnce(Invoke(ReturnErrno(0)));
 }
 
 /*
  * Expect one FUSE_RELEASEDIR on ino for the fixture's file handle FH, and
  * handle it with the caller-supplied action r.
  */
 void FuseTest::expect_releasedir(uint64_t ino, ProcessMockerT r)
 {
 	EXPECT_CALL(*m_mock, process(
 		ResultOf([=](auto in) {
 			return (in.header.opcode == FUSE_RELEASEDIR &&
 				in.header.nodeid == ino &&
 				in.body.release.fh == FH);
 		}, Eq(true)),
 		_)
 	).WillOnce(Invoke(r));
 }
 
 /*
  * Expect one FUSE_UNLINK of the name `path` in directory `parent`, and answer
  * it with ReturnErrno(error).
  */
 void FuseTest::expect_unlink(uint64_t parent, const char *path, int error)
 {
 	EXPECT_CALL(*m_mock, process(
 		ResultOf([=](auto in) {
 			return (in.header.opcode == FUSE_UNLINK &&
 				0 == strcmp(path, in.body.unlink) &&
 				in.header.nodeid == parent);
 		}, Eq(true)),
 		_)
 	).WillOnce(Invoke(ReturnErrno(error)));
 }
 
 /*
  * Expect one FUSE_WRITE of isize bytes at `offset` on ino, using file handle
  * FH, whose payload equals `contents`.  The reply reports osize bytes written.
  *
  * The request's write_flags must include every bit in flags_set and none of
  * the bits in flags_unset.  Its open flags must be O_WRONLY or O_RDWR.
  * Unless FUSE_WRITE_CACHE is set, the request's pid must be this process's.
  */
 void FuseTest::expect_write(uint64_t ino, uint64_t offset, uint64_t isize,
 	uint64_t osize, uint32_t flags_set, uint32_t flags_unset,
 	const void *contents)
 {
 	EXPECT_CALL(*m_mock, process(
 		ResultOf([=](auto in) {
 			/* The data follows the fuse_write_in structure */
 			const char *buf = (const char*)in.body.bytes +
 				sizeof(struct fuse_write_in);
 			bool pid_ok;
 			uint32_t wf = in.body.write.write_flags;
 
 			/* The pid is not checked when FUSE_WRITE_CACHE is set */
 			if (wf & FUSE_WRITE_CACHE)
 				pid_ok = true;
 			else
 				pid_ok = (pid_t)in.header.pid == getpid();
 
 			return (in.header.opcode == FUSE_WRITE &&
 				in.header.nodeid == ino &&
 				in.body.write.fh == FH &&
 				in.body.write.offset == offset  &&
 				in.body.write.size == isize &&
 				pid_ok &&
 				(wf & flags_set) == flags_set &&
 				(wf & flags_unset) == 0 &&
 				(in.body.write.flags == O_WRONLY ||
 				 in.body.write.flags == O_RDWR) &&
 				0 == bcmp(buf, contents, isize));
 		}, Eq(true)),
 		_)
 	).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) {
 		SET_OUT_HEADER_LEN(out, write);
 		out.body.write.size = osize;
 	})));
 }
 
 /*
  * Variant of expect_write that looks for the payload
  * FUSE_COMPAT_WRITE_IN_SIZE bytes into the body and does not inspect
  * write_flags or the open flags.  The pid is always checked.
  * NOTE(review): presumably for daemons speaking FUSE protocol 7.8 -- confirm.
  */
 void FuseTest::expect_write_7_8(uint64_t ino, uint64_t offset, uint64_t isize,
 	uint64_t osize, const void *contents)
 {
 	EXPECT_CALL(*m_mock, process(
 		ResultOf([=](auto in) {
 			const char *buf = (const char*)in.body.bytes +
 				FUSE_COMPAT_WRITE_IN_SIZE;
 			bool pid_ok = (pid_t)in.header.pid == getpid();
 			return (in.header.opcode == FUSE_WRITE &&
 				in.header.nodeid == ino &&
 				in.body.write.fh == FH &&
 				in.body.write.offset == offset  &&
 				in.body.write.size == isize &&
 				pid_ok &&
 				0 == bcmp(buf, contents, isize));
 		}, Eq(true)),
 		_)
 	).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) {
 		SET_OUT_HEADER_LEN(out, write);
 		out.body.write.size = osize;
 	})));
 }
 
 /*
  * Look up an unprivileged uid and gid for tests that drop privileges.
  *
  * The uid comes from user "tests", or "nobody" if that does not exist.  The
  * gid comes from group "nobody".  If either lookup fails the test is marked
  * skipped.
  * NOTE(review): *uid and *gid are presumably left unwritten in that case,
  * since FuseTest::fork checks IsSkipped() before using them -- confirm that
  * GTEST_SKIP() returns from this function.
  */
 void
 get_unprivileged_id(uid_t *uid, gid_t *gid)
 {
 	struct passwd *pw;
 	struct group *gr;
 
 	/*
 	 * First try "tests", Kyua's default unprivileged user.  XXX after
 	 * GoogleTest gains a proper Kyua wrapper, get this with the Kyua API
 	 */
 	pw = getpwnam("tests");
 	if (pw == NULL) {
 		/* Fall back to "nobody" */
 		pw = getpwnam("nobody");
 	}
 	if (pw == NULL)
 		GTEST_SKIP() << "Test requires an unprivileged user";
 	/* Use group "nobody", which is Kyua's default unprivileged group */
 	gr = getgrnam("nobody");
 	if (gr == NULL)
 		GTEST_SKIP() << "Test requires an unprivileged group";
 	*uid = pw->pw_uid;
 	*gid = gr->gr_gid;
 }
 
 /*
  * Run child_func in a forked child, optionally with dropped privileges, while
  * the parent runs parent_func.
  *
  * The child blocks on a process-shared semaphore until the parent has
  * finished parent_func, so the parent's work is in place before child_func
  * starts.  The child exits with child_func's return value, or 1 if setup
  * fails.  The parent stores the wait(2) status in *child_status.
  *
  * If drop_privs is set, the child switches to the ids returned by
  * get_unprivileged_id before calling child_func.  If those ids cannot be
  * found, the test is skipped and nothing is forked.
  */
 void
 FuseTest::fork(bool drop_privs, int *child_status,
 	std::function<void()> parent_func,
 	std::function<int()> child_func)
 {
 	sem_t *sem;
 	int mprot = PROT_READ | PROT_WRITE;
 	int mflags = MAP_ANON | MAP_SHARED;
 	pid_t child;
 	uid_t uid;
 	gid_t gid;
 	
 	if (drop_privs) {
 		get_unprivileged_id(&uid, &gid);
 		if (IsSkipped())
 			return;
 	}
 
 	/* The semaphore lives in shared anonymous memory so both sides see it */
 	sem = (sem_t*)mmap(NULL, sizeof(*sem), mprot, mflags, -1, 0);
 	ASSERT_NE(MAP_FAILED, sem) << strerror(errno);
 	ASSERT_EQ(0, sem_init(sem, 1, 0)) << strerror(errno);
 
 	if ((child = ::fork()) == 0) {
 		/* In child */
 		int err = 0;
 
 		/* Wait for the parent's go-ahead */
 		if (sem_wait(sem)) {
 			perror("sem_wait");
 			err = 1;
 			goto out;
 		}
 
 		/* Change the group first, then the user */
 		if (drop_privs && 0 != setegid(gid)) {
 			perror("setegid");
 			err = 1;
 			goto out;
 		}
 		if (drop_privs && 0 != setreuid(-1, uid)) {
 			perror("setreuid");
 			err = 1;
 			goto out;
 		}
 		err = child_func();
 
 out:
 		sem_destroy(sem);
 		/* _exit(2), not exit(3), ends the child */
 		_exit(err);
 	} else if (child > 0) {
 		/*
 		 * In parent.  Cleanup must happen here, because it's still
 		 * privileged.
 		 */
 		m_mock->m_child_pid = child;
 		ASSERT_NO_FATAL_FAILURE(parent_func());
 
 		/* Signal the child process to go */
 		ASSERT_EQ(0, sem_post(sem)) << strerror(errno);
 
 		ASSERT_LE(0, wait(child_status)) << strerror(errno);
 	} else {
 		/* fork(2) itself failed */
 		FAIL() << strerror(errno);
 	}
 	munmap(sem, sizeof(*sem));
 	return;
 }
 
 /* Print the command-line synopsis to stderr and exit with status 2 */
 static void usage(char* progname) {
 	fprintf(stderr, "Usage: %s [-v]\n", progname);
 	fputs("\t-v increase verbosity\n", stderr);
 	exit(2);
 }
 
 int main(int argc, char **argv) {
 	int ch;
 	FuseEnv *fuse_env = new FuseEnv;
 
 	InitGoogleTest(&argc, argv);
 	AddGlobalTestEnvironment(fuse_env);
 
 	while ((ch = getopt(argc, argv, "v")) != -1) {
 		switch (ch) {
 			case 'v':
 				verbosity++;
 				break;
 			default:
 				usage(argv[0]);
 				break;
 		}
 	}
 
 	return (RUN_ALL_TESTS());
 }