Index: UPDATING =================================================================== --- UPDATING +++ UPDATING @@ -31,6 +31,18 @@ disable the most expensive debugging functionality run "ln -s 'abort:false,junk:false' /etc/malloc.conf".) +20190627: + The vfs.fusefs.sync_unmount and vfs.fusefs.init_backgrounded sysctls + and the "-o sync_unmount" and "-o init_backgrounded" mount options have + been removed from mount_fusefs(8). You can safely remove them from + your scripts, because they had no effect. + + The vfs.fusefs.fix_broken_io, vfs.fusefs.sync_resize, + vfs.fusefs.refresh_size, vfs.fusefs.mmap_enable, + vfs.fusefs.reclaim_revoked, and vfs.fusefs.data_cache_invalidate + sysctls have been removed. If you felt the need to set any of them to + a non-default value, please tell asomers@FreeBSD.org why. + 20190620: Entropy collection and the /dev/random device are no longer optional components. The "device random" option has been removed. Index: etc/mtree/BSD.tests.dist =================================================================== --- etc/mtree/BSD.tests.dist +++ etc/mtree/BSD.tests.dist @@ -731,6 +731,8 @@ file .. fs + fusefs + .. tmpfs .. .. Index: lib/libc/gen/getvfsbyname.c =================================================================== --- lib/libc/gen/getvfsbyname.c +++ lib/libc/gen/getvfsbyname.c @@ -37,10 +37,27 @@ #include #include #include +#include #include #include /* + * fusefs(5) file systems may have a "subtype" which gets appended to + * statfs(2)'s f_fstypename field on a per-mount basis. Allow getvfsbyname to + * match either the full "fusefs.foobar" or the more general "fusefs". + */ +static bool +are_fusefs(const char *fsname, const char *vfc_name) +{ + const static char fusefs[] = "fusefs"; + const static char fusefs_dot[] = "fusefs."; + + + return (strncmp(fsname, fusefs_dot, sizeof(fusefs_dot) - 1) == 0 && + strcmp(fusefs, vfc_name) == 0); +} + +/* * Given a filesystem name, determine if it is resident in the kernel, * and if it is resident, return its xvfsconf structure. */ @@ -62,7 +79,9 @@ } cnt = buflen / sizeof(struct xvfsconf); for (i = 0; i < cnt; i++) { - if (strcmp(fsname, xvfsp[i].vfc_name) == 0) { + if (strcmp(fsname, xvfsp[i].vfc_name) == 0 || + are_fusefs(fsname, xvfsp[i].vfc_name)) + { memcpy(vfcp, xvfsp + i, sizeof(struct xvfsconf)); free(xvfsp); return (0); Index: sbin/mount_fusefs/mount_fusefs.8 =================================================================== --- sbin/mount_fusefs/mount_fusefs.8 +++ sbin/mount_fusefs/mount_fusefs.8 @@ -3,6 +3,11 @@ .\" Copyright (c) 2005, 2006 Csaba Henk .\" All rights reserved. .\" +.\" Copyright (c) 2019 The FreeBSD Foundation +.\" +.\" Portions of this documentation were written by BFF Storage Systems under +.\" sponsorship from the FreeBSD Foundation. +.\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: @@ -29,7 +34,7 @@ .\" .\" $FreeBSD$ .\" -.Dd November 17, 2018 +.Dd June 14, 2019 .Dt MOUNT_FUSEFS 8 .Os .Sh NAME @@ -136,23 +141,33 @@ by prefixing them with .Dq no ) : .Bl -tag -width indent -.It Cm default_permissions -Enable traditional (file mode based) permission checking in kernel .It Cm allow_other Do not apply .Sx STRICT ACCESS POLICY . Only root can use this option +.It Cm async +I/O to the file system may be done asynchronously. +Writes may delayed and/or reordered. 
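Reviewer note on the getvfsbyname.c hunk above: with the are_fusefs() helper a caller may pass either the generic "fusefs" name or the per-mount "fusefs.<subtype>" string that statfs(2) reports. A minimal userland sketch of that behavior follows; the "sshfs" subtype is only an illustration of a string a daemon might set with "-o subtype=sshfs", not something this patch mandates.

    #include <sys/param.h>
    #include <sys/mount.h>

    #include <err.h>
    #include <stdio.h>

    int
    main(void)
    {
            struct xvfsconf vfc;

            /*
             * Both lookups should now resolve to the same in-kernel
             * filesystem type.
             */
            if (getvfsbyname("fusefs", &vfc) != 0)
                    err(1, "fusefs is not loaded");
            if (getvfsbyname("fusefs.sshfs", &vfc) != 0)
                    err(1, "subtype match failed");
            printf("matched vfs type %s (typenum %d)\n", vfc.vfc_name,
                vfc.vfc_typenum);
            return (0);
    }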
+.It Cm default_permissions +Enable traditional (file mode based) permission checking in kernel .It Cm max_read Ns = Ns Ar n Limit size of read requests to .Ar n +.It Cm neglect_shares +Do not refuse unmounting if there are secondary mounts .It Cm private Refuse shared mounting of the daemon. This is the default behaviour, to allow sharing, expicitly use .Fl o Cm noprivate -.It Cm neglect_shares -Do not refuse unmounting if there are secondary mounts .It Cm push_symlinks_in Prefix absolute symlinks with the mountpoint +.It Cm subtype Ns = Ns Ar fsname +Suffix +.Ar fsname +to the file system name as reported by +.Xr statfs 2 . +This option can be used to identify the file system implemented by +.Ar fuse_daemon . .El .El .Pp Index: sbin/mount_fusefs/mount_fusefs.c =================================================================== --- sbin/mount_fusefs/mount_fusefs.c +++ sbin/mount_fusefs/mount_fusefs.c @@ -5,6 +5,11 @@ * Copyright (c) 2005 Csaba Henk * All rights reserved. * + * Copyright (c) 2019 The FreeBSD Foundation + * + * Portions of this software were developed by BFF Storage Systems under + * sponsorship from the FreeBSD Foundation. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -60,7 +65,6 @@ void usage(void); void helpmsg(void); void showversion(void); -int init_backgrounded(void); static struct mntopt mopts[] = { #define ALTF_PRIVATE 0x01 @@ -73,8 +77,6 @@ { "max_read=", 0, ALTF_MAXREAD, 1 }, #define ALTF_SUBTYPE 0x40 { "subtype=", 0, ALTF_SUBTYPE, 1 }, - #define ALTF_SYNC_UNMOUNT 0x80 - { "sync_unmount", 0, ALTF_SYNC_UNMOUNT, 1 }, /* * MOPT_AUTOMOUNTED, included by MOPT_STDOPTS, does not fit into * the 'flags' argument to nmount(2). We have to abuse altflags @@ -91,6 +93,8 @@ { "large_read", 0, 0x00, 1 }, /* "nonempty", just the first two chars are stripped off during parsing */ { "nempty", 0, 0x00, 1 }, + { "async", 0, MNT_ASYNC, 0}, + { "noasync", 1, MNT_ASYNC, 0}, MOPT_STDOPTS, MOPT_END }; @@ -107,7 +111,7 @@ { 0, NULL, 0 } }; -#define DEFAULT_MOUNT_FLAGS ALTF_PRIVATE | ALTF_SYNC_UNMOUNT +#define DEFAULT_MOUNT_FLAGS ALTF_PRIVATE int main(int argc, char *argv[]) @@ -409,12 +413,6 @@ } } - if (fd >= 0 && ! init_backgrounded() && close(fd) < 0) { - if (pid) - kill(pid, SIGKILL); - err(1, "failed to close fuse device"); - } - /* Prepare the options vector for nmount(). build_iovec() is declared * in mntopts.h. */ sprintf(fdstr, "%d", fd); @@ -481,7 +479,6 @@ " -o neglect_shares don't report EBUSY when unmount attempted\n" " in presence of secondary mounts\n" " -o push_symlinks_in prefix absolute symlinks with mountpoint\n" - " -o sync_unmount do unmount synchronously\n" ); exit(EX_USAGE); } @@ -491,18 +488,4 @@ { puts("mount_fusefs [fuse4bsd] version: " FUSE4BSD_VERSION); exit(EX_USAGE); -} - -int -init_backgrounded(void) -{ - int ibg; - size_t len; - - len = sizeof(ibg); - - if (sysctlbyname("vfs.fusefs.init_backgrounded", &ibg, &len, NULL, 0)) - return (0); - - return (ibg); } Index: share/man/man5/fusefs.5 =================================================================== --- share/man/man5/fusefs.5 +++ share/man/man5/fusefs.5 @@ -3,8 +3,8 @@ .\" .\" Copyright (c) 2019 The FreeBSD Foundation .\" -.\" This software was developed by BFF Storage Systems, LLC under sponsorship -.\" from the FreeBSD Foundation. +.\" This documentation was written by BFF Storage Systems, LLC under +.\" sponsorship from the FreeBSD Foundation. 
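A note on the mount_fusefs.c hunk above: with sync_unmount and init_backgrounded gone, "-o async" is now handled as the ordinary MNT_ASYNC flag rather than an altflag. The sketch below shows roughly how such a mount reaches nmount(2); the iovec names ("fstype", "fspath", "fd") and the add_opt() helper are simplifications of what mount_fusefs builds with build_iovec(), and the real utility adds further pairs (special device name, subtype, error-message buffer).

    #include <sys/param.h>
    #include <sys/mount.h>
    #include <sys/uio.h>

    #include <err.h>
    #include <string.h>

    /* Illustrative stand-in for build_iovec() from mntopts.h. */
    static void
    add_opt(struct iovec *iov, int idx, const char *name, const char *val)
    {
            iov[idx].iov_base = __DECONST(char *, name);
            iov[idx].iov_len = strlen(name) + 1;
            iov[idx + 1].iov_base = __DECONST(char *, val);
            iov[idx + 1].iov_len = strlen(val) + 1;
    }

    int
    mount_sketch(const char *mntpath, const char *fdstr)
    {
            struct iovec iov[6];

            add_opt(iov, 0, "fstype", "fusefs");
            add_opt(iov, 2, "fspath", mntpath);
            add_opt(iov, 4, "fd", fdstr);   /* open /dev/fuse descriptor */

            /* "-o async" is now just MNT_ASYNC in the plain flags word. */
            if (nmount(iov, 6, MNT_ASYNC) < 0)
                    err(1, "nmount");
            return (0);
    }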
.\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions @@ -28,7 +28,7 @@ .\" SUCH DAMAGE. .\" .\" $FreeBSD$ -.Dd April 13, 2019 +.Dd June 27, 2019 .Dt FUSEFS 5 .Os .Sh NAME @@ -60,11 +60,9 @@ API is portable. Many daemons can run on multiple operating systems with minimal modifications. .Sh SYSCTL VARIABLES -The following variables are available as both +The following .Xr sysctl 8 -variables and -.Xr loader 8 -tunables: +variables are available: .Bl -tag -width indent .It Va vfs.fusefs.kernelabi_major Major version of the FUSE kernel ABI supported by this driver. @@ -73,7 +71,7 @@ .It Va vfs.fusefs.data_cache_mode Controls how .Nm -will cache file data. +will cache file data for pre-7.23 file systems. A value of 0 will disable caching entirely. Every data access will be forwarded to the daemon. A value of 1 will select write-through caching. @@ -84,33 +82,25 @@ to the daemon by the page daemon. Write-back caching is usually unsafe, especially for FUSE file systems that require network access. -.It Va vfs.fusefs.lookup_cache_enable -Controls whether -.Nm -will cache lookup responses from the file system. -FUSE file systems indicate whether lookup responses should be cacheable, but -it may be useful to globally disable caching them if a file system is -misbehaving. +.Pp +FUSE file systems using protocol 7.23 or later specify their cache behavior +on a per-mountpoint basis, ignoring this sysctl. +.It Va vfs.fusefs.stats.filehandle_count +Current number of open FUSE file handles. +.It Va vfs.fusefs.stats.lookup_cache_hits +Total number of lookup cache hits. +.It Va vfs.fusefs.stats.lookup_cache_misses +Total number of lookup cache misses. +.It Va vfs.fusefs.stats.node_count +Current number of allocated FUSE vnodes. +.It Va vfs.fusefs.stats.ticket_count +Current number of allocated FUSE tickets, which is roughly equal to the number +number of FUSE operations currently being processed by daemons. .\" Undocumented sysctls .\" ==================== -.\" Counters: I intend to rename to vfs.fusefs.stats.* for clarity -.\" vfs.fusefs.lookup_cache_{hits, misses} -.\" vfs.fusefs.filehandle_count -.\" vfs.fusefs.ticker_count -.\" vfs.fusefs.node_count -.\" -.\" vfs.fusefs.version - useless since the driver moved in-tree -.\" vfs.fusefs.reclaim_revoked: I don't understand it well-enough -.\" vfs.fusefs.sync_unmount: dead code .\" vfs.fusefs.enforce_dev_perms: I don't understand it well enough. -.\" vfs.fusefs.init_backgrounded: dead code .\" vfs.fusefs.iov_credit: I don't understand it well enough .\" vfs.fusefs.iov_permanent_bufsize: I don't understand it well enough -.\" vfs.fusefs.fix_broken_io: I don't understand it well enough -.\" vfs.fusefs.sync_resize: useless and should be removed -.\" vfs.fusefs.refresh_size: probably useless? -.\" vfs.fusefs.mmap_enable: why is this optional? -.\" vfs.fusefs.data_cache_invalidate: what is this needed for? .Sh SEE ALSO .Xr mount_fusefs 8 .Sh HISTORY Index: share/man/man9/VOP_FSYNC.9 =================================================================== --- share/man/man9/VOP_FSYNC.9 +++ share/man/man9/VOP_FSYNC.9 @@ -4,6 +4,11 @@ .\" .\" All rights reserved. .\" +.\" Copyright (c) 2019 The FreeBSD Foundation +.\" +.\" Portions of this documentation were written by BFF Storage Systems under +.\" sponsorship from the FreeBSD Foundation. +.\" .\" This program is free software. 
.\" .\" Redistribution and use in source and binary forms, with or without Index: share/mk/bsd.compiler.mk =================================================================== --- share/mk/bsd.compiler.mk +++ share/mk/bsd.compiler.mk @@ -19,6 +19,7 @@ # COMPILER_FEATURES will contain one or more of the following, based on # compiler support for that feature: # +# - c++14: supports full (or nearly full) C++14 programming environment. # - c++11: supports full (or nearly full) C++11 programming environment. # - retpoline: supports the retpoline speculative execution vulnerability # mitigation. @@ -200,6 +201,10 @@ .endif ${X_}COMPILER_FEATURES= +.if ${${X_}COMPILER_TYPE} == "clang" || \ + (${${X_}COMPILER_TYPE} == "gcc" && ${${X_}COMPILER_VERSION} >= 50000) +${X_}COMPILER_FEATURES+= c++14 +.endif .if ${${X_}COMPILER_TYPE} == "clang" || \ (${${X_}COMPILER_TYPE} == "gcc" && ${${X_}COMPILER_VERSION} >= 40800) ${X_}COMPILER_FEATURES+= c++11 Index: sys/fs/fuse/fuse.h =================================================================== --- sys/fs/fuse/fuse.h +++ sys/fs/fuse/fuse.h @@ -32,6 +32,11 @@ * * Copyright (C) 2005 Csaba Henk. * All rights reserved. + * + * Copyright (c) 2019 The FreeBSD Foundation + * + * Portions of this software were developed by BFF Storage Systems, LLC under + * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -63,87 +68,10 @@ #define FUSE_MIN_DAEMON_TIMEOUT 0 /* s */ #define FUSE_MAX_DAEMON_TIMEOUT 600 /* s */ -#ifndef FUSE_FREEBSD_VERSION -#define FUSE_FREEBSD_VERSION "0.4.4" -#endif - -/* Mapping versions to features */ - -#define FUSE_KERNELABI_GEQ(maj, min) \ -(FUSE_KERNEL_VERSION > (maj) || (FUSE_KERNEL_VERSION == (maj) && FUSE_KERNEL_MINOR_VERSION >= (min))) - -/* - * Appearance of new FUSE operations is not always in par with version - * numbering... At least, 7.3 is a sufficient condition for having - * FUSE_{ACCESS,CREATE}. - */ -#if FUSE_KERNELABI_GEQ(7, 3) -#ifndef FUSE_HAS_ACCESS -#define FUSE_HAS_ACCESS 1 -#endif -#ifndef FUSE_HAS_CREATE -#define FUSE_HAS_CREATE 1 -#endif -#else /* FUSE_KERNELABI_GEQ(7, 3) */ -#ifndef FUSE_HAS_ACCESS -#define FUSE_HAS_ACCESS 0 -#endif -#ifndef FUSE_HAS_CREATE -#define FUSE_HAS_CREATE 0 -#endif -#endif - -#if FUSE_KERNELABI_GEQ(7, 7) -#ifndef FUSE_HAS_GETLK -#define FUSE_HAS_GETLK 1 -#endif -#ifndef FUSE_HAS_SETLK -#define FUSE_HAS_SETLK 1 -#endif -#ifndef FUSE_HAS_SETLKW -#define FUSE_HAS_SETLKW 1 -#endif -#ifndef FUSE_HAS_INTERRUPT -#define FUSE_HAS_INTERRUPT 1 -#endif -#else /* FUSE_KERNELABI_GEQ(7, 7) */ -#ifndef FUSE_HAS_GETLK -#define FUSE_HAS_GETLK 0 -#endif -#ifndef FUSE_HAS_SETLK -#define FUSE_HAS_SETLK 0 -#endif -#ifndef FUSE_HAS_SETLKW -#define FUSE_HAS_SETLKW 0 -#endif -#ifndef FUSE_HAS_INTERRUPT -#define FUSE_HAS_INTERRUPT 0 -#endif -#endif - -#if FUSE_KERNELABI_GEQ(7, 8) -#ifndef FUSE_HAS_FLUSH_RELEASE -#define FUSE_HAS_FLUSH_RELEASE 1 -/* - * "DESTROY" came in the middle of the 7.8 era, - * so this is not completely exact... 
- */ -#ifndef FUSE_HAS_DESTROY -#define FUSE_HAS_DESTROY 1 -#endif -#endif -#else /* FUSE_KERNELABI_GEQ(7, 8) */ -#ifndef FUSE_HAS_FLUSH_RELEASE -#define FUSE_HAS_FLUSH_RELEASE 0 -#ifndef FUSE_HAS_DESTROY -#define FUSE_HAS_DESTROY 0 -#endif -#endif -#endif - /* misc */ SYSCTL_DECL(_vfs_fusefs); +SYSCTL_DECL(_vfs_fusefs_stats); /* Fuse locking */ Index: sys/fs/fuse/fuse_device.c =================================================================== --- sys/fs/fuse/fuse_device.c +++ sys/fs/fuse/fuse_device.c @@ -33,6 +33,11 @@ * Copyright (C) 2005 Csaba Henk. * All rights reserved. * + * Copyright (c) 2019 The FreeBSD Foundation + * + * Portions of this software were developed by BFF Storage Systems, LLC under + * sponsorship from the FreeBSD Foundation. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -81,27 +86,28 @@ #include #include "fuse.h" +#include "fuse_internal.h" #include "fuse_ipc.h" -SDT_PROVIDER_DECLARE(fuse); +SDT_PROVIDER_DECLARE(fusefs); /* * Fuse trace probe: * arg0: verbosity. Higher numbers give more verbose messages * arg1: Textual message */ -SDT_PROBE_DEFINE2(fuse, , device, trace, "int", "char*"); +SDT_PROBE_DEFINE2(fusefs, , device, trace, "int", "char*"); static struct cdev *fuse_dev; +static d_kqfilter_t fuse_device_filter; static d_open_t fuse_device_open; -static d_close_t fuse_device_close; static d_poll_t fuse_device_poll; static d_read_t fuse_device_read; static d_write_t fuse_device_write; static struct cdevsw fuse_device_cdevsw = { + .d_kqfilter = fuse_device_filter, .d_open = fuse_device_open, - .d_close = fuse_device_close, .d_name = "fuse", .d_poll = fuse_device_poll, .d_read = fuse_device_read, @@ -109,6 +115,15 @@ .d_version = D_VERSION, }; +static int fuse_device_filt_read(struct knote *kn, long hint); +static void fuse_device_filt_detach(struct knote *kn); + +struct filterops fuse_device_rfiltops = { + .f_isfd = 1, + .f_detach = fuse_device_filt_detach, + .f_event = fuse_device_filt_read, +}; + /**************************** * * >>> Fuse device op defs @@ -119,11 +134,100 @@ fdata_dtor(void *arg) { struct fuse_data *fdata; + struct fuse_ticket *tick; fdata = arg; + if (fdata == NULL) + return; + + fdata_set_dead(fdata); + + FUSE_LOCK(); + fuse_lck_mtx_lock(fdata->aw_mtx); + /* wakup poll()ers */ + selwakeuppri(&fdata->ks_rsel, PZERO + 1); + /* Don't let syscall handlers wait in vain */ + while ((tick = fuse_aw_pop(fdata))) { + fuse_lck_mtx_lock(tick->tk_aw_mtx); + fticket_set_answered(tick); + tick->tk_aw_errno = ENOTCONN; + wakeup(tick); + fuse_lck_mtx_unlock(tick->tk_aw_mtx); + FUSE_ASSERT_AW_DONE(tick); + fuse_ticket_drop(tick); + } + fuse_lck_mtx_unlock(fdata->aw_mtx); + + /* Cleanup unsent operations */ + fuse_lck_mtx_lock(fdata->ms_mtx); + while ((tick = fuse_ms_pop(fdata))) { + fuse_ticket_drop(tick); + } + fuse_lck_mtx_unlock(fdata->ms_mtx); + FUSE_UNLOCK(); + fdata_trydestroy(fdata); } +static int +fuse_device_filter(struct cdev *dev, struct knote *kn) +{ + struct fuse_data *data; + int error; + + error = devfs_get_cdevpriv((void **)&data); + + /* EVFILT_WRITE is not supported; the device is always ready to write */ + if (error == 0 && kn->kn_filter == EVFILT_READ) { + kn->kn_fop = &fuse_device_rfiltops; + kn->kn_hook = data; + knlist_add(&data->ks_rsel.si_note, kn, 0); + error = 0; + } else if (error == 0) { + error = EINVAL; + kn->kn_data = error; + } + + return (error); +} + +static void +fuse_device_filt_detach(struct knote *kn) +{ + 
struct fuse_data *data; + + data = (struct fuse_data*)kn->kn_hook; + MPASS(data != NULL); + knlist_remove(&data->ks_rsel.si_note, kn, 0); + kn->kn_hook = NULL; +} + +static int +fuse_device_filt_read(struct knote *kn, long hint) +{ + struct fuse_data *data; + int ready; + + data = (struct fuse_data*)kn->kn_hook; + MPASS(data != NULL); + + mtx_assert(&data->ms_mtx, MA_OWNED); + if (fdata_get_dead(data)) { + kn->kn_flags |= EV_EOF; + kn->kn_fflags = ENODEV; + kn->kn_data = 1; + ready = 1; + } else if (STAILQ_FIRST(&data->ms_head)) { + MPASS(data->ms_count >= 1); + kn->kn_data = data->ms_count; + ready = 1; + } else { + ready = 0; + } + + return (ready); +} + /* * Resources are set up on a per-open basis */ @@ -133,52 +237,17 @@ struct fuse_data *fdata; int error; - SDT_PROBE2(fuse, , device, trace, 1, "device open"); + SDT_PROBE2(fusefs, , device, trace, 1, "device open"); fdata = fdata_alloc(dev, td->td_ucred); error = devfs_set_cdevpriv(fdata, fdata_dtor); if (error != 0) fdata_trydestroy(fdata); else - SDT_PROBE2(fuse, , device, trace, 1, "device open success"); + SDT_PROBE2(fusefs, , device, trace, 1, "device open success"); return (error); } -static int -fuse_device_close(struct cdev *dev, int fflag, int devtype, struct thread *td) -{ - struct fuse_data *data; - struct fuse_ticket *tick; - int error; - - error = devfs_get_cdevpriv((void **)&data); - if (error != 0) - return (error); - if (!data) - panic("no fuse data upon fuse device close"); - fdata_set_dead(data); - - FUSE_LOCK(); - fuse_lck_mtx_lock(data->aw_mtx); - /* wakup poll()ers */ - selwakeuppri(&data->ks_rsel, PZERO + 1); - /* Don't let syscall handlers wait in vain */ - while ((tick = fuse_aw_pop(data))) { - fuse_lck_mtx_lock(tick->tk_aw_mtx); - fticket_set_answered(tick); - tick->tk_aw_errno = ENOTCONN; - wakeup(tick); - fuse_lck_mtx_unlock(tick->tk_aw_mtx); - FUSE_ASSERT_AW_DONE(tick); - fuse_ticket_drop(tick); - } - fuse_lck_mtx_unlock(data->aw_mtx); - FUSE_UNLOCK(); - - SDT_PROBE2(fuse, , device, trace, 1, "device close"); - return (0); -} - int fuse_device_poll(struct cdev *dev, int events, struct thread *td) { @@ -219,7 +288,7 @@ int buflen[3]; int i; - SDT_PROBE2(fuse, , device, trace, 1, "fuse device read"); + SDT_PROBE2(fusefs, , device, trace, 1, "fuse device read"); err = devfs_get_cdevpriv((void **)&data); if (err != 0) @@ -228,7 +297,7 @@ fuse_lck_mtx_lock(data->ms_mtx); again: if (fdata_get_dead(data)) { - SDT_PROBE2(fuse, , device, trace, 2, + SDT_PROBE2(fusefs, , device, trace, 2, "we know early on that reader should be kicked so we " "don't wait for news"); fuse_lck_mtx_unlock(data->ms_mtx); @@ -256,7 +325,7 @@ * -- and some other cases, too, tho not totally clear, when * (cv_signal/wakeup_one signals the whole process ?) 
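The new d_kqfilter hook added above lets a daemon wait for requests with kevent(2) instead of, or alongside, poll(2); EVFILT_WRITE is rejected because the device is always ready for writes, and EV_EOF is raised once the mount data goes dead. A minimal sketch of the daemon side, with most error handling trimmed; wait_for_request() and the fusefd parameter are illustrative names only.

    #include <sys/types.h>
    #include <sys/event.h>

    #include <err.h>
    #include <unistd.h>

    void
    wait_for_request(int fusefd)
    {
            struct kevent kev, ev;
            int kq;

            if ((kq = kqueue()) < 0)
                    err(1, "kqueue");
            /* Register read interest; kn_data reports queued messages. */
            EV_SET(&kev, fusefd, EVFILT_READ, EV_ADD, 0, 0, NULL);
            if (kevent(kq, &kev, 1, NULL, 0, NULL) < 0)
                    err(1, "kevent register");

            /* Block until the kernel has at least one request queued. */
            if (kevent(kq, NULL, 0, &ev, 1, NULL) < 0)
                    err(1, "kevent wait");
            if (ev.flags & EV_EOF)
                    errx(1, "mount was torn down");
            close(kq);
    }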
*/ - SDT_PROBE2(fuse, , device, trace, 1, "no message on thread"); + SDT_PROBE2(fusefs, , device, trace, 1, "no message on thread"); goto again; } fuse_lck_mtx_unlock(data->ms_mtx); @@ -266,9 +335,10 @@ * somebody somewhere -- eg., umount routine -- * wants this liaison finished off */ - SDT_PROBE2(fuse, , device, trace, 2, "reader is to be sacked"); + SDT_PROBE2(fusefs, , device, trace, 2, + "reader is to be sacked"); if (tick) { - SDT_PROBE2(fuse, , device, trace, 2, "weird -- " + SDT_PROBE2(fusefs, , device, trace, 2, "weird -- " "\"kick\" is set tho there is message"); FUSE_ASSERT_MS_DONE(tick); fuse_ticket_drop(tick); @@ -276,7 +346,7 @@ return (ENODEV); /* This should make the daemon get off * of us */ } - SDT_PROBE2(fuse, , device, trace, 1, + SDT_PROBE2(fusefs, , device, trace, 1, "fuse device read message successfully"); KASSERT(tick->tk_ms_bufdata || tick->tk_ms_bufsize == 0, @@ -311,7 +381,7 @@ */ if (uio->uio_resid < buflen[i]) { fdata_set_dead(data); - SDT_PROBE2(fuse, , device, trace, 2, + SDT_PROBE2(fusefs, , device, trace, 2, "daemon is stupid, kick it off..."); err = ENODEV; break; @@ -331,23 +401,26 @@ fuse_ohead_audit(struct fuse_out_header *ohead, struct uio *uio) { if (uio->uio_resid + sizeof(struct fuse_out_header) != ohead->len) { - SDT_PROBE2(fuse, , device, trace, 1, "Format error: body size " + SDT_PROBE2(fusefs, , device, trace, 1, + "Format error: body size " "differs from size claimed by header"); return (EINVAL); } - if (uio->uio_resid && ohead->error) { - SDT_PROBE2(fuse, , device, trace, 1, + if (uio->uio_resid && ohead->unique != 0 && ohead->error) { + SDT_PROBE2(fusefs, , device, trace, 1, "Format error: non zero error but message had a body"); return (EINVAL); } - /* Sanitize the linuxism of negative errnos */ - ohead->error = -(ohead->error); return (0); } -SDT_PROBE_DEFINE1(fuse, , device, fuse_device_write_bumped_into_callback, - "uint64_t"); +SDT_PROBE_DEFINE1(fusefs, , device, fuse_device_write_notify, + "struct fuse_out_header*"); +SDT_PROBE_DEFINE1(fusefs, , device, fuse_device_write_missing_ticket, + "uint64_t"); +SDT_PROBE_DEFINE1(fusefs, , device, fuse_device_write_found, + "struct fuse_ticket*"); /* * fuse_device_write first reads the header sent by the daemon. * If that's OK, looks up ticket/callback node by the unique id seen in header. 
@@ -360,15 +433,17 @@ struct fuse_out_header ohead; int err = 0; struct fuse_data *data; - struct fuse_ticket *tick, *x_tick; + struct mount *mp; + struct fuse_ticket *tick, *itick, *x_tick; int found = 0; err = devfs_get_cdevpriv((void **)&data); if (err != 0) return (err); + mp = data->mp; if (uio->uio_resid < sizeof(struct fuse_out_header)) { - SDT_PROBE2(fuse, , device, trace, 1, + SDT_PROBE2(fusefs, , device, trace, 1, "fuse_device_write got less than a header!"); fdata_set_dead(data); return (EINVAL); @@ -393,15 +468,29 @@ fuse_lck_mtx_lock(data->aw_mtx); TAILQ_FOREACH_SAFE(tick, &data->aw_head, tk_aw_link, x_tick) { - SDT_PROBE1(fuse, , device, - fuse_device_write_bumped_into_callback, - tick->tk_unique); if (tick->tk_unique == ohead.unique) { + SDT_PROBE1(fusefs, , device, fuse_device_write_found, + tick); found = 1; fuse_aw_remove(tick); break; } } + if (found && tick->irq_unique > 0) { + /* + * Discard the FUSE_INTERRUPT ticket that tried to interrupt + * this operation + */ + TAILQ_FOREACH_SAFE(itick, &data->aw_head, tk_aw_link, + x_tick) { + if (itick->tk_unique == tick->irq_unique) { + fuse_aw_remove(itick); + fuse_ticket_drop(itick); + break; + } + } + tick->irq_unique = 0; + } fuse_lck_mtx_unlock(data->aw_mtx); if (found) { @@ -414,13 +503,15 @@ * via ticket_drop(), so no manual mucking * around...) */ - SDT_PROBE2(fuse, , device, trace, 1, + SDT_PROBE2(fusefs, , device, trace, 1, "pass ticket to a callback"); + /* Sanitize the linuxism of negative errnos */ + ohead.error *= -1; memcpy(&tick->tk_aw_ohead, &ohead, sizeof(ohead)); err = tick->tk_aw_handler(tick, uio); } else { /* pretender doesn't wanna do anything with answer */ - SDT_PROBE2(fuse, , device, trace, 1, + SDT_PROBE2(fusefs, , device, trace, 1, "stuff devalidated, so we drop it"); } @@ -430,11 +521,51 @@ * because fuse_ticket_drop() will deal with refcount anyway. */ fuse_ticket_drop(tick); + } else if (ohead.unique == 0){ + /* unique == 0 means asynchronous notification */ + SDT_PROBE1(fusefs, , device, fuse_device_write_notify, &ohead); + switch (ohead.error) { + case FUSE_NOTIFY_INVAL_ENTRY: + err = fuse_internal_invalidate_entry(mp, uio); + break; + case FUSE_NOTIFY_INVAL_INODE: + err = fuse_internal_invalidate_inode(mp, uio); + break; + case FUSE_NOTIFY_RETRIEVE: + case FUSE_NOTIFY_STORE: + /* + * Unimplemented. I don't know of any file systems + * that use them, and the protocol isn't sound anyway, + * since the notification messages don't include the + * inode's generation number. Without that, it's + * possible to manipulate the cache of the wrong vnode. + * Finally, it's not defined what this message should + * do for a file with dirty cache. + */ + case FUSE_NOTIFY_POLL: + /* Unimplemented. See comments in fuse_vnops */ + default: + /* Not implemented */ + err = ENOSYS; + } } else { /* no callback at all! */ - SDT_PROBE2(fuse, , device, trace, 1, - "erhm, no handler for this response"); - err = EINVAL; + SDT_PROBE1(fusefs, , device, fuse_device_write_missing_ticket, + ohead.unique); + if (ohead.error == -EAGAIN) { + /* + * This was probably a response to a FUSE_INTERRUPT + * operation whose original operation is already + * complete. We can't store FUSE_INTERRUPT tickets + * indefinitely because their responses are optional. + * So we delete them when the original operation + * completes. And sadly the fuse_header_out doesn't + * identify the opcode, so we have to guess. 
+ */ + err = 0; + } else { + err = EINVAL; + } } return (err); @@ -445,7 +576,7 @@ { fuse_dev = make_dev(&fuse_device_cdevsw, 0, UID_ROOT, GID_OPERATOR, - S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP, "fuse"); + S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH, "fuse"); if (fuse_dev == NULL) return (ENOMEM); return (0); Index: sys/fs/fuse/fuse_file.h =================================================================== --- sys/fs/fuse/fuse_file.h +++ sys/fs/fuse/fuse_file.h @@ -32,6 +32,11 @@ * * Copyright (C) 2005 Csaba Henk. * All rights reserved. + * + * Copyright (c) 2019 The FreeBSD Foundation + * + * Portions of this software were developed by BFF Storage Systems, LLC under + * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -66,52 +71,115 @@ #include #include +/* + * The fufh type is the access mode of the fuse file handle. It's the portion + * of the open(2) flags related to permission. + */ typedef enum fufh_type { FUFH_INVALID = -1, - FUFH_RDONLY = 0, - FUFH_WRONLY = 1, - FUFH_RDWR = 2, - FUFH_MAXTYPE = 3, + FUFH_RDONLY = O_RDONLY, + FUFH_WRONLY = O_WRONLY, + FUFH_RDWR = O_RDWR, + FUFH_EXEC = O_EXEC, } fufh_type_t; -_Static_assert(FUFH_RDONLY == O_RDONLY, "RDONLY"); -_Static_assert(FUFH_WRONLY == O_WRONLY, "WRONLY"); -_Static_assert(FUFH_RDWR == O_RDWR, "RDWR"); +/* + * FUSE File Handles + * + * The FUSE protocol says that a server may assign a unique 64-bit file handle + * every time that a file is opened. Effectively, that's once for each file + * descriptor. + * + * Unfortunately, the VFS doesn't help us here. VOPs don't have a + * struct file* argument. fileops do, but many syscalls bypass the fileops + * layer and go straight to a vnode. Some, like writing from cache, can't + * track a file handle even in theory. The entire concept of the file handle + * is a product of FUSE's Linux origins; Linux lacks vnodes and almost every + * file system operation takes a struct file* argument. + * + * Since FreeBSD's VFS is more file descriptor-agnostic, we must store FUSE + * filehandles in the vnode. One option would be to only store a single file + * handle and never open FUSE files concurrently. That's what NetBSD does. + * But that violates FUSE's security model. FUSE expects the server to do all + * authorization (except when mounted with -o default_permissions). In order + * to do that, the server needs us to send FUSE_OPEN every time somebody opens + * a new file descriptor. + * + * Another option would be to never open FUSE files concurrently, but send a + * FUSE_ACCESS prior to every open after the first. That would give the server + * the opportunity to authorize the access. Unfortunately, the FUSE protocol + * makes ACCESS optional. File systems that don't implement it are assumed to + * authorize everything. A survey of 32 fuse file systems showed that only 14 + * implemented access. Among the laggards were a few that really ought to be + * doing server-side authorization. + * + * So we do something hacky, similar to what OpenBSD, Illumos, and OSXFuse do. + * we store a list of file handles, one for each combination of vnode, uid, + * gid, pid, and access mode. When opening a file, we first check whether + * there's already a matching file handle. If so, we reuse it. If not, we + * send FUSE_OPEN and create a new file handle. That minimizes the number of + * open file handles while still allowing the server to authorize stuff. 
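For context on the unique == 0 path and the errno sign handling above: a FUSE server answers a request by writing a fuse_out_header carrying a Linux-style negative errno followed by the op-specific body, and it sends unsolicited notifications with unique set to 0 and the notification code in the error field. A rough sketch of both message shapes; the struct definitions below are local stand-ins that mirror the fuse_out_header and fuse_notify_inval_inode_out layouts in the protocol headers, not the real declarations.

    #include <sys/uio.h>

    #include <stdint.h>
    #include <string.h>
    #include <unistd.h>

    /* Stand-ins for the protocol structs shipped with the driver. */
    struct out_header { uint32_t len; int32_t error; uint64_t unique; };
    struct inval_inode_out { uint64_t ino; int64_t off; int64_t len; };

    /* Answer request 'unique' with an error; note the negated errno. */
    static ssize_t
    reply_error(int fusefd, uint64_t unique, int errnum)
    {
            struct out_header oh;

            memset(&oh, 0, sizeof(oh));
            oh.len = sizeof(oh);
            oh.error = -errnum;     /* the kernel flips the sign back */
            oh.unique = unique;
            return (write(fusefd, &oh, sizeof(oh)));
    }

    /* Push an inode invalidation; unique == 0 marks it asynchronous. */
    static ssize_t
    notify_inval_inode(int fusefd, uint64_t ino, int notify_code)
    {
            struct out_header oh;
            struct inval_inode_out arg;
            struct iovec iov[2];

            memset(&oh, 0, sizeof(oh));
            oh.len = sizeof(oh) + sizeof(arg);
            oh.error = notify_code; /* e.g. FUSE_NOTIFY_INVAL_INODE */
            oh.unique = 0;
            arg.ino = ino;
            arg.off = 0;
            arg.len = -1;           /* invalidate the whole file */
            iov[0].iov_base = &oh;
            iov[0].iov_len = sizeof(oh);
            iov[1].iov_base = &arg;
            iov[1].iov_len = sizeof(arg);
            return (writev(fusefd, iov, 2));
    }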
+ * + * VOPs that need a file handle search through the list for a close match. + * They can't be guaranteed of finding an exact match because, for example, a + * process may have changed its UID since opening the file. Also, most VOPs + * don't know exactly what permission they need. Is O_RDWR required or is + * O_RDONLY good enough? So the file handle we end up using may not be exactly + * the one we're supposed to use with that file descriptor. But if the FUSE + * file system isn't too picky, it will work. (FWIW even Linux sometimes + * guesses the file handle, during writes from cache or most SETATTR + * operations). + * + * I suspect this mess is part of the reason why neither NFS nor 9P have an + * equivalent of FUSE file handles. + */ struct fuse_filehandle { + LIST_ENTRY(fuse_filehandle) next; + + /* The filehandle returned by FUSE_OPEN */ uint64_t fh_id; - fufh_type_t fh_type; -}; -#define FUFH_IS_VALID(f) ((f)->fh_type != FUFH_INVALID) + /* + * flags returned by FUSE_OPEN + * Supported flags: FOPEN_DIRECT_IO, FOPEN_KEEP_CACHE + * Unsupported: + * FOPEN_NONSEEKABLE: Adding support would require a new per-file + * or per-vnode attribute, which would have to be checked by + * kern_lseek (and others) for every file system. The benefit is + * dubious, since I'm unaware of any file systems in ports that use + * this flag. + */ + uint32_t fuse_open_flags; -static inline fufh_type_t -fuse_filehandle_xlate_from_mmap(int fflags) -{ - if (fflags & (PROT_READ | PROT_WRITE)) - return FUFH_RDWR; - else if (fflags & (PROT_WRITE)) - return FUFH_WRONLY; - else if ((fflags & PROT_READ) || (fflags & PROT_EXEC)) - return FUFH_RDONLY; - else - return FUFH_INVALID; -} + /* The access mode of the file handle */ + fufh_type_t fufh_type; -static inline fufh_type_t -fuse_filehandle_xlate_from_fflags(int fflags) -{ - if ((fflags & FREAD) && (fflags & FWRITE)) - return FUFH_RDWR; - else if (fflags & (FWRITE)) - return FUFH_WRONLY; - else if (fflags & (FREAD)) - return FUFH_RDONLY; - else - panic("FUSE: What kind of a flag is this (%x)?", fflags); -} + /* Credentials used to open the file */ + gid_t gid; + pid_t pid; + uid_t uid; +}; +#define FUFH_IS_VALID(f) ((f)->fufh_type != FUFH_INVALID) + +/* + * Get the flags to use for FUSE_CREATE, FUSE_OPEN and FUSE_RELEASE + * + * These are supposed to be the same as the flags argument to open(2). + * However, since we can't reliably associate a fuse_filehandle with a specific + * file descriptor it would would be dangerous to include anything more than + * the access mode flags. For example, suppose we open a file twice, once with + * O_APPEND and once without. Then the user pwrite(2)s to offset using the + * second file descriptor. If fusefs uses the first file handle, then the + * server may append the write to the end of the file rather than at offset 0. + * To prevent problems like this, we only ever send the portion of flags + * related to access mode. + * + * It's essential to send that portion, because FUSE uses it for server-side + * authorization. 
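To restate the reuse policy from the long comment above in code form: a handle is reusable when its access type covers the request and its opening credentials match, with pid == 0 acting as a wildcard, and an RDWR handle may stand in for read or write but never for exec. This is a distilled, userspace-testable mirror of the checks fuse_filehandle_validrw()/get() perform later in the diff, not new driver logic; handle_key and handle_matches are illustrative names.

    #include <sys/types.h>

    #include <fcntl.h>
    #include <stdbool.h>

    /* Simplified mirror of the fields fuse_filehandle keys on. */
    struct handle_key {
            int     type;   /* O_RDONLY, O_WRONLY, O_RDWR or O_EXEC */
            uid_t   uid;
            gid_t   gid;
            pid_t   pid;
    };

    static bool
    handle_matches(const struct handle_key *h, int want_type,
        uid_t uid, gid_t gid, pid_t pid)
    {
            /* Only a handle opened with the same credentials counts. */
            if (h->uid != uid || h->gid != gid)
                    return (false);
            /* pid == 0 means "any process". */
            if (pid != 0 && h->pid != pid)
                    return (false);
            if (h->type == want_type)
                    return (true);
            /* RDWR covers RDONLY/WRONLY requests, but not EXEC. */
            return (want_type != O_EXEC && h->type == O_RDWR);
    }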
+ */ static inline int -fuse_filehandle_xlate_to_oflags(fufh_type_t type) +fufh_type_2_fflags(fufh_type_t type) { int oflags = -1; @@ -119,6 +187,7 @@ case FUFH_RDONLY: case FUFH_WRONLY: case FUFH_RDWR: + case FUFH_EXEC: oflags = type; break; default: @@ -128,19 +197,28 @@ return oflags; } -int fuse_filehandle_valid(struct vnode *vp, fufh_type_t fufh_type); -fufh_type_t fuse_filehandle_validrw(struct vnode *vp, fufh_type_t fufh_type); -int fuse_filehandle_get(struct vnode *vp, fufh_type_t fufh_type, - struct fuse_filehandle **fufhp); -int fuse_filehandle_getrw(struct vnode *vp, fufh_type_t fufh_type, - struct fuse_filehandle **fufhp); +bool fuse_filehandle_validrw(struct vnode *vp, int mode, + struct ucred *cred, pid_t pid); +int fuse_filehandle_get(struct vnode *vp, int fflag, + struct fuse_filehandle **fufhp, struct ucred *cred, + pid_t pid); +int fuse_filehandle_get_anyflags(struct vnode *vp, + struct fuse_filehandle **fufhp, struct ucred *cred, + pid_t pid); +int fuse_filehandle_getrw(struct vnode *vp, int fflag, + struct fuse_filehandle **fufhp, struct ucred *cred, + pid_t pid); void fuse_filehandle_init(struct vnode *vp, fufh_type_t fufh_type, - struct fuse_filehandle **fufhp, uint64_t fh_id); -int fuse_filehandle_open(struct vnode *vp, fufh_type_t fufh_type, + struct fuse_filehandle **fufhp, struct thread *td, + struct ucred *cred, struct fuse_open_out *foo); +int fuse_filehandle_open(struct vnode *vp, int mode, struct fuse_filehandle **fufhp, struct thread *td, struct ucred *cred); -int fuse_filehandle_close(struct vnode *vp, fufh_type_t fufh_type, +int fuse_filehandle_close(struct vnode *vp, struct fuse_filehandle *fufh, struct thread *td, struct ucred *cred); + +void fuse_file_init(void); +void fuse_file_destroy(void); #endif /* _FUSE_FILE_H_ */ Index: sys/fs/fuse/fuse_file.c =================================================================== --- sys/fs/fuse/fuse_file.c +++ sys/fs/fuse/fuse_file.c @@ -33,6 +33,11 @@ * Copyright (C) 2005 Csaba Henk. * All rights reserved. * + * Copyright (c) 2019 The FreeBSD Foundation + * + * Portions of this software were developed by BFF Storage Systems, LLC under + * sponsorship from the FreeBSD Foundation. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -59,6 +64,7 @@ __FBSDID("$FreeBSD$"); #include +#include #include #include #include @@ -79,52 +85,61 @@ #include "fuse.h" #include "fuse_file.h" #include "fuse_internal.h" +#include "fuse_io.h" #include "fuse_ipc.h" #include "fuse_node.h" -SDT_PROVIDER_DECLARE(fuse); +MALLOC_DEFINE(M_FUSE_FILEHANDLE, "fuse_filefilehandle", "FUSE file handle"); + +SDT_PROVIDER_DECLARE(fusefs); /* * Fuse trace probe: * arg0: verbosity. 
Higher numbers give more verbose messages * arg1: Textual message */ -SDT_PROBE_DEFINE2(fuse, , file, trace, "int", "char*"); +SDT_PROBE_DEFINE2(fusefs, , file, trace, "int", "char*"); -static int fuse_fh_count = 0; +static counter_u64_t fuse_fh_count; -SYSCTL_INT(_vfs_fusefs, OID_AUTO, filehandle_count, CTLFLAG_RD, - &fuse_fh_count, 0, "number of open FUSE filehandles"); +SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, filehandle_count, CTLFLAG_RD, + &fuse_fh_count, "number of open FUSE filehandles"); +/* Get the FUFH type for a particular access mode */ +static inline fufh_type_t +fflags_2_fufh_type(int fflags) +{ + if ((fflags & FREAD) && (fflags & FWRITE)) + return FUFH_RDWR; + else if (fflags & (FWRITE)) + return FUFH_WRONLY; + else if (fflags & (FREAD)) + return FUFH_RDONLY; + else if (fflags & (FEXEC)) + return FUFH_EXEC; + else + panic("FUSE: What kind of a flag is this (%x)?", fflags); +} + int -fuse_filehandle_open(struct vnode *vp, fufh_type_t fufh_type, +fuse_filehandle_open(struct vnode *vp, int a_mode, struct fuse_filehandle **fufhp, struct thread *td, struct ucred *cred) { struct fuse_dispatcher fdi; struct fuse_open_in *foi; struct fuse_open_out *foo; + fufh_type_t fufh_type; int err = 0; int oflags = 0; int op = FUSE_OPEN; - if (fuse_filehandle_valid(vp, fufh_type)) { - panic("FUSE: filehandle_open called despite valid fufh (type=%d)", - fufh_type); - /* NOTREACHED */ - } - /* - * Note that this means we are effectively FILTERING OUT open() flags. - */ - oflags = fuse_filehandle_xlate_to_oflags(fufh_type); + fufh_type = fflags_2_fufh_type(a_mode); + oflags = fufh_type_2_fflags(fufh_type); if (vnode_isdir(vp)) { op = FUSE_OPENDIR; - if (fufh_type != FUFH_RDONLY) { - SDT_PROBE2(fuse, , file, trace, 1, - "non-rdonly fh requested for a directory?"); - printf("FUSE:non-rdonly fh requested for a directory?\n"); - fufh_type = FUFH_RDONLY; - } + /* vn_open_vnode already rejects FWRITE on directories */ + MPASS(fufh_type == FUFH_RDONLY || fufh_type == FUFH_EXEC); } fdisp_init(&fdi, sizeof(*foi)); fdisp_make_vp(&fdi, op, vp, td, cred); @@ -133,7 +148,7 @@ foi->flags = oflags; if ((err = fdisp_wait_answ(&fdi))) { - SDT_PROBE2(fuse, , file, trace, 1, + SDT_PROBE2(fusefs, , file, trace, 1, "OUCH ... daemon didn't give fh"); if (err == ENOENT) { fuse_internal_vnode_disappear(vp); @@ -142,42 +157,24 @@ } foo = fdi.answ; - fuse_filehandle_init(vp, fufh_type, fufhp, foo->fh); + fuse_filehandle_init(vp, fufh_type, fufhp, td, cred, foo); + fuse_vnode_open(vp, foo->open_flags, td); - /* - * For WRONLY opens, force DIRECT_IO. This is necessary - * since writing a partial block through the buffer cache - * will result in a read of the block and that read won't - * be allowed by the WRONLY open. 
- */ - if (fufh_type == FUFH_WRONLY) - fuse_vnode_open(vp, foo->open_flags | FOPEN_DIRECT_IO, td); - else - fuse_vnode_open(vp, foo->open_flags, td); - out: fdisp_destroy(&fdi); return err; } int -fuse_filehandle_close(struct vnode *vp, fufh_type_t fufh_type, +fuse_filehandle_close(struct vnode *vp, struct fuse_filehandle *fufh, struct thread *td, struct ucred *cred) { struct fuse_dispatcher fdi; struct fuse_release_in *fri; - struct fuse_vnode_data *fvdat = VTOFUD(vp); - struct fuse_filehandle *fufh = NULL; int err = 0; int op = FUSE_RELEASE; - fufh = &(fvdat->fufh[fufh_type]); - if (!FUFH_IS_VALID(fufh)) { - panic("FUSE: filehandle_put called on invalid fufh (type=%d)", - fufh_type); - /* NOTREACHED */ - } if (fuse_isdeadfs(vp)) { goto out; } @@ -187,96 +184,194 @@ fdisp_make_vp(&fdi, op, vp, td, cred); fri = fdi.indata; fri->fh = fufh->fh_id; - fri->flags = fuse_filehandle_xlate_to_oflags(fufh_type); + fri->flags = fufh_type_2_fflags(fufh->fufh_type); + /* + * If the file has a POSIX lock then we're supposed to set lock_owner. + * If not, then lock_owner is undefined. So we may as well always set + * it. + */ + fri->lock_owner = td->td_proc->p_pid; err = fdisp_wait_answ(&fdi); fdisp_destroy(&fdi); out: - atomic_subtract_acq_int(&fuse_fh_count, 1); - fufh->fh_id = (uint64_t)-1; - fufh->fh_type = FUFH_INVALID; + counter_u64_add(fuse_fh_count, -1); + LIST_REMOVE(fufh, next); + free(fufh, M_FUSE_FILEHANDLE); return err; } -int -fuse_filehandle_valid(struct vnode *vp, fufh_type_t fufh_type) -{ - struct fuse_vnode_data *fvdat = VTOFUD(vp); - struct fuse_filehandle *fufh; - - fufh = &(fvdat->fufh[fufh_type]); - return FUFH_IS_VALID(fufh); -} - /* * Check for a valid file handle, first the type requested, but if that * isn't valid, try for FUFH_RDWR. - * Return the FUFH type that is valid or FUFH_INVALID if there are none. - * This is a variant of fuse_filehandle_vaild() analogous to - * fuse_filehandle_getrw(). + * Return true if there is any file handle with the correct credentials and + * a fufh type that includes the provided one. 
+ * A pid of 0 means "don't care" */ -fufh_type_t -fuse_filehandle_validrw(struct vnode *vp, fufh_type_t fufh_type) +bool +fuse_filehandle_validrw(struct vnode *vp, int mode, + struct ucred *cred, pid_t pid) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct fuse_filehandle *fufh; + fufh_type_t fufh_type = fflags_2_fufh_type(mode); - fufh = &fvdat->fufh[fufh_type]; - if (FUFH_IS_VALID(fufh) != 0) - return (fufh_type); - fufh = &fvdat->fufh[FUFH_RDWR]; - if (FUFH_IS_VALID(fufh) != 0) - return (FUFH_RDWR); - return (FUFH_INVALID); + /* + * Unlike fuse_filehandle_get, we want to search for a filehandle with + * the exact cred, and no fallback + */ + LIST_FOREACH(fufh, &fvdat->handles, next) { + if (fufh->fufh_type == fufh_type && + fufh->uid == cred->cr_uid && + fufh->gid == cred->cr_rgid && + (pid == 0 || fufh->pid == pid)) + return true; + } + + if (fufh_type == FUFH_EXEC) + return false; + + /* Fallback: find a RDWR list entry with the right cred */ + LIST_FOREACH(fufh, &fvdat->handles, next) { + if (fufh->fufh_type == FUFH_RDWR && + fufh->uid == cred->cr_uid && + fufh->gid == cred->cr_rgid && + (pid == 0 || fufh->pid == pid)) + return true; + } + + return false; } int -fuse_filehandle_get(struct vnode *vp, fufh_type_t fufh_type, - struct fuse_filehandle **fufhp) +fuse_filehandle_get(struct vnode *vp, int fflag, + struct fuse_filehandle **fufhp, struct ucred *cred, pid_t pid) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct fuse_filehandle *fufh; + fufh_type_t fufh_type; - fufh = &(fvdat->fufh[fufh_type]); - if (!FUFH_IS_VALID(fufh)) + fufh_type = fflags_2_fufh_type(fflag); + /* cred can be NULL for in-kernel clients */ + if (cred == NULL) + goto fallback; + + LIST_FOREACH(fufh, &fvdat->handles, next) { + if (fufh->fufh_type == fufh_type && + fufh->uid == cred->cr_uid && + fufh->gid == cred->cr_rgid && + (pid == 0 || fufh->pid == pid)) + goto found; + } + +fallback: + /* Fallback: find a list entry with the right flags */ + LIST_FOREACH(fufh, &fvdat->handles, next) { + if (fufh->fufh_type == fufh_type) + break; + } + + if (fufh == NULL) return EBADF; + +found: if (fufhp != NULL) *fufhp = fufh; return 0; } +/* Get a file handle with any kind of flags */ int -fuse_filehandle_getrw(struct vnode *vp, fufh_type_t fufh_type, - struct fuse_filehandle **fufhp) +fuse_filehandle_get_anyflags(struct vnode *vp, + struct fuse_filehandle **fufhp, struct ucred *cred, pid_t pid) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct fuse_filehandle *fufh; - fufh = &(fvdat->fufh[fufh_type]); - if (!FUFH_IS_VALID(fufh)) { - fufh_type = FUFH_RDWR; + if (cred == NULL) + goto fallback; + + LIST_FOREACH(fufh, &fvdat->handles, next) { + if (fufh->uid == cred->cr_uid && + fufh->gid == cred->cr_rgid && + (pid == 0 || fufh->pid == pid)) + goto found; } - return fuse_filehandle_get(vp, fufh_type, fufhp); + +fallback: + /* Fallback: find any list entry */ + fufh = LIST_FIRST(&fvdat->handles); + + if (fufh == NULL) + return EBADF; + +found: + if (fufhp != NULL) + *fufhp = fufh; + return 0; } +int +fuse_filehandle_getrw(struct vnode *vp, int fflag, + struct fuse_filehandle **fufhp, struct ucred *cred, pid_t pid) +{ + int err; + + err = fuse_filehandle_get(vp, fflag, fufhp, cred, pid); + if (err) + err = fuse_filehandle_get(vp, FREAD | FWRITE, fufhp, cred, pid); + return err; +} + void fuse_filehandle_init(struct vnode *vp, fufh_type_t fufh_type, - struct fuse_filehandle **fufhp, uint64_t fh_id) + struct fuse_filehandle **fufhp, struct thread *td, struct ucred *cred, + struct fuse_open_out *foo) { struct 
fuse_vnode_data *fvdat = VTOFUD(vp); struct fuse_filehandle *fufh; - fufh = &(fvdat->fufh[fufh_type]); - MPASS(!FUFH_IS_VALID(fufh)); - fufh->fh_id = fh_id; - fufh->fh_type = fufh_type; + fufh = malloc(sizeof(struct fuse_filehandle), M_FUSE_FILEHANDLE, + M_WAITOK); + MPASS(fufh != NULL); + fufh->fh_id = foo->fh; + fufh->fufh_type = fufh_type; + fufh->gid = cred->cr_rgid; + fufh->uid = cred->cr_uid; + fufh->pid = td->td_proc->p_pid; + fufh->fuse_open_flags = foo->open_flags; if (!FUFH_IS_VALID(fufh)) { panic("FUSE: init: invalid filehandle id (type=%d)", fufh_type); } + LIST_INSERT_HEAD(&fvdat->handles, fufh, next); if (fufhp != NULL) *fufhp = fufh; - atomic_add_acq_int(&fuse_fh_count, 1); + counter_u64_add(fuse_fh_count, 1); + + if (foo->open_flags & FOPEN_DIRECT_IO) { + ASSERT_VOP_ELOCKED(vp, __func__); + VTOFUD(vp)->flag |= FN_DIRECTIO; + fuse_io_invalbuf(vp, td); + } else { + if ((foo->open_flags & FOPEN_KEEP_CACHE) == 0) + fuse_io_invalbuf(vp, td); + VTOFUD(vp)->flag &= ~FN_DIRECTIO; + } + +} + +void +fuse_file_init(void) +{ + fuse_fh_count = counter_u64_alloc(M_WAITOK); + counter_u64_zero(fuse_fh_count); +} + +void +fuse_file_destroy(void) +{ + counter_u64_free(fuse_fh_count); } Index: sys/fs/fuse/fuse_internal.h =================================================================== --- sys/fs/fuse/fuse_internal.h +++ sys/fs/fuse/fuse_internal.h @@ -32,6 +32,11 @@ * * Copyright (C) 2005 Csaba Henk. * All rights reserved. + * + * Copyright (c) 2019 The FreeBSD Foundation + * + * Portions of this software were developed by BFF Storage Systems, LLC under + * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -61,6 +66,7 @@ #define _FUSE_INTERNAL_H_ #include +#include #include #include #include @@ -68,6 +74,9 @@ #include "fuse_ipc.h" #include "fuse_node.h" +extern counter_u64_t fuse_lookup_cache_hits; +extern counter_u64_t fuse_lookup_cache_misses; + static inline bool vfs_isrdonly(struct mount *mp) { @@ -80,12 +89,6 @@ return (vp->v_mount); } -static inline bool -vnode_mountedhere(struct vnode *vp) -{ - return (vp->v_mountedhere != NULL); -} - static inline enum vtype vnode_vtype(struct vnode *vp) { @@ -134,12 +137,6 @@ uio->uio_offset = offset; } -static inline void -uio_setresid(struct uio *uio, ssize_t resid) -{ - uio->uio_resid = resid; -} - /* miscellaneous */ static inline bool @@ -156,25 +153,57 @@ return (vp->v_mount->mnt_stat.f_iosize); } -/* access */ +/* + * Make a cacheable timeout in bintime format value based on a fuse_attr_out + * response + */ +static inline void +fuse_validity_2_bintime(uint64_t attr_valid, uint32_t attr_valid_nsec, + struct bintime *timeout) +{ + struct timespec now, duration, timeout_ts; -#define FVP_ACCESS_NOOP 0x01 + getnanouptime(&now); + /* "+ 2" is the bound of attr_valid_nsec + now.tv_nsec */ + /* Why oh why isn't there a TIME_MAX defined? 
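The switch from an atomic int sysctl to counter(9) above follows the usual pattern: allocate in the module's init path, export with SYSCTL_COUNTER_U64, bump with counter_u64_add(), and free on unload. A generic kernel-side sketch of that idiom; example_count and the example_* functions are placeholders, not part of this change.

    #include <sys/param.h>
    #include <sys/counter.h>
    #include <sys/kernel.h>
    #include <sys/malloc.h>
    #include <sys/sysctl.h>

    SYSCTL_DECL(_vfs);

    static counter_u64_t example_count;
    SYSCTL_COUNTER_U64(_vfs, OID_AUTO, example_count, CTLFLAG_RD,
        &example_count, "example per-CPU counter");

    static void
    example_init(void)            /* called from module load */
    {
            /* counter_u64_alloc(M_WAITOK) cannot fail. */
            example_count = counter_u64_alloc(M_WAITOK);
            counter_u64_zero(example_count);
    }

    static void
    example_event(void)
    {
            /* Lock-free per-CPU increment; decrement is add(..., -1). */
            counter_u64_add(example_count, 1);
    }

    static void
    example_destroy(void)         /* called from module unload */
    {
            counter_u64_free(example_count);
    }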
*/ + if (attr_valid >= INT_MAX || attr_valid + now.tv_sec + 2 >= INT_MAX) { + timeout->sec = INT_MAX; + } else { + duration.tv_sec = attr_valid; + duration.tv_nsec = attr_valid_nsec; + timespecadd(&duration, &now, &timeout_ts); + timespec2bintime(&timeout_ts, timeout); + } +} -#define FACCESS_VA_VALID 0x01 -#define FACCESS_DO_ACCESS 0x02 -#define FACCESS_STICKY 0x04 -#define FACCESS_CHOWN 0x08 -#define FACCESS_NOCHECKSPY 0x10 -#define FACCESS_SETGID 0x12 +/* + * Make a cacheable timeout value in timespec format based on the fuse_entry_out + * response + */ +static inline void +fuse_validity_2_timespec(const struct fuse_entry_out *feo, + struct timespec *timeout) +{ + struct timespec duration, now; -#define FACCESS_XQUERIES (FACCESS_STICKY | FACCESS_CHOWN | FACCESS_SETGID) + getnanouptime(&now); + /* "+ 2" is the bound of entry_valid_nsec + now.tv_nsec */ + if (feo->entry_valid >= INT_MAX || + feo->entry_valid + now.tv_sec + 2 >= INT_MAX) { + timeout->tv_sec = INT_MAX; + } else { + duration.tv_sec = feo->entry_valid; + duration.tv_nsec = feo->entry_valid_nsec; + timespecadd(&duration, &now, timeout); + } +} -struct fuse_access_param { - uid_t xuid; - gid_t xgid; - uint32_t facc_flags; -}; +/* VFS ops */ +int +fuse_internal_get_cached_vnode(struct mount*, ino_t, int, struct vnode**); + +/* access */ static inline int fuse_match_cred(struct ucred *basecred, struct ucred *usercred) { @@ -189,8 +218,8 @@ return (EPERM); } -int fuse_internal_access(struct vnode *vp, mode_t mode, - struct fuse_access_param *facp, struct thread *td, struct ucred *cred); +int fuse_internal_access(struct vnode *vp, accmode_t mode, + struct thread *td, struct ucred *cred); /* attributes */ void fuse_internal_cache_attrs(struct vnode *vp, struct fuse_attr *attr, @@ -198,21 +227,35 @@ /* fsync */ -int fuse_internal_fsync(struct vnode *vp, struct thread *td, - struct ucred *cred, struct fuse_filehandle *fufh); +int fuse_internal_fsync(struct vnode *vp, struct thread *td, int waitfor, + bool datasync); int fuse_internal_fsync_callback(struct fuse_ticket *tick, struct uio *uio); -/* readdir */ +/* getattr */ +int fuse_internal_do_getattr(struct vnode *vp, struct vattr *vap, + struct ucred *cred, struct thread *td); +int fuse_internal_getattr(struct vnode *vp, struct vattr *vap, + struct ucred *cred, struct thread *td); +/* asynchronous invalidation */ +int fuse_internal_invalidate_entry(struct mount *mp, struct uio *uio); +int fuse_internal_invalidate_inode(struct mount *mp, struct uio *uio); + +/* mknod */ +int fuse_internal_mknod(struct vnode *dvp, struct vnode **vpp, + struct componentname *cnp, struct vattr *vap); + +/* readdir */ struct pseudo_dirent { uint32_t d_namlen; }; +int fuse_internal_readdir(struct vnode *vp, struct uio *uio, off_t startoff, + struct fuse_filehandle *fufh, struct fuse_iov *cookediov, int *ncookies, + u_long *cookies); +int fuse_internal_readdir_processdata(struct uio *uio, off_t startoff, + int *fnd_start, size_t reqsize, void *buf, size_t bufsize, + struct fuse_iov *cookediov, int *ncookies, u_long **cookiesp); -int fuse_internal_readdir(struct vnode *vp, struct uio *uio, - struct fuse_filehandle *fufh, struct fuse_iov *cookediov); -int fuse_internal_readdir_processdata(struct uio *uio, size_t reqsize, - void *buf, size_t bufsize, void *param); - /* remove */ int fuse_internal_remove(struct vnode *dvp, struct vnode *vp, @@ -227,6 +270,10 @@ void fuse_internal_vnode_disappear(struct vnode *vp); +/* setattr */ +int fuse_internal_setattr(struct vnode *vp, struct vattr *va, + struct thread *td, 
struct ucred *cred); + /* strategy */ /* entity creation */ @@ -270,5 +317,9 @@ int fuse_internal_init_callback(struct fuse_ticket *tick, struct uio *uio); void fuse_internal_send_init(struct fuse_data *data, struct thread *td); + +/* module load/unload */ +void fuse_internal_init(void); +void fuse_internal_destroy(void); #endif /* _FUSE_INTERNAL_H_ */ Index: sys/fs/fuse/fuse_internal.c =================================================================== --- sys/fs/fuse/fuse_internal.c +++ sys/fs/fuse/fuse_internal.c @@ -33,6 +33,11 @@ * Copyright (C) 2005 Csaba Henk. * All rights reserved. * + * Copyright (c) 2019 The FreeBSD Foundation + * + * Portions of this software were developed by BFF Storage Systems, LLC under + * sponsorship from the FreeBSD Foundation. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -59,6 +64,7 @@ __FBSDID("$FreeBSD$"); #include +#include #include #include #include @@ -89,35 +95,78 @@ #include "fuse.h" #include "fuse_file.h" #include "fuse_internal.h" +#include "fuse_io.h" #include "fuse_ipc.h" #include "fuse_node.h" #include "fuse_file.h" -#include "fuse_param.h" -SDT_PROVIDER_DECLARE(fuse); +SDT_PROVIDER_DECLARE(fusefs); /* * Fuse trace probe: * arg0: verbosity. Higher numbers give more verbose messages * arg1: Textual message */ -SDT_PROBE_DEFINE2(fuse, , internal, trace, "int", "char*"); +SDT_PROBE_DEFINE2(fusefs, , internal, trace, "int", "char*"); #ifdef ZERO_PAD_INCOMPLETE_BUFS static int isbzero(void *buf, size_t len); #endif -/* access */ +counter_u64_t fuse_lookup_cache_hits; +counter_u64_t fuse_lookup_cache_misses; +SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, lookup_cache_hits, CTLFLAG_RD, + &fuse_lookup_cache_hits, "number of positive cache hits in lookup"); + +SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, lookup_cache_misses, CTLFLAG_RD, + &fuse_lookup_cache_misses, "number of cache misses in lookup"); + int +fuse_internal_get_cached_vnode(struct mount* mp, ino_t ino, int flags, + struct vnode **vpp) +{ + struct bintime now; + struct thread *td = curthread; + uint64_t nodeid = ino; + int error; + + *vpp = NULL; + + error = vfs_hash_get(mp, fuse_vnode_hash(nodeid), flags, td, vpp, + fuse_vnode_cmp, &nodeid); + if (error) + return error; + /* + * Check the entry cache timeout. We have to do this within fusefs + * instead of by using cache_enter_time/cache_lookup because those + * routines are only intended to work with pathnames, not inodes + */ + if (*vpp != NULL) { + getbinuptime(&now); + if (bintime_cmp(&(VTOFUD(*vpp)->entry_cache_timeout), &now, >)){ + counter_u64_add(fuse_lookup_cache_hits, 1); + return 0; + } else { + /* Entry cache timeout */ + counter_u64_add(fuse_lookup_cache_misses, 1); + cache_purge(*vpp); + vput(*vpp); + *vpp = NULL; + } + } + return 0; +} + +/* Synchronously send a FUSE_ACCESS operation */ +int fuse_internal_access(struct vnode *vp, - mode_t mode, - struct fuse_access_param *facp, + accmode_t mode, struct thread *td, struct ucred *cred) { int err = 0; - uint32_t mask = 0; + uint32_t mask = F_OK; int dataflags; int vtype; struct mount *mp; @@ -125,77 +174,57 @@ struct fuse_access_in *fai; struct fuse_data *data; - /* NOT YET DONE */ - /* - * If this vnop gives you trouble, just return 0 here for a lazy - * kludge. 
- */ - /* return 0;*/ - mp = vnode_mount(vp); vtype = vnode_vtype(vp); data = fuse_get_mpdata(mp); dataflags = data->dataflags; - if ((mode & VWRITE) && vfs_isrdonly(mp)) { - return EACCES; - } - /* Unless explicitly permitted, deny everyone except the fs owner. */ - if (vnode_isvroot(vp) && !(facp->facc_flags & FACCESS_NOCHECKSPY)) { - if (!(dataflags & FSESS_DAEMON_CAN_SPY)) { - int denied = fuse_match_cred(data->daemoncred, - cred); + if (mode == 0) + return 0; - if (denied) { - return EPERM; - } + if (mode & VMODIFY_PERMS && vfs_isrdonly(mp)) { + switch (vp->v_type) { + case VDIR: + /* FALLTHROUGH */ + case VLNK: + /* FALLTHROUGH */ + case VREG: + return EROFS; + default: + break; } - facp->facc_flags |= FACCESS_NOCHECKSPY; } - if (!(facp->facc_flags & FACCESS_DO_ACCESS)) { - return 0; + + /* Unless explicitly permitted, deny everyone except the fs owner. */ + if (!(dataflags & FSESS_DAEMON_CAN_SPY)) { + if (fuse_match_cred(data->daemoncred, cred)) + return EPERM; } - if (((vtype == VREG) && (mode & VEXEC))) { -#ifdef NEED_MOUNT_ARGUMENT_FOR_THIS - /* Let the kernel handle this through open / close heuristics.*/ - return ENOTSUP; -#else - /* Let the kernel handle this. */ - return 0; -#endif - } - if (!fsess_isimpl(mp, FUSE_ACCESS)) { - /* Let the kernel handle this. */ - return 0; - } + if (dataflags & FSESS_DEFAULT_PERMISSIONS) { - /* Let the kernel handle this. */ - return 0; + struct vattr va; + + fuse_internal_getattr(vp, &va, cred, td); + return vaccess(vp->v_type, va.va_mode, va.va_uid, + va.va_gid, mode, cred, NULL); } - if ((mode & VADMIN) != 0) { - err = priv_check_cred(cred, PRIV_VFS_ADMIN); - if (err) { - return err; - } - } - if ((mode & (VWRITE | VAPPEND | VADMIN)) != 0) { + + if (!fsess_isimpl(mp, FUSE_ACCESS)) + return 0; + + if ((mode & (VWRITE | VAPPEND | VADMIN)) != 0) mask |= W_OK; - } - if ((mode & VREAD) != 0) { + if ((mode & VREAD) != 0) mask |= R_OK; - } - if ((mode & VEXEC) != 0) { + if ((mode & VEXEC) != 0) mask |= X_OK; - } - bzero(&fdi, sizeof(fdi)); fdisp_init(&fdi, sizeof(*fai)); fdisp_make_vp(&fdi, FUSE_ACCESS, vp, td, cred); fai = fdi.indata; - fai->mask = F_OK; - fai->mask |= mask; + fai->mask = mask; err = fdisp_wait_answ(&fdi); fdisp_destroy(&fdi); @@ -208,9 +237,9 @@ } /* - * Cache FUSE attributes from feo, in attr cache associated with vnode 'vp'. - * Optionally, if argument 'vap' is not NULL, store a copy of the converted - * attributes there as well. + * Cache FUSE attributes from attr, in attribute cache associated with vnode + * 'vp'. Optionally, if argument 'vap' is not NULL, store a copy of the + * converted attributes there as well. * * If the nominal attribute cache TTL is zero, do not cache on the 'vp' (but do * return the result to the caller). @@ -221,49 +250,57 @@ { struct mount *mp; struct fuse_vnode_data *fvdat; + struct fuse_data *data; struct vattr *vp_cache_at; mp = vnode_mount(vp); fvdat = VTOFUD(vp); + data = fuse_get_mpdata(mp); - /* Honor explicit do-not-cache requests from user filesystems. 
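The rewritten fuse_internal_access() above only issues FUSE_ACCESS when the daemon implements it and default_permissions is off; otherwise the kernel either runs vaccess() itself or allows the access. On the daemon side, the mask it receives is the R_OK/W_OK/X_OK combination built here. A sketch of what a high-level libfuse server's handler might look like, assuming libfuse's access callback signature; myfs_access and its policy are purely illustrative.

    #include <errno.h>
    #include <unistd.h>

    /*
     * 'mask' is F_OK or a bitwise OR of R_OK/W_OK/X_OK, which is what
     * the kernel packs into fuse_access_in.  Returning -ENOSYS once
     * makes the kernel stop sending FUSE_ACCESS for this mount.
     */
    static int
    myfs_access(const char *path, int mask)
    {
            /* Hypothetical policy: a read-only tree rejects writes. */
            if (mask & W_OK)
                    return (-EACCES);
            (void)path;
            return (0);
    }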
*/ - if (attr_valid == 0 && attr_valid_nsec == 0) - fvdat->valid_attr_cache = false; - else - fvdat->valid_attr_cache = true; + ASSERT_VOP_ELOCKED(vp, "fuse_internal_cache_attrs"); - vp_cache_at = VTOVA(vp); + fuse_validity_2_bintime(attr_valid, attr_valid_nsec, + &fvdat->attr_cache_timeout); - if (vap == NULL && vp_cache_at == NULL) + /* Fix our buffers if the filesize changed without us knowing */ + if (vnode_isreg(vp) && attr->size != fvdat->cached_attrs.va_size) { + (void)fuse_vnode_setsize(vp, attr->size); + fvdat->cached_attrs.va_size = attr->size; + } + + if (attr_valid > 0 || attr_valid_nsec > 0) + vp_cache_at = &(fvdat->cached_attrs); + else if (vap != NULL) + vp_cache_at = vap; + else return; - if (vap == NULL) - vap = vp_cache_at; - - vattr_null(vap); - - vap->va_fsid = mp->mnt_stat.f_fsid.val[0]; - vap->va_fileid = attr->ino; - vap->va_mode = attr->mode & ~S_IFMT; - vap->va_nlink = attr->nlink; - vap->va_uid = attr->uid; - vap->va_gid = attr->gid; - vap->va_rdev = attr->rdev; - vap->va_size = attr->size; + vattr_null(vp_cache_at); + vp_cache_at->va_fsid = mp->mnt_stat.f_fsid.val[0]; + vp_cache_at->va_fileid = attr->ino; + vp_cache_at->va_mode = attr->mode & ~S_IFMT; + vp_cache_at->va_nlink = attr->nlink; + vp_cache_at->va_uid = attr->uid; + vp_cache_at->va_gid = attr->gid; + vp_cache_at->va_rdev = attr->rdev; + vp_cache_at->va_size = attr->size; /* XXX on i386, seconds are truncated to 32 bits */ - vap->va_atime.tv_sec = attr->atime; - vap->va_atime.tv_nsec = attr->atimensec; - vap->va_mtime.tv_sec = attr->mtime; - vap->va_mtime.tv_nsec = attr->mtimensec; - vap->va_ctime.tv_sec = attr->ctime; - vap->va_ctime.tv_nsec = attr->ctimensec; - vap->va_blocksize = PAGE_SIZE; - vap->va_type = IFTOVT(attr->mode); - vap->va_bytes = attr->blocks * S_BLKSIZE; - vap->va_flags = 0; + vp_cache_at->va_atime.tv_sec = attr->atime; + vp_cache_at->va_atime.tv_nsec = attr->atimensec; + vp_cache_at->va_mtime.tv_sec = attr->mtime; + vp_cache_at->va_mtime.tv_nsec = attr->mtimensec; + vp_cache_at->va_ctime.tv_sec = attr->ctime; + vp_cache_at->va_ctime.tv_nsec = attr->ctimensec; + if (fuse_libabi_geq(data, 7, 9) && attr->blksize > 0) + vp_cache_at->va_blocksize = attr->blksize; + else + vp_cache_at->va_blocksize = PAGE_SIZE; + vp_cache_at->va_type = IFTOVT(attr->mode); + vp_cache_at->va_bytes = attr->blocks * S_BLKSIZE; + vp_cache_at->va_flags = 0; - if (vap != vp_cache_at && vp_cache_at != NULL) - memcpy(vp_cache_at, vap, sizeof(*vap)); + if (vap != vp_cache_at && vap != NULL) + memcpy(vap, vp_cache_at, sizeof(*vap)); } @@ -281,47 +318,195 @@ int fuse_internal_fsync(struct vnode *vp, struct thread *td, - struct ucred *cred, - struct fuse_filehandle *fufh) + int waitfor, + bool datasync) { - int op = FUSE_FSYNC; - struct fuse_fsync_in *ffsi; + struct fuse_fsync_in *ffsi = NULL; struct fuse_dispatcher fdi; + struct fuse_filehandle *fufh; + struct fuse_vnode_data *fvdat = VTOFUD(vp); + struct mount *mp = vnode_mount(vp); + int op = FUSE_FSYNC; + int err = 0; - if (vnode_isdir(vp)) { - op = FUSE_FSYNCDIR; + if (!fsess_isimpl(vnode_mount(vp), + (vnode_vtype(vp) == VDIR ? 
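The attr_valid/attr_valid_nsec pair cached above comes straight from the daemon's getattr reply. With the low-level libfuse API the server expresses it as a single floating-point timeout; a minimal sketch, assuming libfuse 3's fuse_reply_attr() helper, with an arbitrary one-second validity and a hypothetical myfs_getattr handler:

    #include <sys/stat.h>
    #include <string.h>

    #define FUSE_USE_VERSION 34
    #include <fuse_lowlevel.h>

    static void
    myfs_getattr(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
    {
            struct stat st;

            memset(&st, 0, sizeof(st));
            st.st_ino = ino;
            st.st_mode = S_IFREG | 0644;
            (void)fi;

            /*
             * The 1.0 here becomes attr_valid=1, attr_valid_nsec=0 on
             * the wire, which fusefs turns into a bintime expiration.
             */
            fuse_reply_attr(req, &st, 1.0);
    }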
FUSE_FSYNCDIR : FUSE_FSYNC))) {
+ return 0;
}
- fdisp_init(&fdi, sizeof(*ffsi));
- fdisp_make_vp(&fdi, op, vp, td, cred);
- ffsi = fdi.indata;
- ffsi->fh = fufh->fh_id;
+ if (vnode_isdir(vp))
+ op = FUSE_FSYNCDIR;
- ffsi->fsync_flags = 1; /* datasync */
+ if (!fsess_isimpl(mp, op))
+ return 0;
- fuse_insert_callback(fdi.tick, fuse_internal_fsync_callback);
- fuse_insert_message(fdi.tick);
+ fdisp_init(&fdi, sizeof(*ffsi));
+ /*
+ * fsync every open file handle for this file, because we can't be sure
+ * which file handle the caller is really referring to.
+ */
+ LIST_FOREACH(fufh, &fvdat->handles, next) {
+ if (ffsi == NULL)
+ fdisp_make_vp(&fdi, op, vp, td, NULL);
+ else
+ fdisp_refresh_vp(&fdi, op, vp, td, NULL);
+ ffsi = fdi.indata;
+ ffsi->fh = fufh->fh_id;
+ ffsi->fsync_flags = 0;
+ if (datasync)
+ ffsi->fsync_flags = 1;
+
+ if (waitfor == MNT_WAIT) {
+ err = fdisp_wait_answ(&fdi);
+ } else {
+ fuse_insert_callback(fdi.tick,
+ fuse_internal_fsync_callback);
+ fuse_insert_message(fdi.tick, false);
+ }
+ if (err == ENOSYS) {
+ /* ENOSYS means "success, and don't call again" */
+ fsess_set_notimpl(mp, op);
+ err = 0;
+ break;
+ }
+ }
fdisp_destroy(&fdi);
- return 0;
+ return err;
+}
+/* Asynchronous invalidation */
+SDT_PROBE_DEFINE2(fusefs, , internal, invalidate_cache_hit,
+ "struct vnode*", "struct vnode*");
+int
+fuse_internal_invalidate_entry(struct mount *mp, struct uio *uio)
+{
+ struct fuse_notify_inval_entry_out fnieo;
+ struct componentname cn;
+ struct vnode *dvp, *vp;
+ char name[PATH_MAX];
+ int err;
+
+ if ((err = uiomove(&fnieo, sizeof(fnieo), uio)) != 0)
+ return (err);
+
+ if ((err = uiomove(name, fnieo.namelen, uio)) != 0)
+ return (err);
+ name[fnieo.namelen] = '\0';
+ /* fusefs does not cache "." or ".." entries */
+ if (strncmp(name, ".", sizeof(".")) == 0 ||
+ strncmp(name, "..", sizeof("..")) == 0)
+ return (0);
+
+ if (fnieo.parent == FUSE_ROOT_ID)
+ err = VFS_ROOT(mp, LK_SHARED, &dvp);
+ else
+ err = fuse_internal_get_cached_vnode( mp, fnieo.parent,
+ LK_SHARED, &dvp);
+ /*
+ * If dvp is not in the cache, then it must've been reclaimed. And
+ * since fuse_vnop_reclaim does a cache_purge, name's entry must've
+ * been invalidated already. So we can safely return if dvp == NULL
+ */
+ if (err != 0 || dvp == NULL)
+ return (err);
+ /*
+ * XXX we can't check dvp's generation because the FUSE invalidate
+ * entry message doesn't include it. Worst case is that we invalidate
+ * an entry that didn't need to be invalidated.
+ */
+
+ cn.cn_nameiop = LOOKUP;
+ cn.cn_flags = 0; /* !MAKEENTRY means free cached entry */
+ cn.cn_thread = curthread;
+ cn.cn_cred = curthread->td_ucred;
+ cn.cn_lkflags = LK_SHARED;
+ cn.cn_pnbuf = NULL;
+ cn.cn_nameptr = name;
+ cn.cn_namelen = fnieo.namelen;
+ err = cache_lookup(dvp, &vp, &cn, NULL, NULL);
+ MPASS(err == 0);
+ fuse_vnode_clear_attr_cache(dvp);
+ vput(dvp);
+ return (0);
}
+int
+fuse_internal_invalidate_inode(struct mount *mp, struct uio *uio)
+{
+ struct fuse_notify_inval_inode_out fniio;
+ struct vnode *vp;
+ int err;
+
+ if ((err = uiomove(&fniio, sizeof(fniio), uio)) != 0)
+ return (err);
+
+ if (fniio.ino == FUSE_ROOT_ID)
+ err = VFS_ROOT(mp, LK_EXCLUSIVE, &vp);
+ else
+ err = fuse_internal_get_cached_vnode(mp, fniio.ino, LK_SHARED,
+ &vp);
+ if (err != 0 || vp == NULL)
+ return (err);
+ /*
+ * XXX we can't check vp's generation because the FUSE invalidate
+ * entry message doesn't include it. Worst case is that we invalidate
+ * an inode that didn't need to be invalidated. 
+ */ + + /* + * Flush and invalidate buffers if off >= 0. Technically we only need + * to flush and invalidate the range of offsets [off, off + len), but + * for simplicity's sake we do everything. + */ + if (fniio.off >= 0) + fuse_io_invalbuf(vp, curthread); + fuse_vnode_clear_attr_cache(vp); + vput(vp); + return (0); +} + +/* mknod */ +int +fuse_internal_mknod(struct vnode *dvp, struct vnode **vpp, + struct componentname *cnp, struct vattr *vap) +{ + struct fuse_data *data; + struct fuse_mknod_in fmni; + size_t insize; + + data = fuse_get_mpdata(dvp->v_mount); + + fmni.mode = MAKEIMODE(vap->va_type, vap->va_mode); + fmni.rdev = vap->va_rdev; + if (fuse_libabi_geq(data, 7, 12)) { + insize = sizeof(fmni); + fmni.umask = curthread->td_proc->p_fd->fd_cmask; + } else { + insize = FUSE_COMPAT_MKNOD_IN_SIZE; + } + return (fuse_internal_newentry(dvp, vpp, cnp, FUSE_MKNOD, &fmni, + insize, vap->va_type)); +} + /* readdir */ int fuse_internal_readdir(struct vnode *vp, struct uio *uio, + off_t startoff, struct fuse_filehandle *fufh, - struct fuse_iov *cookediov) + struct fuse_iov *cookediov, + int *ncookies, + u_long *cookies) { int err = 0; struct fuse_dispatcher fdi; - struct fuse_read_in *fri; + struct fuse_read_in *fri = NULL; + int fnd_start; - if (uio_resid(uio) == 0) { + if (uio_resid(uio) == 0) return 0; - } fdisp_init(&fdi, 0); /* @@ -329,51 +514,70 @@ * I/O). */ + /* + * fnd_start is set non-zero once the offset in the directory gets + * to the startoff. This is done because directories must be read + * from the beginning (offset == 0) when fuse_vnop_readdir() needs + * to do an open of the directory. + * If it is not set non-zero here, it will be set non-zero in + * fuse_internal_readdir_processdata() when uio_offset == startoff. + */ + fnd_start = 0; + if (uio->uio_offset == startoff) + fnd_start = 1; while (uio_resid(uio) > 0) { - fdi.iosize = sizeof(*fri); - fdisp_make_vp(&fdi, FUSE_READDIR, vp, NULL, NULL); + if (fri == NULL) + fdisp_make_vp(&fdi, FUSE_READDIR, vp, NULL, NULL); + else + fdisp_refresh_vp(&fdi, FUSE_READDIR, vp, NULL, NULL); fri = fdi.indata; fri->fh = fufh->fh_id; fri->offset = uio_offset(uio); - fri->size = min(uio_resid(uio), FUSE_DEFAULT_IOSIZE); - /* mp->max_read */ + fri->size = MIN(uio->uio_resid, + fuse_get_mpdata(vp->v_mount)->max_read); - if ((err = fdisp_wait_answ(&fdi))) { + if ((err = fdisp_wait_answ(&fdi))) break; - } - if ((err = fuse_internal_readdir_processdata(uio, fri->size, fdi.answ, - fdi.iosize, cookediov))) { + if ((err = fuse_internal_readdir_processdata(uio, startoff, + &fnd_start, fri->size, fdi.answ, fdi.iosize, cookediov, + ncookies, &cookies))) break; - } } fdisp_destroy(&fdi); return ((err == -1) ? 0 : err); } +/* + * Return -1 to indicate that this readdir is finished, 0 if it copied + * all the directory data read in and it may be possible to read more + * and greater than 0 for a failure. 
+ */ int fuse_internal_readdir_processdata(struct uio *uio, + off_t startoff, + int *fnd_start, size_t reqsize, void *buf, size_t bufsize, - void *param) + struct fuse_iov *cookediov, + int *ncookies, + u_long **cookiesp) { int err = 0; - int cou = 0; int bytesavail; size_t freclen; struct dirent *de; struct fuse_dirent *fudge; - struct fuse_iov *cookediov = param; + u_long *cookies; - if (bufsize < FUSE_NAME_OFFSET) { + cookies = *cookiesp; + if (bufsize < FUSE_NAME_OFFSET) return -1; - } for (;;) { - if (bufsize < FUSE_NAME_OFFSET) { err = -1; break; @@ -381,10 +585,12 @@ fudge = (struct fuse_dirent *)buf; freclen = FUSE_DIRENT_SIZE(fudge); - cou++; - if (bufsize < freclen) { - err = ((cou == 1) ? -1 : 0); + /* + * This indicates a partial directory entry at the + * end of the directory data. + */ + err = -1; break; } #ifdef ZERO_PAD_INCOMPLETE_BUFS @@ -402,30 +608,47 @@ &fudge->namelen); if (bytesavail > uio_resid(uio)) { + /* Out of space for the dir so we are done. */ err = -1; break; } - fiov_refresh(cookediov); - fiov_adjust(cookediov, bytesavail); + /* + * Don't start to copy the directory entries out until + * the requested offset in the directory is found. + */ + if (*fnd_start != 0) { + fiov_adjust(cookediov, bytesavail); + bzero(cookediov->base, bytesavail); - de = (struct dirent *)cookediov->base; - de->d_fileno = fudge->ino; - de->d_reclen = bytesavail; - de->d_type = fudge->type; - de->d_namlen = fudge->namelen; - memcpy((char *)cookediov->base + sizeof(struct dirent) - - MAXNAMLEN - 1, - (char *)buf + FUSE_NAME_OFFSET, fudge->namelen); - dirent_terminate(de); + de = (struct dirent *)cookediov->base; + de->d_fileno = fudge->ino; + de->d_reclen = bytesavail; + de->d_type = fudge->type; + de->d_namlen = fudge->namelen; + memcpy((char *)cookediov->base + sizeof(struct dirent) - + MAXNAMLEN - 1, + (char *)buf + FUSE_NAME_OFFSET, fudge->namelen); + dirent_terminate(de); - err = uiomove(cookediov->base, cookediov->len, uio); - if (err) { - break; - } + err = uiomove(cookediov->base, cookediov->len, uio); + if (err) + break; + if (cookies != NULL) { + if (*ncookies == 0) { + err = -1; + break; + } + *cookies = fudge->off; + cookies++; + (*ncookies)--; + } + } else if (startoff == fudge->off) + *fnd_start = 1; buf = (char *)buf + freclen; bufsize -= freclen; uio_setoffset(uio, fudge->off); } + *cookiesp = cookies; return err; } @@ -439,12 +662,9 @@ enum fuse_opcode op) { struct fuse_dispatcher fdi; - struct fuse_vnode_data *fvdat; - int err; + nlink_t nlink; + int err = 0; - err = 0; - fvdat = VTOFUD(vp); - fdisp_init(&fdi, cnp->cn_namelen + 1); fdisp_make_vp(&fdi, op, dvp, cnp->cn_thread, cnp->cn_cred); @@ -453,6 +673,35 @@ err = fdisp_wait_answ(&fdi); fdisp_destroy(&fdi); + + if (err) + return (err); + + /* + * Access the cached nlink even if the attr cached has expired. If + * it's inaccurate, the worst that will happen is: + * 1) We'll recycle the vnode even though the file has another link we + * don't know about, costing a bit of cpu time, or + * 2) We won't recycle the vnode even though all of its links are gone. + * It will linger around until vnlru reclaims it, costing a bit of + * temporary memory. + */ + nlink = VTOFUD(vp)->cached_attrs.va_nlink--; + + /* + * Purge the parent's attribute cache because the daemon + * should've updated its mtime and ctime. 
+ */ + fuse_vnode_clear_attr_cache(dvp); + + /* NB: nlink could be zero if it was never cached */ + if (nlink <= 1 || vnode_vtype(vp) == VDIR) { + fuse_internal_vnode_disappear(vp); + } else { + cache_purge(vp); + fuse_vnode_update(vp, FN_CTIMECHANGE); + } + return err; } @@ -532,6 +781,13 @@ feo->nodeid, 1); return err; } + + /* + * Purge the parent's attribute cache because the daemon should've + * updated its mtime and ctime + */ + fuse_vnode_clear_attr_cache(dvp); + fuse_internal_cache_attrs(*vpp, &feo->attr, feo->attr_valid, feo->attr_valid_nsec, NULL); @@ -593,10 +849,79 @@ ffi = fdi.indata; ffi->nlookup = nlookup; - fuse_insert_message(fdi.tick); + fuse_insert_message(fdi.tick, false); fdisp_destroy(&fdi); } +/* Fetch the vnode's attributes from the daemon*/ +int +fuse_internal_do_getattr(struct vnode *vp, struct vattr *vap, + struct ucred *cred, struct thread *td) +{ + struct fuse_dispatcher fdi; + struct fuse_vnode_data *fvdat = VTOFUD(vp); + struct fuse_getattr_in *fgai; + struct fuse_attr_out *fao; + off_t old_filesize = fvdat->cached_attrs.va_size; + struct timespec old_ctime = fvdat->cached_attrs.va_ctime; + struct timespec old_mtime = fvdat->cached_attrs.va_mtime; + enum vtype vtyp; + int err; + + fdisp_init(&fdi, 0); + fdisp_make_vp(&fdi, FUSE_GETATTR, vp, td, cred); + fgai = fdi.indata; + /* + * We could look up a file handle and set it in fgai->fh, but that + * involves extra runtime work and I'm unaware of any file systems that + * care. + */ + fgai->getattr_flags = 0; + if ((err = fdisp_simple_putget_vp(&fdi, FUSE_GETATTR, vp, td, cred))) { + if (err == ENOENT) + fuse_internal_vnode_disappear(vp); + goto out; + } + + fao = (struct fuse_attr_out *)fdi.answ; + vtyp = IFTOVT(fao->attr.mode); + if (fvdat->flag & FN_SIZECHANGE) + fao->attr.size = old_filesize; + if (fvdat->flag & FN_CTIMECHANGE) { + fao->attr.ctime = old_ctime.tv_sec; + fao->attr.ctimensec = old_ctime.tv_nsec; + } + if (fvdat->flag & FN_MTIMECHANGE) { + fao->attr.mtime = old_mtime.tv_sec; + fao->attr.mtimensec = old_mtime.tv_nsec; + } + fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid, + fao->attr_valid_nsec, vap); + if (vtyp != vnode_vtype(vp)) { + fuse_internal_vnode_disappear(vp); + err = ENOENT; + } + +out: + fdisp_destroy(&fdi); + return err; +} + +/* Read a vnode's attributes from cache or fetch them from the fuse daemon */ +int +fuse_internal_getattr(struct vnode *vp, struct vattr *vap, struct ucred *cred, + struct thread *td) +{ + struct vattr *attrs; + + if ((attrs = VTOVA(vp)) != NULL) { + *vap = *attrs; /* struct copy */ + return 0; + } + + return fuse_internal_do_getattr(vp, vap, cred, td); +} + void fuse_internal_vnode_disappear(struct vnode *vp) { @@ -604,7 +929,6 @@ ASSERT_VOP_ELOCKED(vp, "fuse_internal_vnode_disappear"); fvdat->flag |= FN_REVOKED; - fvdat->valid_attr_cache = false; cache_purge(vp); } @@ -625,27 +949,69 @@ } fiio = fticket_resp(tick)->base; - /* XXX: Do we want to check anything further besides this? */ - if (fiio->major < 7) { - SDT_PROBE2(fuse, , internal, trace, 1, + data->fuse_libabi_major = fiio->major; + data->fuse_libabi_minor = fiio->minor; + if (!fuse_libabi_geq(data, 7, 4)) { + /* + * With a little work we could support servers as old as 7.1. + * But there would be little payoff. 
+ */ + SDT_PROBE2(fusefs, , internal, trace, 1, "userpace version too low"); err = EPROTONOSUPPORT; goto out; } - data->fuse_libabi_major = fiio->major; - data->fuse_libabi_minor = fiio->minor; if (fuse_libabi_geq(data, 7, 5)) { - if (fticket_resp(tick)->len == sizeof(struct fuse_init_out)) { + if (fticket_resp(tick)->len == sizeof(struct fuse_init_out) || + fticket_resp(tick)->len == FUSE_COMPAT_22_INIT_OUT_SIZE) { data->max_write = fiio->max_write; + if (fiio->flags & FUSE_ASYNC_READ) + data->dataflags |= FSESS_ASYNC_READ; + if (fiio->flags & FUSE_POSIX_LOCKS) + data->dataflags |= FSESS_POSIX_LOCKS; + if (fiio->flags & FUSE_EXPORT_SUPPORT) + data->dataflags |= FSESS_EXPORT_SUPPORT; + /* + * Don't bother to check FUSE_BIG_WRITES, because it's + * redundant with max_write + */ + /* + * max_background and congestion_threshold are not + * implemented + */ } else { err = EINVAL; } } else { - /* Old fix values */ + /* Old fixed values */ data->max_write = 4096; } + if (fuse_libabi_geq(data, 7, 6)) + data->max_readahead_blocks = fiio->max_readahead / maxbcachebuf; + + if (!fuse_libabi_geq(data, 7, 7)) + fsess_set_notimpl(data->mp, FUSE_INTERRUPT); + + if (!fuse_libabi_geq(data, 7, 8)) { + fsess_set_notimpl(data->mp, FUSE_BMAP); + fsess_set_notimpl(data->mp, FUSE_DESTROY); + } + + if (fuse_libabi_geq(data, 7, 23) && fiio->time_gran >= 1 && + fiio->time_gran <= 1000000000) + data->time_gran = fiio->time_gran; + else + data->time_gran = 1; + + if (!fuse_libabi_geq(data, 7, 23)) + data->cache_mode = fuse_data_cache_mode; + else if (fiio->flags & FUSE_WRITEBACK_CACHE) + data->cache_mode = FUSE_CACHE_WB; + else + data->cache_mode = FUSE_CACHE_WT; + out: if (err) { fdata_set_dead(data); @@ -669,14 +1035,156 @@ fiii = fdi.indata; fiii->major = FUSE_KERNEL_VERSION; fiii->minor = FUSE_KERNEL_MINOR_VERSION; - fiii->max_readahead = FUSE_DEFAULT_IOSIZE * 16; - fiii->flags = 0; + /* + * fusefs currently reads ahead no more than one cache block at a time. + * See fuse_read_biobackend + */ + fiii->max_readahead = maxbcachebuf; + /* + * Unsupported features: + * FUSE_FILE_OPS: No known FUSE server or client supports it + * FUSE_ATOMIC_O_TRUNC: our VFS cannot support it + * FUSE_DONT_MASK: unlike Linux, FreeBSD always applies the umask, even + * when default ACLs are in use. + * FUSE_SPLICE_WRITE, FUSE_SPLICE_MOVE, FUSE_SPLICE_READ: FreeBSD + * doesn't have splice(2). + * FUSE_FLOCK_LOCKS: not yet implemented + * FUSE_HAS_IOCTL_DIR: not yet implemented + * FUSE_AUTO_INVAL_DATA: not yet implemented + * FUSE_DO_READDIRPLUS: not yet implemented + * FUSE_READDIRPLUS_AUTO: not yet implemented + * FUSE_ASYNC_DIO: not yet implemented + * FUSE_NO_OPEN_SUPPORT: not yet implemented + */ + fiii->flags = FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_EXPORT_SUPPORT + | FUSE_BIG_WRITES | FUSE_WRITEBACK_CACHE; fuse_insert_callback(fdi.tick, fuse_internal_init_callback); - fuse_insert_message(fdi.tick); + fuse_insert_message(fdi.tick, false); fdisp_destroy(&fdi); } +/* + * Send a FUSE_SETATTR operation with no permissions checks. 
If cred is NULL, + * send the request with root credentials + */ +int fuse_internal_setattr(struct vnode *vp, struct vattr *vap, + struct thread *td, struct ucred *cred) +{ + struct fuse_vnode_data *fvdat; + struct fuse_dispatcher fdi; + struct fuse_setattr_in *fsai; + struct mount *mp; + pid_t pid = td->td_proc->p_pid; + struct fuse_data *data; + int dataflags; + int err = 0; + enum vtype vtyp; + int sizechanged = -1; + uint64_t newsize = 0; + + mp = vnode_mount(vp); + fvdat = VTOFUD(vp); + data = fuse_get_mpdata(mp); + dataflags = data->dataflags; + + fdisp_init(&fdi, sizeof(*fsai)); + fdisp_make_vp(&fdi, FUSE_SETATTR, vp, td, cred); + if (!cred) { + fdi.finh->uid = 0; + fdi.finh->gid = 0; + } + fsai = fdi.indata; + fsai->valid = 0; + + if (vap->va_uid != (uid_t)VNOVAL) { + fsai->uid = vap->va_uid; + fsai->valid |= FATTR_UID; + } + if (vap->va_gid != (gid_t)VNOVAL) { + fsai->gid = vap->va_gid; + fsai->valid |= FATTR_GID; + } + if (vap->va_size != VNOVAL) { + struct fuse_filehandle *fufh = NULL; + + /*Truncate to a new value. */ + fsai->size = vap->va_size; + sizechanged = 1; + newsize = vap->va_size; + fsai->valid |= FATTR_SIZE; + + fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid); + if (fufh) { + fsai->fh = fufh->fh_id; + fsai->valid |= FATTR_FH; + } + VTOFUD(vp)->flag &= ~FN_SIZECHANGE; + } + if (vap->va_atime.tv_sec != VNOVAL) { + fsai->atime = vap->va_atime.tv_sec; + fsai->atimensec = vap->va_atime.tv_nsec; + fsai->valid |= FATTR_ATIME; + if (vap->va_vaflags & VA_UTIMES_NULL) + fsai->valid |= FATTR_ATIME_NOW; + } + if (vap->va_mtime.tv_sec != VNOVAL) { + fsai->mtime = vap->va_mtime.tv_sec; + fsai->mtimensec = vap->va_mtime.tv_nsec; + fsai->valid |= FATTR_MTIME; + if (vap->va_vaflags & VA_UTIMES_NULL) + fsai->valid |= FATTR_MTIME_NOW; + } else if (fvdat->flag & FN_MTIMECHANGE) { + fsai->mtime = fvdat->cached_attrs.va_mtime.tv_sec; + fsai->mtimensec = fvdat->cached_attrs.va_mtime.tv_nsec; + fsai->valid |= FATTR_MTIME; + } + if (fuse_libabi_geq(data, 7, 23) && fvdat->flag & FN_CTIMECHANGE) { + fsai->ctime = fvdat->cached_attrs.va_ctime.tv_sec; + fsai->ctimensec = fvdat->cached_attrs.va_ctime.tv_nsec; + fsai->valid |= FATTR_CTIME; + } + if (vap->va_mode != (mode_t)VNOVAL) { + fsai->mode = vap->va_mode & ALLPERMS; + fsai->valid |= FATTR_MODE; + } + if (!fsai->valid) { + goto out; + } + + if ((err = fdisp_wait_answ(&fdi))) + goto out; + vtyp = IFTOVT(((struct fuse_attr_out *)fdi.answ)->attr.mode); + + if (vnode_vtype(vp) != vtyp) { + if (vnode_vtype(vp) == VNON && vtyp != VNON) { + SDT_PROBE2(fusefs, , internal, trace, 1, "FUSE: Dang! " + "vnode_vtype is VNON and vtype isn't."); + } else { + /* + * STALE vnode, ditch + * + * The vnode has changed its type "behind our back". + * There's nothing really we can do, so let us just + * force an internal revocation and tell the caller to + * try again, if interested. 
+ */ + fuse_internal_vnode_disappear(vp); + err = EAGAIN; + } + } + if (err == 0) { + struct fuse_attr_out *fao = (struct fuse_attr_out*)fdi.answ; + fuse_vnode_undirty_cached_timestamps(vp); + fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid, + fao->attr_valid_nsec, NULL); + } + +out: + fdisp_destroy(&fdi); + return err; +} + #ifdef ZERO_PAD_INCOMPLETE_BUFS static int isbzero(void *buf, size_t len) @@ -692,3 +1200,19 @@ } #endif + +void +fuse_internal_init(void) +{ + fuse_lookup_cache_misses = counter_u64_alloc(M_WAITOK); + counter_u64_zero(fuse_lookup_cache_misses); + fuse_lookup_cache_hits = counter_u64_alloc(M_WAITOK); + counter_u64_zero(fuse_lookup_cache_hits); +} + +void +fuse_internal_destroy(void) +{ + counter_u64_free(fuse_lookup_cache_hits); + counter_u64_free(fuse_lookup_cache_misses); +} Index: sys/fs/fuse/fuse_io.h =================================================================== --- sys/fs/fuse/fuse_io.h +++ sys/fs/fuse/fuse_io.h @@ -32,6 +32,11 @@ * * Copyright (C) 2005 Csaba Henk. * All rights reserved. + * + * Copyright (c) 2019 The FreeBSD Foundation + * + * Portions of this software were developed by BFF Storage Systems, LLC under + * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -61,7 +66,7 @@ #define _FUSE_IO_H_ int fuse_io_dispatch(struct vnode *vp, struct uio *uio, int ioflag, - struct ucred *cred); + struct ucred *cred, pid_t pid); int fuse_io_strategy(struct vnode *vp, struct buf *bp); int fuse_io_flushbuf(struct vnode *vp, int waitfor, struct thread *td); int fuse_io_invalbuf(struct vnode *vp, struct thread *td); Index: sys/fs/fuse/fuse_io.c =================================================================== --- sys/fs/fuse/fuse_io.c +++ sys/fs/fuse/fuse_io.c @@ -33,6 +33,11 @@ * Copyright (C) 2005 Csaba Henk. * All rights reserved. * + * Copyright (c) 2019 The FreeBSD Foundation + * + * Portions of this software were developed by BFF Storage Systems, LLC under + * sponsorship from the FreeBSD Foundation. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -72,6 +77,7 @@ #include #include #include +#include #include #include #include @@ -83,6 +89,7 @@ #include #include #include +#include #include #include @@ -98,45 +105,108 @@ #include "fuse_ipc.h" #include "fuse_io.h" -SDT_PROVIDER_DECLARE(fuse); /* + * Set in a struct buf to indicate that the write came from the buffer cache + * and the originating cred and pid are no longer known. + */ +#define B_FUSEFS_WRITE_CACHE B_FS_FLAG1 + +SDT_PROVIDER_DECLARE(fusefs); +/* * Fuse trace probe: * arg0: verbosity. 
Higher numbers give more verbose messages * arg1: Textual message */ -SDT_PROBE_DEFINE2(fuse, , io, trace, "int", "char*"); +SDT_PROBE_DEFINE2(fusefs, , io, trace, "int", "char*"); +static void +fuse_io_clear_suid_on_write(struct vnode *vp, struct ucred *cred, + struct thread *td); static int fuse_read_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh); static int -fuse_read_biobackend(struct vnode *vp, struct uio *uio, - struct ucred *cred, struct fuse_filehandle *fufh); +fuse_read_biobackend(struct vnode *vp, struct uio *uio, int ioflag, + struct ucred *cred, struct fuse_filehandle *fufh, pid_t pid); static int fuse_write_directbackend(struct vnode *vp, struct uio *uio, - struct ucred *cred, struct fuse_filehandle *fufh, int ioflag); + struct ucred *cred, struct fuse_filehandle *fufh, off_t filesize, + int ioflag, bool pages); static int fuse_write_biobackend(struct vnode *vp, struct uio *uio, - struct ucred *cred, struct fuse_filehandle *fufh, int ioflag); + struct ucred *cred, struct fuse_filehandle *fufh, int ioflag, pid_t pid); -SDT_PROBE_DEFINE5(fuse, , io, io_dispatch, "struct vnode*", "struct uio*", +/* + * FreeBSD clears the SUID and SGID bits on any write by a non-root user. + */ +static void +fuse_io_clear_suid_on_write(struct vnode *vp, struct ucred *cred, + struct thread *td) +{ + struct fuse_data *data; + struct mount *mp; + struct vattr va; + int dataflags; + + mp = vnode_mount(vp); + data = fuse_get_mpdata(mp); + dataflags = data->dataflags; + + if (dataflags & FSESS_DEFAULT_PERMISSIONS) { + if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) { + fuse_internal_getattr(vp, &va, cred, td); + if (va.va_mode & (S_ISUID | S_ISGID)) { + mode_t mode = va.va_mode & ~(S_ISUID | S_ISGID); + /* Clear all vattr fields except mode */ + vattr_null(&va); + va.va_mode = mode; + + /* + * Ignore fuse_internal_setattr's return value, + * because at this point the write operation has + * already succeeded and we don't want to return + * failing status for that. + */ + (void)fuse_internal_setattr(vp, &va, td, NULL); + } + } + } +} + +SDT_PROBE_DEFINE5(fusefs, , io, io_dispatch, "struct vnode*", "struct uio*", "int", "struct ucred*", "struct fuse_filehandle*"); +SDT_PROBE_DEFINE4(fusefs, , io, io_dispatch_filehandles_closed, "struct vnode*", + "struct uio*", "int", "struct ucred*"); int fuse_io_dispatch(struct vnode *vp, struct uio *uio, int ioflag, - struct ucred *cred) + struct ucred *cred, pid_t pid) { struct fuse_filehandle *fufh; int err, directio; + int fflag; + bool closefufh = false; MPASS(vp->v_type == VREG || vp->v_type == VDIR); - err = fuse_filehandle_getrw(vp, - (uio->uio_rw == UIO_READ) ? FUFH_RDONLY : FUFH_WRONLY, &fufh); - if (err) { + fflag = (uio->uio_rw == UIO_READ) ? FREAD : FWRITE; + err = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid); + if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) { + /* + * nfsd will do I/O without first doing VOP_OPEN. 
We + * must implicitly open the file here + */ + err = fuse_filehandle_open(vp, fflag, &fufh, curthread, cred); + closefufh = true; + } + else if (err) { + SDT_PROBE4(fusefs, , io, io_dispatch_filehandles_closed, + vp, uio, ioflag, cred); printf("FUSE: io dispatch: filehandles are closed\n"); return err; } - SDT_PROBE5(fuse, , io, io_dispatch, vp, uio, ioflag, cred, fufh); + if (err) + goto out; + SDT_PROBE5(fusefs, , io, io_dispatch, vp, uio, ioflag, cred, fufh); /* * Ideally, when the daemon asks for direct io at open time, the @@ -153,108 +223,136 @@ switch (uio->uio_rw) { case UIO_READ: if (directio) { - SDT_PROBE2(fuse, , io, trace, 1, + SDT_PROBE2(fusefs, , io, trace, 1, "direct read of vnode"); err = fuse_read_directbackend(vp, uio, cred, fufh); } else { - SDT_PROBE2(fuse, , io, trace, 1, + SDT_PROBE2(fusefs, , io, trace, 1, "buffered read of vnode"); - err = fuse_read_biobackend(vp, uio, cred, fufh); + err = fuse_read_biobackend(vp, uio, ioflag, cred, fufh, + pid); } break; case UIO_WRITE: - /* - * Kludge: simulate write-through caching via write-around - * caching. Same effect, as far as never caching dirty data, - * but slightly pessimal in that newly written data is not - * cached. - */ - if (directio || fuse_data_cache_mode == FUSE_CACHE_WT) { - SDT_PROBE2(fuse, , io, trace, 1, + fuse_vnode_update(vp, FN_MTIMECHANGE | FN_CTIMECHANGE); + if (directio) { + const int iosize = fuse_iosize(vp); + off_t start, end, filesize; + + SDT_PROBE2(fusefs, , io, trace, 1, "direct write of vnode"); - err = fuse_write_directbackend(vp, uio, cred, fufh, ioflag); + + err = fuse_vnode_size(vp, &filesize, cred, curthread); + if (err) + goto out; + + start = uio->uio_offset; + end = start + uio->uio_resid; + KASSERT((ioflag & (IO_VMIO | IO_DIRECT)) != + (IO_VMIO | IO_DIRECT), + ("IO_DIRECT used for a cache flush?")); + /* Invalidate the write cache when writing directly */ + v_inval_buf_range(vp, start, end, iosize); + err = fuse_write_directbackend(vp, uio, cred, fufh, + filesize, ioflag, false); } else { - SDT_PROBE2(fuse, , io, trace, 1, + SDT_PROBE2(fusefs, , io, trace, 1, "buffered write of vnode"); - err = fuse_write_biobackend(vp, uio, cred, fufh, ioflag); + if (!fsess_opt_writeback(vnode_mount(vp))) + ioflag |= IO_SYNC; + err = fuse_write_biobackend(vp, uio, cred, fufh, ioflag, + pid); } + fuse_io_clear_suid_on_write(vp, cred, uio->uio_td); break; default: panic("uninterpreted mode passed to fuse_io_dispatch"); } +out: + if (closefufh) + fuse_filehandle_close(vp, fufh, curthread, cred); + return (err); } -SDT_PROBE_DEFINE3(fuse, , io, read_bio_backend_start, "int", "int", "int"); -SDT_PROBE_DEFINE2(fuse, , io, read_bio_backend_feed, "int", "int"); -SDT_PROBE_DEFINE3(fuse, , io, read_bio_backend_end, "int", "ssize_t", "int"); +SDT_PROBE_DEFINE4(fusefs, , io, read_bio_backend_start, "int", "int", "int", "int"); +SDT_PROBE_DEFINE2(fusefs, , io, read_bio_backend_feed, "int", "struct buf*"); +SDT_PROBE_DEFINE4(fusefs, , io, read_bio_backend_end, "int", "ssize_t", "int", + "struct buf*"); static int -fuse_read_biobackend(struct vnode *vp, struct uio *uio, - struct ucred *cred, struct fuse_filehandle *fufh) +fuse_read_biobackend(struct vnode *vp, struct uio *uio, int ioflag, + struct ucred *cred, struct fuse_filehandle *fufh, pid_t pid) { struct buf *bp; - daddr_t lbn; - int bcount; - int err = 0, n = 0, on = 0; + struct mount *mp; + struct fuse_data *data; + daddr_t lbn, nextlbn; + int bcount, nextsize; + int err, n = 0, on = 0, seqcount; off_t filesize; const int biosize = fuse_iosize(vp); + mp = 
vnode_mount(vp); + data = fuse_get_mpdata(mp); - if (uio->uio_resid == 0) - return (0); if (uio->uio_offset < 0) return (EINVAL); - bcount = biosize; - filesize = VTOFUD(vp)->filesize; + seqcount = ioflag >> IO_SEQSHIFT; - do { + err = fuse_vnode_size(vp, &filesize, cred, curthread); + if (err) + return err; + + for (err = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { if (fuse_isdeadfs(vp)) { err = ENXIO; break; } + if (filesize - uio->uio_offset <= 0) + break; lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize - 1); - SDT_PROBE3(fuse, , io, read_bio_backend_start, - biosize, (int)lbn, on); - - /* - * Obtain the buffer cache block. Figure out the buffer size - * when we are at EOF. If we are modifying the size of the - * buffer based on an EOF condition we need to hold - * nfs_rslock() through obtaining the buffer to prevent - * a potential writer-appender from messing with n_size. - * Otherwise we may accidentally truncate the buffer and - * lose dirty data. - * - * Note that bcount is *not* DEV_BSIZE aligned. - */ if ((off_t)lbn * biosize >= filesize) { bcount = 0; } else if ((off_t)(lbn + 1) * biosize > filesize) { bcount = filesize - (off_t)lbn *biosize; + } else { + bcount = biosize; } - bp = getblk(vp, lbn, bcount, PCATCH, 0, 0); + nextlbn = lbn + 1; + nextsize = MIN(biosize, filesize - nextlbn * biosize); - if (!bp) - return (EINTR); + SDT_PROBE4(fusefs, , io, read_bio_backend_start, + biosize, (int)lbn, on, bcount); - /* - * If B_CACHE is not set, we must issue the read. If this - * fails, we return an error. - */ + if (bcount < biosize) { + /* If near EOF, don't do readahead */ + err = bread(vp, lbn, bcount, NOCRED, &bp); + } else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { + /* Try clustered read */ + long totread = uio->uio_resid + on; + seqcount = MIN(seqcount, + data->max_readahead_blocks + 1); + err = cluster_read(vp, filesize, lbn, bcount, NOCRED, + totread, seqcount, 0, &bp); + } else if (seqcount > 1 && data->max_readahead_blocks >= 1) { + /* Try non-clustered readahead */ + err = breadn(vp, lbn, bcount, &nextlbn, &nextsize, 1, + NOCRED, &bp); + } else { + /* Just read what was requested */ + err = bread(vp, lbn, bcount, NOCRED, &bp); + } - if ((bp->b_flags & B_CACHE) == 0) { - bp->b_iocmd = BIO_READ; - vfs_busy_pages(bp, 0); - err = fuse_io_strategy(vp, bp); - if (err) { - brelse(bp); - return (err); - } + if (err) { + brelse(bp); + bp = NULL; + break; } + /* * on is the offset into the current bp. Figure out how many * bytes we can copy out of the bp. 
Note that bcount is @@ -264,33 +362,41 @@ */ n = 0; - if (on < bcount) - n = MIN((unsigned)(bcount - on), uio->uio_resid); + if (on < bcount - bp->b_resid) + n = MIN((unsigned)(bcount - bp->b_resid - on), + uio->uio_resid); if (n > 0) { - SDT_PROBE2(fuse, , io, read_bio_backend_feed, - n, n + (int)bp->b_resid); + SDT_PROBE2(fusefs, , io, read_bio_backend_feed, n, bp); err = uiomove(bp->b_data + on, n, uio); } - brelse(bp); - SDT_PROBE3(fuse, , io, read_bio_backend_end, err, - uio->uio_resid, n); - } while (err == 0 && uio->uio_resid > 0 && n > 0); + vfs_bio_brelse(bp, ioflag); + SDT_PROBE4(fusefs, , io, read_bio_backend_end, err, + uio->uio_resid, n, bp); + if (bp->b_resid > 0) { + /* Short read indicates EOF */ + break; + } + } return (err); } -SDT_PROBE_DEFINE1(fuse, , io, read_directbackend_start, "struct fuse_read_in*"); -SDT_PROBE_DEFINE2(fuse, , io, read_directbackend_complete, - "struct fuse_dispatcher*", "struct uio*"); +SDT_PROBE_DEFINE1(fusefs, , io, read_directbackend_start, + "struct fuse_read_in*"); +SDT_PROBE_DEFINE3(fusefs, , io, read_directbackend_complete, + "struct fuse_dispatcher*", "struct fuse_read_in*", "struct uio*"); static int fuse_read_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh) { + struct fuse_data *data; struct fuse_dispatcher fdi; struct fuse_read_in *fri; int err = 0; + data = fuse_get_mpdata(vp->v_mount); + if (uio->uio_resid == 0) return (0); @@ -312,19 +418,29 @@ fri->offset = uio->uio_offset; fri->size = MIN(uio->uio_resid, fuse_get_mpdata(vp->v_mount)->max_read); + if (fuse_libabi_geq(data, 7, 9)) { + /* See comment regarding FUSE_WRITE_LOCKOWNER */ + fri->read_flags = 0; + fri->flags = fufh_type_2_fflags(fufh->fufh_type); + } - SDT_PROBE1(fuse, , io, read_directbackend_start, fri); + SDT_PROBE1(fusefs, , io, read_directbackend_start, fri); if ((err = fdisp_wait_answ(&fdi))) goto out; - SDT_PROBE2(fuse, , io, read_directbackend_complete, - fdi.iosize, uio); + SDT_PROBE3(fusefs, , io, read_directbackend_complete, + &fdi, fri, uio); if ((err = uiomove(fdi.answ, MIN(fri->size, fdi.iosize), uio))) break; - if (fdi.iosize < fri->size) + if (fdi.iosize < fri->size) { + /* + * Short read. Should only happen at EOF or with + * direct io. + */ break; + } } out: @@ -334,25 +450,57 @@ static int fuse_write_directbackend(struct vnode *vp, struct uio *uio, - struct ucred *cred, struct fuse_filehandle *fufh, int ioflag) + struct ucred *cred, struct fuse_filehandle *fufh, off_t filesize, + int ioflag, bool pages) { struct fuse_vnode_data *fvdat = VTOFUD(vp); + struct fuse_data *data; struct fuse_write_in *fwi; + struct fuse_write_out *fwo; struct fuse_dispatcher fdi; size_t chunksize; + void *fwi_data; + off_t as_written_offset; int diff; int err = 0; + bool direct_io = fufh->fuse_open_flags & FOPEN_DIRECT_IO; + bool wrote_anything = false; + uint32_t write_flags; + data = fuse_get_mpdata(vp->v_mount); + + /* + * Don't set FUSE_WRITE_LOCKOWNER in write_flags. It can't be set + * accurately when using POSIX AIO, libfuse doesn't use it, and I'm not + * aware of any file systems that do. It was an attempt to add + * Linux-style mandatory locking to the FUSE protocol, but mandatory + * locking is deprecated even on Linux. See Linux commit + * f33321141b273d60cbb3a8f56a5489baad82ba5e . + */ + /* + * Set FUSE_WRITE_CACHE whenever we don't know the uid, gid, and/or pid + * that originated a write. For example when writing from the + * writeback cache. 
I don't know of a single file system that cares, + * but the protocol says we're supposed to do this. + */ + write_flags = !pages && ( + (ioflag & IO_DIRECT) || + !fsess_opt_datacache(vnode_mount(vp)) || + !fsess_opt_writeback(vnode_mount(vp))) ? 0 : FUSE_WRITE_CACHE; + if (uio->uio_resid == 0) return (0); + if (ioflag & IO_APPEND) - uio_setoffset(uio, fvdat->filesize); + uio_setoffset(uio, filesize); + if (vn_rlimit_fsize(vp, uio, uio->uio_td)) + return (EFBIG); + fdisp_init(&fdi, 0); while (uio->uio_resid > 0) { - chunksize = MIN(uio->uio_resid, - fuse_get_mpdata(vp->v_mount)->max_write); + chunksize = MIN(uio->uio_resid, data->max_write); fdi.iosize = sizeof(*fwi) + chunksize; fdisp_make_vp(&fdi, FUSE_WRITE, vp, uio->uio_td, cred); @@ -361,79 +509,140 @@ fwi->fh = fufh->fh_id; fwi->offset = uio->uio_offset; fwi->size = chunksize; + fwi->write_flags = write_flags; + if (fuse_libabi_geq(data, 7, 9)) { + fwi->flags = fufh_type_2_fflags(fufh->fufh_type); + fwi_data = (char *)fdi.indata + sizeof(*fwi); + } else { + fwi_data = (char *)fdi.indata + + FUSE_COMPAT_WRITE_IN_SIZE; + } - if ((err = uiomove((char *)fdi.indata + sizeof(*fwi), - chunksize, uio))) + if ((err = uiomove(fwi_data, chunksize, uio))) break; - if ((err = fdisp_wait_answ(&fdi))) +retry: + err = fdisp_wait_answ(&fdi); + if (err == ERESTART || err == EINTR || err == EWOULDBLOCK) { + /* + * Rewind the uio so dofilewrite will know it's + * incomplete + */ + uio->uio_resid += fwi->size; + uio->uio_offset -= fwi->size; + /* + * Change ERESTART into EINTR because we can't rewind + * uio->uio_iov. Basically, once uiomove(9) has been + * called, it's impossible to restart a syscall. + */ + if (err == ERESTART) + err = EINTR; break; + } else if (err) { + break; + } else { + wrote_anything = true; + } + fwo = ((struct fuse_write_out *)fdi.answ); + /* Adjust the uio in the case of short writes */ - diff = chunksize - ((struct fuse_write_out *)fdi.answ)->size; + diff = fwi->size - fwo->size; + as_written_offset = uio->uio_offset - diff; + + if (as_written_offset - diff > filesize) + fuse_vnode_setsize(vp, as_written_offset); + if (as_written_offset - diff >= filesize) + fvdat->flag &= ~FN_SIZECHANGE; + if (diff < 0) { + printf("WARNING: misbehaving FUSE filesystem " + "wrote more data than we provided it\n"); err = EINVAL; break; - } else if (diff > 0 && !(ioflag & IO_DIRECT)) { - /* - * XXX We really should be directly checking whether - * the file was opened with FOPEN_DIRECT_IO, not - * IO_DIRECT. IO_DIRECT can be set in multiple ways. 
- */ - SDT_PROBE2(fuse, , io, trace, 1, - "misbehaving filesystem: short writes are only " - "allowed with direct_io"); + } else if (diff > 0) { + /* Short write */ + if (!direct_io) { + printf("WARNING: misbehaving FUSE filesystem: " + "short writes are only allowed with " + "direct_io\n"); + } + if (ioflag & IO_DIRECT) { + /* Return early */ + uio->uio_resid += diff; + uio->uio_offset -= diff; + break; + } else { + /* Resend the unwritten portion of data */ + fdi.iosize = sizeof(*fwi) + diff; + /* Refresh fdi without clearing data buffer */ + fdisp_refresh_vp(&fdi, FUSE_WRITE, vp, + uio->uio_td, cred); + fwi = fdi.indata; + MPASS2(fwi == fdi.indata, "FUSE dispatcher " + "reallocated despite no increase in " + "size?"); + void *src = (char*)fwi_data + fwo->size; + memmove(fwi_data, src, diff); + fwi->fh = fufh->fh_id; + fwi->offset = as_written_offset; + fwi->size = diff; + fwi->write_flags = write_flags; + goto retry; + } } - uio->uio_resid += diff; - uio->uio_offset -= diff; - - if (uio->uio_offset > fvdat->filesize && - fuse_data_cache_mode != FUSE_CACHE_UC) { - fuse_vnode_setsize(vp, uio->uio_offset); - fvdat->flag &= ~FN_SIZECHANGE; - } } fdisp_destroy(&fdi); + if (wrote_anything) + fuse_vnode_undirty_cached_timestamps(vp); + return (err); } -SDT_PROBE_DEFINE6(fuse, , io, write_biobackend_start, "int64_t", "int", "int", +SDT_PROBE_DEFINE6(fusefs, , io, write_biobackend_start, "int64_t", "int", "int", "struct uio*", "int", "bool"); -SDT_PROBE_DEFINE2(fuse, , io, write_biobackend_append_race, "long", "int"); +SDT_PROBE_DEFINE2(fusefs, , io, write_biobackend_append_race, "long", "int"); +SDT_PROBE_DEFINE2(fusefs, , io, write_biobackend_issue, "int", "struct buf*"); static int fuse_write_biobackend(struct vnode *vp, struct uio *uio, - struct ucred *cred, struct fuse_filehandle *fufh, int ioflag) + struct ucred *cred, struct fuse_filehandle *fufh, int ioflag, pid_t pid) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct buf *bp; daddr_t lbn; + off_t filesize; int bcount; - int n, on, err = 0; + int n, on, seqcount, err = 0; + bool last_page; const int biosize = fuse_iosize(vp); - KASSERT(uio->uio_rw == UIO_WRITE, ("ncl_write mode")); + seqcount = ioflag >> IO_SEQSHIFT; + + KASSERT(uio->uio_rw == UIO_WRITE, ("fuse_write_biobackend mode")); if (vp->v_type != VREG) return (EIO); if (uio->uio_offset < 0) return (EINVAL); if (uio->uio_resid == 0) return (0); + + err = fuse_vnode_size(vp, &filesize, cred, curthread); + if (err) + return err; + if (ioflag & IO_APPEND) - uio_setoffset(uio, fvdat->filesize); + uio_setoffset(uio, filesize); - /* - * Find all of this file's B_NEEDCOMMIT buffers. If our writes - * would exceed the local maximum per-file write commit size when - * combined with those, we must decide whether to flush, - * go synchronous, or return err. We don't bother checking - * IO_UNIT -- we just make all writes atomic anyway, as there's - * no point optimizing for something that really won't ever happen. - */ + if (vn_rlimit_fsize(vp, uio, uio->uio_td)) + return (EFBIG); + do { + bool direct_append, extending; + if (fuse_isdeadfs(vp)) { err = ENXIO; break; @@ -443,66 +652,60 @@ n = MIN((unsigned)(biosize - on), uio->uio_resid); again: - /* - * Handle direct append and file extension cases, calculate - * unaligned buffer size. - */ - if (uio->uio_offset == fvdat->filesize && n) { - /* - * Get the buffer (in its pre-append state to maintain - * B_CACHE if it was previously set). Resize the - * nfsnode after we have locked the buffer to prevent - * readers from reading garbage. 
- */ - bcount = on; - SDT_PROBE6(fuse, , io, write_biobackend_start, - lbn, on, n, uio, bcount, true); - bp = getblk(vp, lbn, bcount, PCATCH, 0, 0); - + /* Get or create a buffer for the write */ + direct_append = uio->uio_offset == filesize && n; + if (uio->uio_offset + n < filesize) { + extending = false; + if ((off_t)(lbn + 1) * biosize < filesize) { + /* Not the file's last block */ + bcount = biosize; + } else { + /* The file's last block */ + bcount = filesize - (off_t)lbn * biosize; + } + } else { + extending = true; + bcount = on + n; + } + if (howmany(((off_t)lbn * biosize + on + n - 1), PAGE_SIZE) >= + howmany(filesize, PAGE_SIZE)) + last_page = true; + else + last_page = false; + if (direct_append) { + /* + * Take care to preserve the buffer's B_CACHE state so + * as not to cause an unnecessary read. + */ + bp = getblk(vp, lbn, on, PCATCH, 0, 0); if (bp != NULL) { - long save; - - err = fuse_vnode_setsize(vp, - uio->uio_offset + n); - if (err) { - brelse(bp); - break; - } - save = bp->b_flags & B_CACHE; - bcount += n; + uint32_t save = bp->b_flags & B_CACHE; allocbuf(bp, bcount); bp->b_flags |= save; } } else { - /* - * Obtain the locked cache block first, and then - * adjust the file's size as appropriate. - */ - bcount = on + n; - if ((off_t)lbn * biosize + bcount < fvdat->filesize) { - if ((off_t)(lbn + 1) * biosize < fvdat->filesize) - bcount = biosize; - else - bcount = fvdat->filesize - - (off_t)lbn *biosize; - } - SDT_PROBE6(fuse, , io, write_biobackend_start, - lbn, on, n, uio, bcount, false); bp = getblk(vp, lbn, bcount, PCATCH, 0, 0); - if (bp && uio->uio_offset + n > fvdat->filesize) { - err = fuse_vnode_setsize(vp, - uio->uio_offset + n); - if (err) { - brelse(bp); - break; - } - } } - if (!bp) { err = EINTR; break; } + if (extending) { + /* + * Extend file _after_ locking buffer so we won't race + * with other readers + */ + err = fuse_vnode_setsize(vp, uio->uio_offset + n); + filesize = uio->uio_offset + n; + fvdat->flag |= FN_SIZECHANGE; + if (err) { + brelse(bp); + break; + } + } + + SDT_PROBE6(fusefs, , io, write_biobackend_start, + lbn, on, n, uio, bcount, direct_append); /* * Issue a READ if B_CACHE is not set. In special-append * mode, B_CACHE is based on the buffer prior to the write @@ -535,6 +738,21 @@ brelse(bp); break; } + if (bp->b_resid > 0) { + /* + * Short read indicates EOF. Update file size + * from the server and try again. + */ + SDT_PROBE2(fusefs, , io, trace, 1, + "Short read during a RMW"); + brelse(bp); + err = fuse_vnode_size(vp, &filesize, cred, + curthread); + if (err) + break; + else + goto again; + } } if (bp->b_wcred == NOCRED) bp->b_wcred = crhold(cred); @@ -547,9 +765,8 @@ * If the chopping creates a reverse-indexed or degenerate * situation with dirtyoff/end, we 0 both of them. */ - if (bp->b_dirtyend > bcount) { - SDT_PROBE2(fuse, , io, write_biobackend_append_race, + SDT_PROBE2(fusefs, , io, write_biobackend_append_race, (long)bp->b_blkno * biosize, bp->b_dirtyend - bcount); bp->b_dirtyend = bcount; @@ -582,6 +799,7 @@ * reasons: the only way to know if a write is valid * if its actually written out.) */ + SDT_PROBE2(fusefs, , io, write_biobackend_issue, 0, bp); bwrite(bp); if (bp->b_error == EINTR) { err = EINTR; @@ -591,19 +809,12 @@ } err = uiomove((char *)bp->b_data + on, n, uio); - /* - * Since this block is being modified, it must be written - * again and not just committed. Since write clustering does - * not work for the stage 1 data write, only the stage 2 - * commit rpc, we have to clear B_CLUSTEROK as well. 
- */ - bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); - if (err) { bp->b_ioflags |= BIO_ERROR; bp->b_error = err; brelse(bp); break; + /* TODO: vfs_bio_clrbuf like ffs_write does? */ } /* * Only update dirtyoff/dirtyend if not a degenerate @@ -619,42 +830,85 @@ } vfs_bio_set_valid(bp, on, n); } - err = bwrite(bp); + + vfs_bio_set_flags(bp, ioflag); + + bp->b_flags |= B_FUSEFS_WRITE_CACHE; + if (ioflag & IO_SYNC) { + SDT_PROBE2(fusefs, , io, write_biobackend_issue, 2, bp); + if (!(ioflag & IO_VMIO)) + bp->b_flags &= ~B_FUSEFS_WRITE_CACHE; + err = bwrite(bp); + } else if (vm_page_count_severe() || + buf_dirty_count_severe() || + (ioflag & IO_ASYNC)) { + bp->b_flags |= B_CLUSTEROK; + SDT_PROBE2(fusefs, , io, write_biobackend_issue, 3, bp); + bawrite(bp); + } else if (on == 0 && n == bcount) { + if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) { + bp->b_flags |= B_CLUSTEROK; + SDT_PROBE2(fusefs, , io, write_biobackend_issue, + 4, bp); + cluster_write(vp, bp, filesize, seqcount, 0); + } else { + SDT_PROBE2(fusefs, , io, write_biobackend_issue, + 5, bp); + bawrite(bp); + } + } else if (ioflag & IO_DIRECT) { + bp->b_flags |= B_CLUSTEROK; + SDT_PROBE2(fusefs, , io, write_biobackend_issue, 6, bp); + bawrite(bp); + } else { + bp->b_flags &= ~B_CLUSTEROK; + SDT_PROBE2(fusefs, , io, write_biobackend_issue, 7, bp); + bdwrite(bp); + } if (err) break; } while (uio->uio_resid > 0 && n > 0); - if (fuse_sync_resize && (fvdat->flag & FN_SIZECHANGE) != 0) - fuse_vnode_savesize(vp, cred); - return (err); } int fuse_io_strategy(struct vnode *vp, struct buf *bp) { - struct fuse_filehandle *fufh; struct fuse_vnode_data *fvdat = VTOFUD(vp); + struct fuse_filehandle *fufh; struct ucred *cred; struct uio *uiop; struct uio uio; struct iovec io; + off_t filesize; int error = 0; + int fflag; + /* We don't know the true pid when we're dealing with the cache */ + pid_t pid = 0; const int biosize = fuse_iosize(vp); MPASS(vp->v_type == VREG || vp->v_type == VDIR); MPASS(bp->b_iocmd == BIO_READ || bp->b_iocmd == BIO_WRITE); - error = fuse_filehandle_getrw(vp, - (bp->b_iocmd == BIO_READ) ? FUFH_RDONLY : FUFH_WRONLY, &fufh); + fflag = bp->b_iocmd == BIO_READ ? FREAD : FWRITE; + cred = bp->b_iocmd == BIO_READ ? bp->b_rcred : bp->b_wcred; + error = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid); + if (bp->b_iocmd == BIO_READ && error == EBADF) { + /* + * This may be a read-modify-write operation on a cached file + * opened O_WRONLY. The FUSE protocol allows this. + */ + error = fuse_filehandle_get(vp, FWRITE, &fufh, cred, pid); + } if (error) { printf("FUSE: strategy: filehandles are closed\n"); bp->b_ioflags |= BIO_ERROR; bp->b_error = error; + bufdone(bp); return (error); } - cred = bp->b_iocmd == BIO_READ ? 
bp->b_rcred : bp->b_wcred; uiop = &uio; uiop->uio_iov = &io; @@ -673,40 +927,57 @@ KASSERT(!(bp->b_flags & B_DONE), ("fuse_io_strategy: bp %p already marked done", bp)); if (bp->b_iocmd == BIO_READ) { + ssize_t left; + io.iov_len = uiop->uio_resid = bp->b_bcount; io.iov_base = bp->b_data; uiop->uio_rw = UIO_READ; - uiop->uio_offset = ((off_t)bp->b_blkno) * biosize; + uiop->uio_offset = ((off_t)bp->b_lblkno) * biosize; error = fuse_read_directbackend(vp, uiop, cred, fufh); + /* + * Store the amount we failed to read in the buffer's private + * field, so callers can truncate the file if necessary' + */ - /* XXXCEM: Potentially invalid access to cached_attrs here */ - if ((!error && uiop->uio_resid) || - (fsess_opt_brokenio(vnode_mount(vp)) && error == EIO && - uiop->uio_offset < fvdat->filesize && fvdat->filesize > 0 && - uiop->uio_offset >= fvdat->cached_attrs.va_size)) { - /* - * If we had a short read with no error, we must have - * hit a file hole. We should zero-fill the remainder. - * This can also occur if the server hits the file EOF. - * - * Holes used to be able to occur due to pending - * writes, but that is not possible any longer. - */ + if (!error && uiop->uio_resid) { int nread = bp->b_bcount - uiop->uio_resid; - int left = uiop->uio_resid; + left = uiop->uio_resid; + bzero((char *)bp->b_data + nread, left); - if (error != 0) { - printf("FUSE: Fix broken io: offset %ju, " - " resid %zd, file size %ju/%ju\n", - (uintmax_t)uiop->uio_offset, - uiop->uio_resid, fvdat->filesize, - fvdat->cached_attrs.va_size); - error = 0; + if ((fvdat->flag & FN_SIZECHANGE) == 0) { + /* + * A short read with no error, when not using + * direct io, and when no writes are cached, + * indicates EOF caused by a server-side + * truncation. Clear the attr cache so we'll + * pick up the new file size and timestamps. + * + * We must still bzero the remaining buffer so + * uninitialized data doesn't get exposed by a + * future truncate that extends the file. + * + * To prevent lock order problems, we must + * truncate the file upstack, not here. + */ + SDT_PROBE2(fusefs, , io, trace, 1, + "Short read of a clean file"); + fuse_vnode_clear_attr_cache(vp); + } else { + /* + * If dirty writes _are_ cached beyond EOF, + * that indicates a newly created hole that the + * server doesn't know about. Those don't pose + * any problem. + * XXX: we don't currently track whether dirty + * writes are cached beyond EOF, before EOF, or + * both. 
+ */ + SDT_PROBE2(fusefs, , io, trace, 1, + "Short read of a dirty file"); + uiop->uio_resid = 0; } - if (left > 0) - bzero((char *)bp->b_data + nread, left); - uiop->uio_resid = 0; + } if (error) { bp->b_ioflags |= BIO_ERROR; @@ -714,33 +985,33 @@ } } else { /* - * If we only need to commit, try to commit - */ - if (bp->b_flags & B_NEEDCOMMIT) { - SDT_PROBE2(fuse, , io, trace, 1, - "write: B_NEEDCOMMIT flags set"); - } - /* * Setup for actual write */ - if ((off_t)bp->b_blkno * biosize + bp->b_dirtyend > - fvdat->filesize) - bp->b_dirtyend = fvdat->filesize - - (off_t)bp->b_blkno * biosize; + error = fuse_vnode_size(vp, &filesize, cred, curthread); + if (error) { + bp->b_ioflags |= BIO_ERROR; + bp->b_error = error; + bufdone(bp); + return (error); + } + if ((off_t)bp->b_lblkno * biosize + bp->b_dirtyend > filesize) + bp->b_dirtyend = filesize - + (off_t)bp->b_lblkno * biosize; + if (bp->b_dirtyend > bp->b_dirtyoff) { io.iov_len = uiop->uio_resid = bp->b_dirtyend - bp->b_dirtyoff; - uiop->uio_offset = (off_t)bp->b_blkno * biosize + uiop->uio_offset = (off_t)bp->b_lblkno * biosize + bp->b_dirtyoff; io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; uiop->uio_rw = UIO_WRITE; - error = fuse_write_directbackend(vp, uiop, cred, fufh, 0); + bool pages = bp->b_flags & B_FUSEFS_WRITE_CACHE; + error = fuse_write_directbackend(vp, uiop, cred, fufh, + filesize, 0, pages); - if (error == EINTR || error == ETIMEDOUT - || (!error && (bp->b_flags & B_NEEDCOMMIT))) { - + if (error == EINTR || error == ETIMEDOUT) { bp->b_flags &= ~(B_INVAL | B_NOCACHE); if ((bp->b_flags & B_PAGING) == 0) { bdirty(bp); Index: sys/fs/fuse/fuse_ipc.h =================================================================== --- sys/fs/fuse/fuse_ipc.h +++ sys/fs/fuse/fuse_ipc.h @@ -32,6 +32,11 @@ * * Copyright (C) 2005 Csaba Henk. * All rights reserved. + * + * Copyright (c) 2019 The FreeBSD Foundation + * + * Portions of this software were developed by BFF Storage Systems, LLC under + * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -63,6 +68,12 @@ #include #include +enum fuse_data_cache_mode { + FUSE_CACHE_UC, + FUSE_CACHE_WT, + FUSE_CACHE_WB, +}; + struct fuse_iov { void *base; size_t len; @@ -103,6 +114,12 @@ struct fuse_data *tk_data; int tk_flag; u_int tk_refcount; + /* + * If this ticket's operation has been interrupted, this will hold the + * unique value of the FUSE_INTERRUPT operation. Otherwise, it will be + * 0. + */ + uint64_t irq_unique; /* fields for initiating an upgoing message */ struct fuse_iov tk_ms_fiov; @@ -147,16 +164,20 @@ ftick->tk_flag |= FT_ANSW; } +static inline struct fuse_in_header* +fticket_in_header(struct fuse_ticket *ftick) +{ + return (struct fuse_in_header *)(ftick->tk_ms_fiov.base); +} + static inline enum fuse_opcode fticket_opcode(struct fuse_ticket *ftick) { - return (((struct fuse_in_header *)(ftick->tk_ms_fiov.base))->opcode); + return fticket_in_header(ftick)->opcode; } int fticket_pull(struct fuse_ticket *ftick, struct uio *uio); -enum mountpri { FM_NOMOUNTED, FM_PRIMARY, FM_SECONDARY }; - /* * The data representing a FUSE session. */ @@ -170,10 +191,16 @@ struct mtx ms_mtx; STAILQ_HEAD(, fuse_ticket) ms_head; + int ms_count; struct mtx aw_mtx; TAILQ_HEAD(, fuse_ticket) aw_head; + /* + * Holds the next value of the FUSE operation unique value. + * Also, serves as a wakeup channel to prevent any operations from + * being created before INIT completes. 
+ */ u_long ticketer; struct sx rename_lock; @@ -181,6 +208,7 @@ uint32_t fuse_libabi_major; uint32_t fuse_libabi_minor; + uint32_t max_readahead_blocks; uint32_t max_write; uint32_t max_read; uint32_t subtype; @@ -189,34 +217,26 @@ struct selinfo ks_rsel; int daemon_timeout; + unsigned time_gran; uint64_t notimpl; + uint64_t mnt_flag; + enum fuse_data_cache_mode cache_mode; }; #define FSESS_DEAD 0x0001 /* session is to be closed */ -#define FSESS_UNUSED0 0x0002 /* unused */ #define FSESS_INITED 0x0004 /* session has been inited */ #define FSESS_DAEMON_CAN_SPY 0x0010 /* let non-owners access this fs */ /* (and being observed by the daemon) */ #define FSESS_PUSH_SYMLINKS_IN 0x0020 /* prefix absolute symlinks with mp */ #define FSESS_DEFAULT_PERMISSIONS 0x0040 /* kernel does permission checking */ -#define FSESS_NO_ATTRCACHE 0x0080 /* no attribute caching */ -#define FSESS_NO_READAHEAD 0x0100 /* no readaheads */ -#define FSESS_NO_DATACACHE 0x0200 /* disable buffer cache */ -#define FSESS_NO_NAMECACHE 0x0400 /* disable name cache */ -#define FSESS_NO_MMAP 0x0800 /* disable mmap */ -#define FSESS_BROKENIO 0x1000 /* fix broken io */ +#define FSESS_ASYNC_READ 0x1000 /* allow multiple reads of some file */ +#define FSESS_POSIX_LOCKS 0x2000 /* daemon supports POSIX locks */ +#define FSESS_EXPORT_SUPPORT 0x10000 /* daemon supports NFS-style lookups */ +#define FSESS_MNTOPTS_MASK ( \ + FSESS_DAEMON_CAN_SPY | FSESS_PUSH_SYMLINKS_IN | \ + FSESS_DEFAULT_PERMISSIONS) -enum fuse_data_cache_mode { - FUSE_CACHE_UC, - FUSE_CACHE_WT, - FUSE_CACHE_WB, -}; - extern int fuse_data_cache_mode; -extern int fuse_data_cache_invalidate; -extern int fuse_mmap_enable; -extern int fuse_sync_resize; -extern int fuse_fix_broken_io; static inline struct fuse_data * fuse_get_mpdata(struct mount *mp) @@ -245,36 +265,43 @@ { struct fuse_data *data = fuse_get_mpdata(mp); - return (fuse_data_cache_mode != FUSE_CACHE_UC && - (data->dataflags & FSESS_NO_DATACACHE) == 0); + return (data->cache_mode != FUSE_CACHE_UC); } static inline bool fsess_opt_mmap(struct mount *mp) { - struct fuse_data *data = fuse_get_mpdata(mp); - - if (!fuse_mmap_enable || fuse_data_cache_mode == FUSE_CACHE_UC) - return (false); - return ((data->dataflags & (FSESS_NO_DATACACHE | FSESS_NO_MMAP)) == 0); + return (fsess_opt_datacache(mp)); } static inline bool -fsess_opt_brokenio(struct mount *mp) +fsess_opt_writeback(struct mount *mp) { struct fuse_data *data = fuse_get_mpdata(mp); - return (fuse_fix_broken_io || (data->dataflags & FSESS_BROKENIO)); + return (data->cache_mode == FUSE_CACHE_WB); } +/* Insert a new upgoing message */ static inline void fuse_ms_push(struct fuse_ticket *ftick) { mtx_assert(&ftick->tk_data->ms_mtx, MA_OWNED); refcount_acquire(&ftick->tk_refcount); STAILQ_INSERT_TAIL(&ftick->tk_data->ms_head, ftick, tk_ms_link); + ftick->tk_data->ms_count++; } +/* Insert a new upgoing message to the front of the queue */ +static inline void +fuse_ms_push_head(struct fuse_ticket *ftick) +{ + mtx_assert(&ftick->tk_data->ms_mtx, MA_OWNED); + refcount_acquire(&ftick->tk_refcount); + STAILQ_INSERT_HEAD(&ftick->tk_data->ms_head, ftick, tk_ms_link); + ftick->tk_data->ms_count++; +} + static inline struct fuse_ticket * fuse_ms_pop(struct fuse_data *data) { @@ -284,7 +311,9 @@ if ((ftick = STAILQ_FIRST(&data->ms_head))) { STAILQ_REMOVE_HEAD(&data->ms_head, tk_ms_link); + data->ms_count--; #ifdef INVARIANTS + MPASS(data->ms_count >= 0); ftick->tk_ms_link.stqe_next = NULL; #endif } @@ -327,7 +356,7 @@ struct fuse_ticket *fuse_ticket_fetch(struct fuse_data 
*data); int fuse_ticket_drop(struct fuse_ticket *ftick); void fuse_insert_callback(struct fuse_ticket *ftick, fuse_handler_t *handler); -void fuse_insert_message(struct fuse_ticket *ftick); +void fuse_insert_message(struct fuse_ticket *ftick, bool irq); static inline bool fuse_libabi_geq(struct fuse_data *data, uint32_t abi_maj, uint32_t abi_min) @@ -374,13 +403,15 @@ #endif } +void fdisp_refresh(struct fuse_dispatcher *fdip); + void fdisp_make(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct mount *mp, uint64_t nid, struct thread *td, struct ucred *cred); -void fdisp_make_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op, - struct mount *mp, uint64_t nid, pid_t pid, struct ucred *cred); - void fdisp_make_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op, + struct vnode *vp, struct thread *td, struct ucred *cred); + +void fdisp_refresh_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct vnode *vp, struct thread *td, struct ucred *cred); int fdisp_wait_answ(struct fuse_dispatcher *fdip); Index: sys/fs/fuse/fuse_ipc.c =================================================================== --- sys/fs/fuse/fuse_ipc.c +++ sys/fs/fuse/fuse_ipc.c @@ -33,6 +33,11 @@ * Copyright (C) 2005 Csaba Henk. * All rights reserved. * + * Copyright (c) 2019 The FreeBSD Foundation + * + * Portions of this software were developed by BFF Storage Systems, LLC under + * sponsorship from the FreeBSD Foundation. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -61,6 +66,7 @@ #include #include #include +#include #include #include #include @@ -84,14 +90,17 @@ #include "fuse_ipc.h" #include "fuse_internal.h" -SDT_PROVIDER_DECLARE(fuse); +SDT_PROVIDER_DECLARE(fusefs); /* * Fuse trace probe: * arg0: verbosity. 
Higher numbers give more verbose messages * arg1: Textual message */ -SDT_PROBE_DEFINE2(fuse, , ipc, trace, "int", "char*"); +SDT_PROBE_DEFINE2(fusefs, , ipc, trace, "int", "char*"); +static void fdisp_make_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op, + struct fuse_data *data, uint64_t nid, pid_t pid, struct ucred *cred); +static void fuse_interrupt_send(struct fuse_ticket *otick, int err); static struct fuse_ticket *fticket_alloc(struct fuse_data *data); static void fticket_refresh(struct fuse_ticket *ftick); static void fticket_destroy(struct fuse_ticket *ftick); @@ -104,13 +113,10 @@ static fuse_handler_t fuse_standard_handler; -SYSCTL_NODE(_vfs, OID_AUTO, fusefs, CTLFLAG_RW, 0, "FUSE tunables"); -SYSCTL_STRING(_vfs_fusefs, OID_AUTO, version, CTLFLAG_RD, - FUSE_FREEBSD_VERSION, 0, "fuse-freebsd version"); -static int fuse_ticket_count = 0; +static counter_u64_t fuse_ticket_count; +SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, ticket_count, CTLFLAG_RD, + &fuse_ticket_count, "Number of allocated tickets"); -SYSCTL_INT(_vfs_fusefs, OID_AUTO, ticket_count, CTLFLAG_RW, - &fuse_ticket_count, 0, "number of allocated tickets"); static long fuse_iov_permanent_bufsize = 1 << 19; SYSCTL_LONG(_vfs_fusefs, OID_AUTO, iov_permanent_bufsize, CTLFLAG_RW, @@ -125,25 +131,131 @@ MALLOC_DEFINE(M_FUSEMSG, "fuse_msgbuf", "fuse message buffer"); static uma_zone_t ticket_zone; -static void -fuse_block_sigs(sigset_t *oldset) +/* + * TODO: figure out how to timeout INTERRUPT requests, because the daemon may + * leagally never respond + */ +static int +fuse_interrupt_callback(struct fuse_ticket *tick, struct uio *uio) { - sigset_t newset; + struct fuse_ticket *otick, *x_tick; + struct fuse_interrupt_in *fii; + struct fuse_data *data = tick->tk_data; + bool found = false; - SIGFILLSET(newset); - SIGDELSET(newset, SIGKILL); - if (kern_sigprocmask(curthread, SIG_BLOCK, &newset, oldset, 0)) - panic("%s: Invalid operation for kern_sigprocmask()", - __func__); + fii = (struct fuse_interrupt_in*)((char*)tick->tk_ms_fiov.base + + sizeof(struct fuse_in_header)); + + fuse_lck_mtx_lock(data->aw_mtx); + TAILQ_FOREACH_SAFE(otick, &data->aw_head, tk_aw_link, x_tick) { + if (otick->tk_unique == fii->unique) { + found = true; + break; + } + } + fuse_lck_mtx_unlock(data->aw_mtx); + + if (!found) { + /* Original is already complete. Just return */ + return 0; + } + + /* Clear the original ticket's interrupt association */ + otick->irq_unique = 0; + + if (tick->tk_aw_ohead.error == ENOSYS) { + fsess_set_notimpl(data->mp, FUSE_INTERRUPT); + return 0; + } else if (tick->tk_aw_ohead.error == EAGAIN) { + /* + * There are two reasons we might get this: + * 1) the daemon received the INTERRUPT request before the + * original, or + * 2) the daemon received the INTERRUPT request after it + * completed the original request. + * In the first case we should re-send the INTERRUPT. In the + * second, we should ignore it. + */ + /* Resend */ + fuse_interrupt_send(otick, EINTR); + return 0; + } else { + /* Illegal FUSE_INTERRUPT response */ + return EINVAL; + } } -static void -fuse_restore_sigs(sigset_t *oldset) +/* Interrupt the operation otick. 
Return err as its error code */ +void +fuse_interrupt_send(struct fuse_ticket *otick, int err) { + struct fuse_dispatcher fdi; + struct fuse_interrupt_in *fii; + struct fuse_in_header *ftick_hdr; + struct fuse_data *data = otick->tk_data; + struct fuse_ticket *tick, *xtick; + struct ucred reused_creds; + gid_t reused_groups[1]; - if (kern_sigprocmask(curthread, SIG_SETMASK, oldset, NULL, 0)) - panic("%s: Invalid operation for kern_sigprocmask()", - __func__); + if (otick->irq_unique == 0) { + /* + * If the daemon hasn't yet received otick, then we can answer + * it ourselves and return. + */ + fuse_lck_mtx_lock(data->ms_mtx); + STAILQ_FOREACH_SAFE(tick, &otick->tk_data->ms_head, tk_ms_link, + xtick) { + if (tick == otick) { + STAILQ_REMOVE(&otick->tk_data->ms_head, tick, + fuse_ticket, tk_ms_link); + otick->tk_data->ms_count--; + otick->tk_ms_link.stqe_next = NULL; + fuse_lck_mtx_unlock(data->ms_mtx); + + fuse_lck_mtx_lock(otick->tk_aw_mtx); + if (!fticket_answered(otick)) { + fticket_set_answered(otick); + otick->tk_aw_errno = err; + wakeup(otick); + } + fuse_lck_mtx_unlock(otick->tk_aw_mtx); + + fuse_ticket_drop(tick); + return; + } + } + fuse_lck_mtx_unlock(data->ms_mtx); + + /* + * If the fuse daemon doesn't support interrupts, then there's + * nothing more that we can do + */ + if (!fsess_isimpl(data->mp, FUSE_INTERRUPT)) + return; + + /* + * If the fuse daemon has already received otick, then we must + * send FUSE_INTERRUPT. + */ + ftick_hdr = fticket_in_header(otick); + reused_creds.cr_uid = ftick_hdr->uid; + reused_groups[0] = ftick_hdr->gid; + reused_creds.cr_groups = reused_groups; + fdisp_init(&fdi, sizeof(*fii)); + fdisp_make_pid(&fdi, FUSE_INTERRUPT, data, ftick_hdr->nodeid, + ftick_hdr->pid, &reused_creds); + + fii = fdi.indata; + fii->unique = otick->tk_unique; + fuse_insert_callback(fdi.tick, fuse_interrupt_callback); + + otick->irq_unique = fdi.tick->tk_unique; + /* Interrupt ops should be delivered ASAP */ + fuse_insert_message(fdi.tick, true); + fdisp_destroy(&fdi); + } else { + /* This ticket has already been interrupted */ + } } void @@ -181,14 +293,19 @@ } fiov->allocated_size = FU_AT_LEAST(size); fiov->credit = fuse_iov_credit; + /* Clear data buffer after reallocation */ + bzero(fiov->base, size); + } else if (size > fiov->len) { + /* Clear newly extended portion of data buffer */ + bzero((char*)fiov->base + fiov->len, size - fiov->len); } fiov->len = size; } +/* Resize the fiov if needed, and clear it's buffer */ void fiov_refresh(struct fuse_iov *fiov) { - bzero(fiov->base, fiov->len); fiov_adjust(fiov, 0); } @@ -211,8 +328,10 @@ if (ftick->tk_unique == 0) ftick->tk_unique = atomic_fetchadd_long(&data->ticketer, 1); + ftick->irq_unique = 0; + refcount_init(&ftick->tk_refcount, 1); - atomic_add_acq_int(&fuse_ticket_count, 1); + counter_u64_add(fuse_ticket_count, 1); return 0; } @@ -227,7 +346,7 @@ FUSE_ASSERT_MS_DONE(ftick); FUSE_ASSERT_AW_DONE(ftick); - atomic_subtract_acq_int(&fuse_ticket_count, 1); + counter_u64_add(fuse_ticket_count, -1); } static int @@ -269,7 +388,7 @@ return uma_zfree(ticket_zone, ftick); } -static inline +static inline void fticket_refresh(struct fuse_ticket *ftick) { @@ -292,15 +411,48 @@ ftick->tk_flag = 0; } +/* Prepar the ticket to be reused, but don't clear its data buffers */ +static inline void +fticket_reset(struct fuse_ticket *ftick) +{ + FUSE_ASSERT_MS_DONE(ftick); + FUSE_ASSERT_AW_DONE(ftick); + + ftick->tk_ms_bufdata = NULL; + ftick->tk_ms_bufsize = 0; + ftick->tk_ms_type = FT_M_FIOV; + + bzero(&ftick->tk_aw_ohead, sizeof(struct 
fuse_out_header)); + + ftick->tk_aw_errno = 0; + ftick->tk_aw_bufdata = NULL; + ftick->tk_aw_bufsize = 0; + ftick->tk_aw_type = FT_A_FIOV; + + ftick->tk_flag = 0; +} + static int fticket_wait_answer(struct fuse_ticket *ftick) { - sigset_t tset; - int err = 0; + struct thread *td = curthread; + sigset_t blockedset, oldset; + int err = 0, stops_deferred; struct fuse_data *data; + if (fsess_isimpl(ftick->tk_data->mp, FUSE_INTERRUPT)) { + SIGEMPTYSET(blockedset); + } else { + /* May as well block all signals */ + SIGFILLSET(blockedset); + SIGDELSET(blockedset, SIGKILL); + } + stops_deferred = sigdeferstop(SIGDEFERSTOP_SILENT); + kern_sigprocmask(td, SIG_BLOCK, NULL, &oldset, 0); + fuse_lck_mtx_lock(ftick->tk_aw_mtx); +retry: if (fticket_answered(ftick)) { goto out; } @@ -311,11 +463,13 @@ fticket_set_answered(ftick); goto out; } - fuse_block_sigs(&tset); + kern_sigprocmask(td, SIG_BLOCK, &blockedset, NULL, 0); err = msleep(ftick, &ftick->tk_aw_mtx, PCATCH, "fu_ans", data->daemon_timeout * hz); - fuse_restore_sigs(&tset); - if (err == EAGAIN) { /* same as EWOULDBLOCK */ + kern_sigprocmask(td, SIG_SETMASK, &oldset, NULL, 0); + if (err == EWOULDBLOCK) { + SDT_PROBE2(fusefs, , ipc, trace, 3, + "fticket_wait_answer: EWOULDBLOCK"); #ifdef XXXIP /* die conditionally */ if (!fdata_get_dead(data)) { fdata_set_dead(data); @@ -323,14 +477,58 @@ #endif err = ETIMEDOUT; fticket_set_answered(ftick); + } else if ((err == EINTR || err == ERESTART)) { + /* + * Whether we get EINTR or ERESTART depends on whether + * SA_RESTART was set by sigaction(2). + * + * Try to interrupt the operation and wait for an EINTR response + * to the original operation. If the file system does not + * support FUSE_INTERRUPT, then we'll just wait for it to + * complete like normal. If it does support FUSE_INTERRUPT, + * then it will either respond EINTR to the original operation, + * or EAGAIN to the interrupt. + */ + int sig; + bool fatal; + + SDT_PROBE2(fusefs, , ipc, trace, 4, + "fticket_wait_answer: interrupt"); + fuse_lck_mtx_unlock(ftick->tk_aw_mtx); + fuse_interrupt_send(ftick, err); + + PROC_LOCK(td->td_proc); + mtx_lock(&td->td_proc->p_sigacts->ps_mtx); + sig = cursig(td); + fatal = sig_isfatal(td->td_proc, sig); + mtx_unlock(&td->td_proc->p_sigacts->ps_mtx); + PROC_UNLOCK(td->td_proc); + + fuse_lck_mtx_lock(ftick->tk_aw_mtx); + if (!fatal) { + /* + * Block the just-delivered signal while we wait for an + * interrupt response + */ + SIGADDSET(blockedset, sig); + goto retry; + } else { + /* Return immediately for fatal signals */ + } + } else if (err) { + SDT_PROBE2(fusefs, , ipc, trace, 6, + "fticket_wait_answer: other error"); + } else { + SDT_PROBE2(fusefs, , ipc, trace, 7, "fticket_wait_answer: OK"); } out: if (!(err || fticket_answered(ftick))) { - SDT_PROBE2(fuse, , ipc, trace, 1, + SDT_PROBE2(fusefs, , ipc, trace, 1, "FUSE: requester was woken up but still no answer"); err = ENXIO; } fuse_lck_mtx_unlock(ftick->tk_aw_mtx); + sigallowstop(stops_deferred); return err; } @@ -386,6 +584,8 @@ data->fdev = fdev; mtx_init(&data->ms_mtx, "fuse message list mutex", NULL, MTX_DEF); STAILQ_INIT(&data->ms_head); + data->ms_count = 0; + knlist_init_mtx(&data->ks_rsel.si_note, &data->ms_mtx); mtx_init(&data->aw_mtx, "fuse answer list mutex", NULL, MTX_DEF); TAILQ_INIT(&data->aw_head); data->daemoncred = crhold(cred); @@ -405,11 +605,12 @@ return; /* Driving off stage all that stuff thrown at device... 
*/ - mtx_destroy(&data->ms_mtx); - mtx_destroy(&data->aw_mtx); sx_destroy(&data->rename_lock); - crfree(data->daemoncred); + mtx_destroy(&data->aw_mtx); + knlist_delete(&data->ks_rsel.si_note, curthread, 0); + knlist_destroy(&data->ks_rsel.si_note); + mtx_destroy(&data->ms_mtx); free(data, M_FUSEMSG); } @@ -478,8 +679,14 @@ fuse_lck_mtx_unlock(ftick->tk_data->aw_mtx); } +/* + * Insert a new upgoing ticket into the message queue + * + * If urgent is true, insert at the front of the queue. Otherwise, insert in + * FIFO order. + */ void -fuse_insert_message(struct fuse_ticket *ftick) +fuse_insert_message(struct fuse_ticket *ftick, bool urgent) { if (ftick->tk_flag & FT_DIRTY) { panic("FUSE: ticket reused without being refreshed"); @@ -490,9 +697,13 @@ return; } fuse_lck_mtx_lock(ftick->tk_data->ms_mtx); - fuse_ms_push(ftick); + if (urgent) + fuse_ms_push_head(ftick); + else + fuse_ms_push(ftick); wakeup_one(ftick->tk_data); selwakeuppri(&ftick->tk_data->ks_rsel, PZERO + 1); + KNOTE_LOCKED(&ftick->tk_data->ks_rsel.si_note, 0); fuse_lck_mtx_unlock(ftick->tk_data->ms_mtx); } @@ -505,8 +716,21 @@ opcode = fticket_opcode(ftick); switch (opcode) { + case FUSE_BMAP: + err = (blen == sizeof(struct fuse_bmap_out)) ? 0 : EINVAL; + break; + + case FUSE_LINK: case FUSE_LOOKUP: - err = (blen == sizeof(struct fuse_entry_out)) ? 0 : EINVAL; + case FUSE_MKDIR: + case FUSE_MKNOD: + case FUSE_SYMLINK: + if (fuse_libabi_geq(ftick->tk_data, 7, 9)) { + err = (blen == sizeof(struct fuse_entry_out)) ? + 0 : EINVAL; + } else { + err = (blen == FUSE_COMPAT_ENTRY_OUT_SIZE) ? 0 : EINVAL; + } break; case FUSE_FORGET: @@ -514,29 +738,19 @@ break; case FUSE_GETATTR: - err = (blen == sizeof(struct fuse_attr_out)) ? 0 : EINVAL; - break; - case FUSE_SETATTR: - err = (blen == sizeof(struct fuse_attr_out)) ? 0 : EINVAL; + if (fuse_libabi_geq(ftick->tk_data, 7, 9)) { + err = (blen == sizeof(struct fuse_attr_out)) ? + 0 : EINVAL; + } else { + err = (blen == FUSE_COMPAT_ATTR_OUT_SIZE) ? 0 : EINVAL; + } break; case FUSE_READLINK: err = (PAGE_SIZE >= blen) ? 0 : EINVAL; break; - case FUSE_SYMLINK: - err = (blen == sizeof(struct fuse_entry_out)) ? 0 : EINVAL; - break; - - case FUSE_MKNOD: - err = (blen == sizeof(struct fuse_entry_out)) ? 0 : EINVAL; - break; - - case FUSE_MKDIR: - err = (blen == sizeof(struct fuse_entry_out)) ? 0 : EINVAL; - break; - case FUSE_UNLINK: err = (blen == 0) ? 0 : EINVAL; break; @@ -549,10 +763,6 @@ err = (blen == 0) ? 0 : EINVAL; break; - case FUSE_LINK: - err = (blen == sizeof(struct fuse_entry_out)) ? 0 : EINVAL; - break; - case FUSE_OPEN: err = (blen == sizeof(struct fuse_open_out)) ? 0 : EINVAL; break; @@ -607,7 +817,9 @@ break; case FUSE_INIT: - if (blen == sizeof(struct fuse_init_out) || blen == 8) { + if (blen == sizeof(struct fuse_init_out) || + blen == FUSE_COMPAT_INIT_OUT_SIZE || + blen == FUSE_COMPAT_22_INIT_OUT_SIZE) { err = 0; } else { err = EINVAL; @@ -634,15 +846,15 @@ break; case FUSE_GETLK: - panic("FUSE: no response body format check for FUSE_GETLK"); + err = (blen == sizeof(struct fuse_lk_out)) ? 0 : EINVAL; break; case FUSE_SETLK: - panic("FUSE: no response body format check for FUSE_SETLK"); + err = (blen == 0) ? 0 : EINVAL; break; case FUSE_SETLKW: - panic("FUSE: no response body format check for FUSE_SETLKW"); + err = (blen == 0) ? 0 : EINVAL; break; case FUSE_ACCESS: @@ -650,8 +862,13 @@ break; case FUSE_CREATE: - err = (blen == sizeof(struct fuse_entry_out) + - sizeof(struct fuse_open_out)) ? 
0 : EINVAL; + if (fuse_libabi_geq(ftick->tk_data, 7, 9)) { + err = (blen == sizeof(struct fuse_entry_out) + + sizeof(struct fuse_open_out)) ? 0 : EINVAL; + } else { + err = (blen == FUSE_COMPAT_ENTRY_OUT_SIZE + + sizeof(struct fuse_open_out)) ? 0 : EINVAL; + } break; case FUSE_DESTROY: @@ -677,7 +894,7 @@ ihead->pid = pid; ihead->uid = cred->cr_uid; - ihead->gid = cred->cr_rgid; + ihead->gid = cred->cr_groups[0]; } /* @@ -705,18 +922,38 @@ return err; } -void -fdisp_make_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op, +/* + * Reinitialize a dispatcher from a pid and node id, without resizing or + * clearing its data buffers + */ +static void +fdisp_refresh_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct mount *mp, uint64_t nid, pid_t pid, struct ucred *cred) { - struct fuse_data *data = fuse_get_mpdata(mp); + MPASS(fdip->tick); + MPASS2(sizeof(fdip->finh) + fdip->iosize <= fdip->tick->tk_ms_fiov.len, + "Must use fdisp_make_pid to increase the size of the fiov"); + fticket_reset(fdip->tick); + FUSE_DIMALLOC(&fdip->tick->tk_ms_fiov, fdip->finh, + fdip->indata, fdip->iosize); + + fuse_setup_ihead(fdip->finh, fdip->tick, nid, op, fdip->iosize, pid, + cred); +} + +/* Initialize a dispatcher from a pid and node id */ +static void +fdisp_make_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op, + struct fuse_data *data, uint64_t nid, pid_t pid, struct ucred *cred) +{ if (fdip->tick) { fticket_refresh(fdip->tick); } else { fdip->tick = fuse_ticket_fetch(data); } + /* FUSE_DIMALLOC will bzero the fiovs when it enlarges them */ FUSE_DIMALLOC(&fdip->tick->tk_ms_fiov, fdip->finh, fdip->indata, fdip->iosize); @@ -727,22 +964,42 @@ fdisp_make(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct mount *mp, uint64_t nid, struct thread *td, struct ucred *cred) { + struct fuse_data *data = fuse_get_mpdata(mp); RECTIFY_TDCR(td, cred); - return fdisp_make_pid(fdip, op, mp, nid, td->td_proc->p_pid, cred); + return fdisp_make_pid(fdip, op, data, nid, td->td_proc->p_pid, cred); } void fdisp_make_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct vnode *vp, struct thread *td, struct ucred *cred) { + struct mount *mp = vnode_mount(vp); + struct fuse_data *data = fuse_get_mpdata(mp); + RECTIFY_TDCR(td, cred); - return fdisp_make_pid(fdip, op, vnode_mount(vp), VTOI(vp), + return fdisp_make_pid(fdip, op, data, VTOI(vp), td->td_proc->p_pid, cred); } -SDT_PROBE_DEFINE2(fuse, , ipc, fdisp_wait_answ_error, "char*", "int"); +/* Refresh a fuse_dispatcher so it can be reused, but don't zero its data */ +void +fdisp_refresh_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op, + struct vnode *vp, struct thread *td, struct ucred *cred) +{ + RECTIFY_TDCR(td, cred); + return fdisp_refresh_pid(fdip, op, vnode_mount(vp), VTOI(vp), + td->td_proc->p_pid, cred); +} +void +fdisp_refresh(struct fuse_dispatcher *fdip) +{ + fticket_refresh(fdip->tick); +} + +SDT_PROBE_DEFINE2(fusefs, , ipc, fdisp_wait_answ_error, "char*", "int"); + int fdisp_wait_answ(struct fuse_dispatcher *fdip) { @@ -750,7 +1007,7 @@ fdip->answ_stat = 0; fuse_insert_callback(fdip->tick, fuse_standard_handler); - fuse_insert_message(fdip->tick); + fuse_insert_message(fdip->tick, false); if ((err = fticket_wait_answer(fdip->tick))) { fuse_lck_mtx_lock(fdip->tick->tk_aw_mtx); @@ -761,7 +1018,7 @@ * the standard handler has completed his job. * So we drop the ticket and exit as usual. 
*/ - SDT_PROBE2(fuse, , ipc, fdisp_wait_answ_error, + SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error, "IPC: interrupted, already answered", err); fuse_lck_mtx_unlock(fdip->tick->tk_aw_mtx); goto out; @@ -771,7 +1028,7 @@ * Then by setting the answered flag we get *him* * to drop the ticket. */ - SDT_PROBE2(fuse, , ipc, fdisp_wait_answ_error, + SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error, "IPC: interrupted, setting to answered", err); fticket_set_answered(fdip->tick); fuse_lck_mtx_unlock(fdip->tick->tk_aw_mtx); @@ -779,14 +1036,22 @@ } } - if (fdip->tick->tk_aw_errno) { - SDT_PROBE2(fuse, , ipc, fdisp_wait_answ_error, + if (fdip->tick->tk_aw_errno == ENOTCONN) { + /* The daemon died while we were waiting for a response */ + err = ENOTCONN; + goto out; + } else if (fdip->tick->tk_aw_errno) { + /* + * There was some sort of communication error with the daemon + * that the client wouldn't understand. + */ + SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error, "IPC: explicit EIO-ing", fdip->tick->tk_aw_errno); err = EIO; goto out; } if ((err = fdip->tick->tk_aw_ohead.error)) { - SDT_PROBE2(fuse, , ipc, fdisp_wait_answ_error, + SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error, "IPC: setting status", fdip->tick->tk_aw_ohead.error); /* * This means a "proper" fuse syscall error. @@ -815,10 +1080,13 @@ ticket_zone = uma_zcreate("fuse_ticket", sizeof(struct fuse_ticket), fticket_ctor, fticket_dtor, fticket_init, fticket_fini, UMA_ALIGN_PTR, 0); + fuse_ticket_count = counter_u64_alloc(M_WAITOK); + counter_u64_zero(fuse_ticket_count); } void fuse_ipc_destroy(void) { + counter_u64_free(fuse_ticket_count); uma_zdestroy(ticket_zone); } Index: sys/fs/fuse/fuse_kernel.h =================================================================== --- sys/fs/fuse/fuse_kernel.h +++ sys/fs/fuse/fuse_kernel.h @@ -1,6 +1,6 @@ /*-- * This file defines the kernel interface of FUSE - * Copyright (C) 2001-2007 Miklos Szeredi + * Copyright (C) 2001-2008 Miklos Szeredi * * This program can be distributed under the terms of the GNU GPL. * See the file COPYING. 
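The reply-length validation added to fticket_pull() above is keyed to the negotiated ABI minor version: protocol 7.9 appended blksize and padding to struct fuse_attr, so servers speaking an older protocol legitimately return entry and attr bodies that are eight bytes shorter. The standalone sketch below is illustrative only (it is not part of the change); it just works through that arithmetic to confirm the FUSE_COMPAT_*_SIZE constants the checks rely on.

/*
 * Size arithmetic behind the per-ABI reply checks.  struct fuse_attr is
 * embedded in both fuse_entry_out and fuse_attr_out, so growing it by
 * 8 bytes in protocol 7.9 shifted all four expected reply sizes.
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

int
main(void)
{
        size_t attr_7_8 = 6 * sizeof(uint64_t) + 8 * sizeof(uint32_t);  /* 80 */
        size_t attr_7_9 = attr_7_8 + 2 * sizeof(uint32_t);   /* 88: + blksize, padding */
        size_t entry_hdr = 4 * sizeof(uint64_t) + 2 * sizeof(uint32_t); /* 40 */
        size_t attr_hdr = sizeof(uint64_t) + 2 * sizeof(uint32_t);      /* 16 */

        assert(entry_hdr + attr_7_8 == 120);  /* FUSE_COMPAT_ENTRY_OUT_SIZE */
        assert(entry_hdr + attr_7_9 == 128);  /* sizeof(struct fuse_entry_out), 7.9+ */
        assert(attr_hdr + attr_7_8 == 96);    /* FUSE_COMPAT_ATTR_OUT_SIZE */
        assert(attr_hdr + attr_7_9 == 104);   /* sizeof(struct fuse_attr_out), 7.9+ */
        return (0);
}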
@@ -34,69 +34,134 @@ * $FreeBSD$ */ -#ifndef linux -#include -#define __u64 uint64_t -#define __u32 uint32_t -#define __s32 int32_t +/* + * This file defines the kernel interface of FUSE + * + * Protocol changelog: + * + * 7.9: + * - new fuse_getattr_in input argument of GETATTR + * - add lk_flags in fuse_lk_in + * - add lock_owner field to fuse_setattr_in, fuse_read_in and fuse_write_in + * - add blksize field to fuse_attr + * - add file flags field to fuse_read_in and fuse_write_in + * + * 7.10 + * - add nonseekable open flag + * + * 7.11 + * - add IOCTL message + * - add unsolicited notification support + * + * 7.12 + * - add umask flag to input argument of open, mknod and mkdir + * - add notification messages for invalidation of inodes and + * directory entries + * + * 7.13 + * - make max number of background requests and congestion threshold + * tunables + * + * 7.14 + * - add splice support to fuse device + * + * 7.15 + * - add store notify + * - add retrieve notify + * + * 7.16 + * - add BATCH_FORGET request + * - FUSE_IOCTL_UNRESTRICTED shall now return with array of 'struct + * fuse_ioctl_iovec' instead of ambiguous 'struct iovec' + * - add FUSE_IOCTL_32BIT flag + * + * 7.17 + * - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK + * + * 7.18 + * - add FUSE_IOCTL_DIR flag + * - add FUSE_NOTIFY_DELETE + * + * 7.19 + * - add FUSE_FALLOCATE + * + * 7.20 + * - add FUSE_AUTO_INVAL_DATA + * 7.21 + * - add FUSE_READDIRPLUS + * - send the requested events in POLL request + * + * 7.22 + * - add FUSE_ASYNC_DIO + * + * 7.23 + * - add FUSE_WRITEBACK_CACHE + * - add time_gran to fuse_init_out + * - add reserved space to fuse_init_out + * - add FATTR_CTIME + * - add ctime and ctimensec to fuse_setattr_in + * - add FUSE_RENAME2 request + * - add FUSE_NO_OPEN_SUPPORT flag + */ + +#ifndef _FUSE_FUSE_KERNEL_H +#define _FUSE_FUSE_KERNEL_H + +#ifdef __linux__ +#include #else -#include -#include +#include #endif /** Version number of this interface */ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 8 +#define FUSE_KERNEL_MINOR_VERSION 23 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 -/** The major number of the fuse character device */ -#define FUSE_MAJOR MISC_MAJOR - -/** The minor number of the fuse character device */ -#define FUSE_MINOR 229 - /* Make sure all structures are padded to 64bit boundary, so 32bit userspace works under 64bit kernels */ struct fuse_attr { - __u64 ino; - __u64 size; - __u64 blocks; - __u64 atime; - __u64 mtime; - __u64 ctime; - __u32 atimensec; - __u32 mtimensec; - __u32 ctimensec; - __u32 mode; - __u32 nlink; - __u32 uid; - __u32 gid; - __u32 rdev; + uint64_t ino; + uint64_t size; + uint64_t blocks; + uint64_t atime; + uint64_t mtime; + uint64_t ctime; + uint32_t atimensec; + uint32_t mtimensec; + uint32_t ctimensec; + uint32_t mode; + uint32_t nlink; + uint32_t uid; + uint32_t gid; + uint32_t rdev; + uint32_t blksize; + uint32_t padding; }; struct fuse_kstatfs { - __u64 blocks; - __u64 bfree; - __u64 bavail; - __u64 files; - __u64 ffree; - __u32 bsize; - __u32 namelen; - __u32 frsize; - __u32 padding; - __u32 spare[6]; + uint64_t blocks; + uint64_t bfree; + uint64_t bavail; + uint64_t files; + uint64_t ffree; + uint32_t bsize; + uint32_t namelen; + uint32_t frsize; + uint32_t padding; + uint32_t spare[6]; }; struct fuse_file_lock { - __u64 start; - __u64 end; - __u32 type; - __u32 pid; /* tgid */ + uint64_t start; + uint64_t end; + uint32_t type; + uint32_t pid; /* tgid */ }; /** @@ -109,27 
+174,128 @@ #define FATTR_ATIME (1 << 4) #define FATTR_MTIME (1 << 5) #define FATTR_FH (1 << 6) +#define FATTR_ATIME_NOW (1 << 7) +#define FATTR_MTIME_NOW (1 << 8) +#define FATTR_LOCKOWNER (1 << 9) +#define FATTR_CTIME (1 << 10) /** * Flags returned by the OPEN request * * FOPEN_DIRECT_IO: bypass page cache for this open file * FOPEN_KEEP_CACHE: don't invalidate the data cache on open + * FOPEN_NONSEEKABLE: the file is not seekable */ #define FOPEN_DIRECT_IO (1 << 0) #define FOPEN_KEEP_CACHE (1 << 1) +#define FOPEN_NONSEEKABLE (1 << 2) /** * INIT request/reply flags + * + * FUSE_ASYNC_READ: asynchronous read requests + * FUSE_POSIX_LOCKS: remote locking for POSIX file locks + * FUSE_FILE_OPS: kernel sends file handle for fstat, etc... (not yet supported) + * FUSE_ATOMIC_O_TRUNC: handles the O_TRUNC open flag in the filesystem + * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".." + * FUSE_BIG_WRITES: filesystem can handle write size larger than 4kB + * FUSE_DONT_MASK: don't apply umask to file mode on create operations + * FUSE_SPLICE_WRITE: kernel supports splice write on the device + * FUSE_SPLICE_MOVE: kernel supports splice move on the device + * FUSE_SPLICE_READ: kernel supports splice read on the device + * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks + * FUSE_HAS_IOCTL_DIR: kernel supports ioctl on directories + * FUSE_AUTO_INVAL_DATA: automatically invalidate cached pages + * FUSE_DO_READDIRPLUS: do READDIRPLUS (READDIR+LOOKUP in one) + * FUSE_READDIRPLUS_AUTO: adaptive readdirplus + * FUSE_ASYNC_DIO: asynchronous direct I/O submission + * FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes + * FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) +#define FUSE_FILE_OPS (1 << 2) +#define FUSE_ATOMIC_O_TRUNC (1 << 3) +#define FUSE_EXPORT_SUPPORT (1 << 4) +#define FUSE_BIG_WRITES (1 << 5) +#define FUSE_DONT_MASK (1 << 6) +#define FUSE_SPLICE_WRITE (1 << 7) +#define FUSE_SPLICE_MOVE (1 << 8) +#define FUSE_SPLICE_READ (1 << 9) +#define FUSE_FLOCK_LOCKS (1 << 10) +#define FUSE_HAS_IOCTL_DIR (1 << 11) +#define FUSE_AUTO_INVAL_DATA (1 << 12) +#define FUSE_DO_READDIRPLUS (1 << 13) +#define FUSE_READDIRPLUS_AUTO (1 << 14) +#define FUSE_ASYNC_DIO (1 << 15) +#define FUSE_WRITEBACK_CACHE (1 << 16) +#define FUSE_NO_OPEN_SUPPORT (1 << 17) +#ifdef linux /** + * CUSE INIT request/reply flags + * + * CUSE_UNRESTRICTED_IOCTL: use unrestricted ioctl + */ +#define CUSE_UNRESTRICTED_IOCTL (1 << 0) +#endif /* linux */ + +/** * Release flags */ #define FUSE_RELEASE_FLUSH (1 << 0) +#define FUSE_RELEASE_FLOCK_UNLOCK (1 << 1) +/** + * Getattr flags + */ +#define FUSE_GETATTR_FH (1 << 0) + +/** + * Lock flags + */ +#define FUSE_LK_FLOCK (1 << 0) + +/** + * WRITE flags + * + * FUSE_WRITE_CACHE: delayed write from page cache, file handle is guessed + * FUSE_WRITE_LOCKOWNER: lock_owner field is valid + */ +#define FUSE_WRITE_CACHE (1 << 0) +#define FUSE_WRITE_LOCKOWNER (1 << 1) + +/** + * Read flags + */ +#define FUSE_READ_LOCKOWNER (1 << 1) + +/** + * Ioctl flags + * + * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine + * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed + * FUSE_IOCTL_RETRY: retry with new iovecs + * FUSE_IOCTL_32BIT: 32bit ioctl + * FUSE_IOCTL_DIR: is a directory + * + * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs + */ +#define FUSE_IOCTL_COMPAT (1 << 0) +#define FUSE_IOCTL_UNRESTRICTED (1 << 1) +#define FUSE_IOCTL_RETRY (1 << 2) +#define 
FUSE_IOCTL_32BIT (1 << 3) +#define FUSE_IOCTL_DIR (1 << 4) + +#define FUSE_IOCTL_MAX_IOV 256 + +/** + * Poll flags + * + * FUSE_POLL_SCHEDULE_NOTIFY: request poll notify + */ +#define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0) + enum fuse_opcode { FUSE_LOOKUP = 1, FUSE_FORGET = 2, /* no reply */ @@ -167,107 +333,179 @@ FUSE_INTERRUPT = 36, FUSE_BMAP = 37, FUSE_DESTROY = 38, + FUSE_IOCTL = 39, + FUSE_POLL = 40, + FUSE_NOTIFY_REPLY = 41, + FUSE_BATCH_FORGET = 42, + FUSE_FALLOCATE = 43, + FUSE_READDIRPLUS = 44, + FUSE_RENAME2 = 45, + +#ifdef linux + /* CUSE specific operations */ + CUSE_INIT = 4096, +#endif /* linux */ }; +enum fuse_notify_code { + FUSE_NOTIFY_POLL = 1, + FUSE_NOTIFY_INVAL_INODE = 2, + FUSE_NOTIFY_INVAL_ENTRY = 3, + FUSE_NOTIFY_STORE = 4, + FUSE_NOTIFY_RETRIEVE = 5, + FUSE_NOTIFY_DELETE = 6, + FUSE_NOTIFY_CODE_MAX, +}; + /* The read buffer is required to be at least 8k, but may be much larger */ #define FUSE_MIN_READ_BUFFER 8192 +#define FUSE_COMPAT_ENTRY_OUT_SIZE 120 + struct fuse_entry_out { - __u64 nodeid; /* Inode ID */ - __u64 generation; /* Inode generation: nodeid:gen must - be unique for the fs's lifetime */ - __u64 entry_valid; /* Cache timeout for the name */ - __u64 attr_valid; /* Cache timeout for the attributes */ - __u32 entry_valid_nsec; - __u32 attr_valid_nsec; + uint64_t nodeid; /* Inode ID */ + uint64_t generation; /* Inode generation: nodeid:gen must + be unique for the fs's lifetime */ + uint64_t entry_valid; /* Cache timeout for the name */ + uint64_t attr_valid; /* Cache timeout for the attributes */ + uint32_t entry_valid_nsec; + uint32_t attr_valid_nsec; struct fuse_attr attr; }; struct fuse_forget_in { - __u64 nlookup; + uint64_t nlookup; }; +struct fuse_forget_one { + uint64_t nodeid; + uint64_t nlookup; +}; + +struct fuse_batch_forget_in { + uint32_t count; + uint32_t dummy; +}; + +struct fuse_getattr_in { + uint32_t getattr_flags; + uint32_t dummy; + uint64_t fh; +}; + +#define FUSE_COMPAT_ATTR_OUT_SIZE 96 + struct fuse_attr_out { - __u64 attr_valid; /* Cache timeout for the attributes */ - __u32 attr_valid_nsec; - __u32 dummy; + uint64_t attr_valid; /* Cache timeout for the attributes */ + uint32_t attr_valid_nsec; + uint32_t dummy; struct fuse_attr attr; }; +#define FUSE_COMPAT_MKNOD_IN_SIZE 8 + +struct fuse_mknod_in { + uint32_t mode; + uint32_t rdev; + uint32_t umask; + uint32_t padding; +}; + struct fuse_mkdir_in { - __u32 mode; - __u32 padding; + uint32_t mode; + uint32_t umask; }; struct fuse_rename_in { - __u64 newdir; + uint64_t newdir; }; +struct fuse_rename2_in { + uint64_t newdir; + uint32_t flags; + uint32_t padding; +}; + struct fuse_link_in { - __u64 oldnodeid; + uint64_t oldnodeid; }; struct fuse_setattr_in { - __u32 valid; - __u32 padding; - __u64 fh; - __u64 size; - __u64 unused1; - __u64 atime; - __u64 mtime; - __u64 unused2; - __u32 atimensec; - __u32 mtimensec; - __u32 unused3; - __u32 mode; - __u32 unused4; - __u32 uid; - __u32 gid; - __u32 unused5; + uint32_t valid; + uint32_t padding; + uint64_t fh; + uint64_t size; + uint64_t lock_owner; + uint64_t atime; + uint64_t mtime; + uint64_t ctime; + uint32_t atimensec; + uint32_t mtimensec; + uint32_t ctimensec; + uint32_t mode; + uint32_t unused4; + uint32_t uid; + uint32_t gid; + uint32_t unused5; }; struct fuse_open_in { - __u32 flags; - __u32 mode; + uint32_t flags; + uint32_t unused; }; +struct fuse_create_in { + uint32_t flags; + uint32_t mode; + uint32_t umask; + uint32_t padding; +}; + struct fuse_open_out { - __u64 fh; - __u32 open_flags; - __u32 padding; + uint64_t fh; + 
uint32_t open_flags; + uint32_t padding; }; struct fuse_release_in { - __u64 fh; - __u32 flags; - __u32 release_flags; - __u64 lock_owner; + uint64_t fh; + uint32_t flags; + uint32_t release_flags; + uint64_t lock_owner; }; struct fuse_flush_in { - __u64 fh; - __u32 unused; - __u32 padding; - __u64 lock_owner; + uint64_t fh; + uint32_t unused; + uint32_t padding; + uint64_t lock_owner; }; struct fuse_read_in { - __u64 fh; - __u64 offset; - __u32 size; - __u32 padding; + uint64_t fh; + uint64_t offset; + uint32_t size; + uint32_t read_flags; + uint64_t lock_owner; + uint32_t flags; + uint32_t padding; }; +#define FUSE_COMPAT_WRITE_IN_SIZE 24 + struct fuse_write_in { - __u64 fh; - __u64 offset; - __u32 size; - __u32 write_flags; + uint64_t fh; + uint64_t offset; + uint32_t size; + uint32_t write_flags; + uint64_t lock_owner; + uint32_t flags; + uint32_t padding; }; struct fuse_write_out { - __u32 size; - __u32 padding; + uint32_t size; + uint32_t padding; }; #define FUSE_COMPAT_STATFS_SIZE 48 @@ -277,40 +515,42 @@ }; struct fuse_fsync_in { - __u64 fh; - __u32 fsync_flags; - __u32 padding; + uint64_t fh; + uint32_t fsync_flags; + uint32_t padding; }; +struct fuse_setxattr_in { + uint32_t size; + uint32_t flags; +}; + struct fuse_listxattr_in { - __u32 size; - __u32 flags; + uint32_t size; + uint32_t padding; }; struct fuse_listxattr_out { - __u32 size; - __u32 flags; + uint32_t size; + uint32_t padding; }; struct fuse_getxattr_in { - __u32 size; - __u32 padding; + uint32_t size; + uint32_t padding; }; struct fuse_getxattr_out { - __u32 size; - __u32 padding; + uint32_t size; + uint32_t padding; }; -struct fuse_setxattr_in { - __u32 size; - __u32 flags; -}; - struct fuse_lk_in { - __u64 fh; - __u64 owner; + uint64_t fh; + uint64_t owner; struct fuse_file_lock lk; + uint32_t lk_flags; + uint32_t padding; }; struct fuse_lk_out { @@ -318,66 +558,197 @@ }; struct fuse_access_in { - __u32 mask; - __u32 padding; + uint32_t mask; + uint32_t padding; }; struct fuse_init_in { - __u32 major; - __u32 minor; - __u32 max_readahead; - __u32 flags; + uint32_t major; + uint32_t minor; + uint32_t max_readahead; + uint32_t flags; }; +#define FUSE_COMPAT_INIT_OUT_SIZE 8 +#define FUSE_COMPAT_22_INIT_OUT_SIZE 24 + struct fuse_init_out { - __u32 major; - __u32 minor; - __u32 max_readahead; - __u32 flags; - __u32 unused; - __u32 max_write; + uint32_t major; + uint32_t minor; + uint32_t max_readahead; + uint32_t flags; + uint16_t max_background; + uint16_t congestion_threshold; + uint32_t max_write; + uint32_t time_gran; + uint32_t unused[9]; }; +#ifdef linux +#define CUSE_INIT_INFO_MAX 4096 + +struct cuse_init_in { + uint32_t major; + uint32_t minor; + uint32_t unused; + uint32_t flags; +}; + +struct cuse_init_out { + uint32_t major; + uint32_t minor; + uint32_t unused; + uint32_t flags; + uint32_t max_read; + uint32_t max_write; + uint32_t dev_major; /* chardev major */ + uint32_t dev_minor; /* chardev minor */ + uint32_t spare[10]; +}; +#endif /* linux */ + struct fuse_interrupt_in { - __u64 unique; + uint64_t unique; }; struct fuse_bmap_in { - __u64 block; - __u32 blocksize; - __u32 padding; + uint64_t block; + uint32_t blocksize; + uint32_t padding; }; struct fuse_bmap_out { - __u64 block; + uint64_t block; }; +struct fuse_ioctl_in { + uint64_t fh; + uint32_t flags; + uint32_t cmd; + uint64_t arg; + uint32_t in_size; + uint32_t out_size; +}; + +struct fuse_ioctl_iovec { + uint64_t base; + uint64_t len; +}; + +struct fuse_ioctl_out { + int32_t result; + uint32_t flags; + uint32_t in_iovs; + uint32_t out_iovs; +}; 
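struct fuse_init_out has grown twice over the protocol's history, which is why the FUSE_INIT case in fticket_pull() accepts three distinct reply lengths. The sketch below is illustrative only: it uses a local copy of the 7.23 layout so it compiles on its own, and the type and variable names are hypothetical.

/*
 * The three FUSE_INIT reply sizes accepted by the kernel: very old servers
 * send only major and minor; servers predating protocol 7.23 stop after
 * max_write; 7.23 and newer send the whole structure (time_gran plus
 * reserved space).
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct init_out_7_23 {          /* local mirror of struct fuse_init_out */
        uint32_t major;
        uint32_t minor;
        uint32_t max_readahead;
        uint32_t flags;
        uint16_t max_background;
        uint16_t congestion_threshold;
        uint32_t max_write;
        uint32_t time_gran;
        uint32_t unused[9];
};

int
main(void)
{
        size_t compat = 2 * sizeof(uint32_t);             /*  8, FUSE_COMPAT_INIT_OUT_SIZE */
        size_t compat_22 = offsetof(struct init_out_7_23, time_gran);
                                                          /* 24, FUSE_COMPAT_22_INIT_OUT_SIZE */
        size_t full = sizeof(struct init_out_7_23);       /* 64 */

        printf("FUSE_INIT reply sizes: %zu %zu %zu\n", compat, compat_22, full);
        return (0);
}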
+ +struct fuse_poll_in { + uint64_t fh; + uint64_t kh; + uint32_t flags; + uint32_t events; +}; + +struct fuse_poll_out { + uint32_t revents; + uint32_t padding; +}; + +struct fuse_notify_poll_wakeup_out { + uint64_t kh; +}; + +struct fuse_fallocate_in { + uint64_t fh; + uint64_t offset; + uint64_t length; + uint32_t mode; + uint32_t padding; +}; + struct fuse_in_header { - __u32 len; - __u32 opcode; - __u64 unique; - __u64 nodeid; - __u32 uid; - __u32 gid; - __u32 pid; - __u32 padding; + uint32_t len; + uint32_t opcode; + uint64_t unique; + uint64_t nodeid; + uint32_t uid; + uint32_t gid; + uint32_t pid; + uint32_t padding; }; struct fuse_out_header { - __u32 len; - __s32 error; - __u64 unique; + uint32_t len; + int32_t error; + uint64_t unique; }; struct fuse_dirent { - __u64 ino; - __u64 off; - __u32 namelen; - __u32 type; - char name[0]; + uint64_t ino; + uint64_t off; + uint32_t namelen; + uint32_t type; + char name[]; }; #define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name) -#define FUSE_DIRENT_ALIGN(x) (((x) + sizeof(__u64) - 1) & ~(sizeof(__u64) - 1)) +#define FUSE_DIRENT_ALIGN(x) \ + (((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1)) #define FUSE_DIRENT_SIZE(d) \ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen) + +struct fuse_direntplus { + struct fuse_entry_out entry_out; + struct fuse_dirent dirent; +}; + +#define FUSE_NAME_OFFSET_DIRENTPLUS \ + offsetof(struct fuse_direntplus, dirent.name) +#define FUSE_DIRENTPLUS_SIZE(d) \ + FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET_DIRENTPLUS + (d)->dirent.namelen) + +struct fuse_notify_inval_inode_out { + uint64_t ino; + int64_t off; + int64_t len; +}; + +struct fuse_notify_inval_entry_out { + uint64_t parent; + uint32_t namelen; + uint32_t padding; +}; + +struct fuse_notify_delete_out { + uint64_t parent; + uint64_t child; + uint32_t namelen; + uint32_t padding; +}; + +struct fuse_notify_store_out { + uint64_t nodeid; + uint64_t offset; + uint32_t size; + uint32_t padding; +}; + +struct fuse_notify_retrieve_out { + uint64_t notify_unique; + uint64_t nodeid; + uint64_t offset; + uint32_t size; + uint32_t padding; +}; + +/* Matches the size of fuse_write_in */ +struct fuse_notify_retrieve_in { + uint64_t dummy1; + uint64_t offset; + uint32_t size; + uint32_t dummy2; + uint64_t dummy3; + uint64_t dummy4; +}; + +#endif /* _FUSE_FUSE_KERNEL_H */ Index: sys/fs/fuse/fuse_main.c =================================================================== --- sys/fs/fuse/fuse_main.c +++ sys/fs/fuse/fuse_main.c @@ -33,6 +33,11 @@ * Copyright (C) 2005 Csaba Henk. * All rights reserved. * + * Copyright (c) 2019 The FreeBSD Foundation + * + * Portions of this software were developed by BFF Storage Systems, LLC under + * sponsorship from the FreeBSD Foundation. 
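The fuse_dirent and fuse_direntplus definitions above require every entry in a READDIR or READDIRPLUS reply to be padded to a 64-bit boundary. A small worked example follows; it is not part of the change, and ALIGN64 is just a local stand-in for FUSE_DIRENT_ALIGN.

/*
 * Worked example: space consumed by the directory entry "hello"
 * (namelen = 5) in READDIR and READDIRPLUS replies.
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define ALIGN64(x)      (((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1))

int
main(void)
{
        size_t name_offset = 8 + 8 + 4 + 4;     /* ino, off, namelen, type */

        /* FUSE_DIRENT_SIZE: 24-byte header + 5-byte name, padded up to 32. */
        assert(ALIGN64(name_offset + 5) == 32);

        /*
         * FUSE_DIRENTPLUS_SIZE prepends a struct fuse_entry_out (128 bytes
         * on protocol 7.9 and later), so the same entry costs 160 bytes in
         * a READDIRPLUS reply.
         */
        assert(ALIGN64(128 + name_offset + 5) == 160);
        return (0);
}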
+ * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -77,6 +82,10 @@ #include #include "fuse.h" +#include "fuse_file.h" +#include "fuse_ipc.h" +#include "fuse_internal.h" +#include "fuse_node.h" static void fuse_bringdown(eventhandler_tag eh_tag); static int fuse_loader(struct module *m, int what, void *arg); @@ -85,6 +94,7 @@ extern struct vfsops fuse_vfsops; extern struct cdevsw fuse_cdevsw; +extern struct vop_vector fuse_fifonops; extern struct vop_vector fuse_vnops; extern uma_zone_t fuse_pbuf_zone; @@ -96,11 +106,13 @@ .vfc_flags = VFCF_JAIL | VFCF_SYNTHETIC }; +SYSCTL_NODE(_vfs, OID_AUTO, fusefs, CTLFLAG_RW, 0, "FUSE tunables"); +SYSCTL_NODE(_vfs_fusefs, OID_AUTO, stats, CTLFLAG_RW, 0, "FUSE statistics"); SYSCTL_INT(_vfs_fusefs, OID_AUTO, kernelabi_major, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, FUSE_KERNEL_VERSION, "FUSE kernel abi major version"); SYSCTL_INT(_vfs_fusefs, OID_AUTO, kernelabi_minor, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, FUSE_KERNEL_MINOR_VERSION, "FUSE kernel abi minor version"); -SDT_PROVIDER_DEFINE(fuse); +SDT_PROVIDER_DEFINE(fusefs); /****************************** * @@ -111,7 +123,9 @@ static void fuse_bringdown(eventhandler_tag eh_tag) { - + fuse_node_destroy(); + fuse_internal_destroy(); + fuse_file_destroy(); fuse_ipc_destroy(); fuse_device_destroy(); mtx_destroy(&fuse_mtx); @@ -132,16 +146,14 @@ return (err); } fuse_ipc_init(); + fuse_file_init(); + fuse_internal_init(); + fuse_node_init(); fuse_pbuf_zone = pbuf_zsecond_create("fusepbuf", nswbuf / 2); /* vfs_modevent ignores its first arg */ if ((err = vfs_modevent(NULL, what, &fuse_vfsconf))) fuse_bringdown(eh_tag); - else - printf("fuse-freebsd: version %s, FUSE ABI %d.%d\n", - FUSE_FREEBSD_VERSION, - FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); - break; case MOD_UNLOAD: if ((err = vfs_modevent(NULL, what, &fuse_vfsconf))) Index: sys/fs/fuse/fuse_node.h =================================================================== --- sys/fs/fuse/fuse_node.h +++ sys/fs/fuse/fuse_node.h @@ -32,6 +32,11 @@ * * Copyright (C) 2005 Csaba Henk. * All rights reserved. + * + * Copyright (c) 2019 The FreeBSD Foundation + * + * Portions of this software were developed by BFF Storage Systems, LLC under + * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -60,60 +65,121 @@ #ifndef _FUSE_NODE_H_ #define _FUSE_NODE_H_ +#include #include #include #include "fuse_file.h" -#define FN_REVOKED 0x00000020 -#define FN_FLUSHINPROG 0x00000040 -#define FN_FLUSHWANT 0x00000080 -#define FN_SIZECHANGE 0x00000100 -#define FN_DIRECTIO 0x00000200 +#define FN_REVOKED 0x00000020 +#define FN_FLUSHINPROG 0x00000040 +#define FN_FLUSHWANT 0x00000080 +/* + * Indicates that the file's size is dirty; the kernel has changed it but not + * yet send the change to the daemon. When this bit is set, the + * cache_attrs.va_size field does not time out. + */ +#define FN_SIZECHANGE 0x00000100 +#define FN_DIRECTIO 0x00000200 +/* Indicates that parent_nid is valid */ +#define FN_PARENT_NID 0x00000400 +/* + * Indicates that the file's cached timestamps are dirty. They will be flushed + * during the next SETATTR or WRITE. Until then, the cached fields will not + * time out. 
+ */ +#define FN_MTIMECHANGE 0x00000800 +#define FN_CTIMECHANGE 0x00001000 + struct fuse_vnode_data { /** self **/ uint64_t nid; + uint64_t generation; /** parent **/ - /* XXXIP very likely to be stale, it's not updated in rename() */ uint64_t parent_nid; /** I/O **/ - struct fuse_filehandle fufh[FUFH_MAXTYPE]; + /* List of file handles for all of the vnode's open file descriptors */ + LIST_HEAD(, fuse_filehandle) handles; /** flags **/ uint32_t flag; /** meta **/ - bool valid_attr_cache; + /* The monotonic time after which the attr cache is invalid */ + struct bintime attr_cache_timeout; + /* + * Monotonic time after which the entry is invalid. Used for lookups + * by nodeid instead of pathname. + */ + struct bintime entry_cache_timeout; struct vattr cached_attrs; - off_t filesize; uint64_t nlookup; enum vtype vtype; }; +/* + * This overlays the fid structure (see mount.h). Mostly the same as the types + * used by UFS and ext2. + */ +struct fuse_fid { + uint16_t len; /* Length of structure. */ + uint16_t pad; /* Force 32-bit alignment. */ + uint32_t gen; /* Generation number. */ + uint64_t nid; /* FUSE node id. */ +}; + #define VTOFUD(vp) \ ((struct fuse_vnode_data *)((vp)->v_data)) #define VTOI(vp) (VTOFUD(vp)->nid) -#define VTOVA(vp) \ - (VTOFUD(vp)->valid_attr_cache ? \ - &(VTOFUD(vp)->cached_attrs) : NULL) +static inline struct vattr* +VTOVA(struct vnode *vp) +{ + struct bintime now; + + getbinuptime(&now); + if (bintime_cmp(&(VTOFUD(vp)->attr_cache_timeout), &now, >)) + return &(VTOFUD(vp)->cached_attrs); + else + return NULL; +} + +static inline void +fuse_vnode_clear_attr_cache(struct vnode *vp) +{ + bintime_clear(&VTOFUD(vp)->attr_cache_timeout); +} + +static uint32_t inline +fuse_vnode_hash(uint64_t id) +{ + return (fnv_32_buf(&id, sizeof(id), FNV1_32_INIT)); +} + #define VTOILLU(vp) ((uint64_t)(VTOFUD(vp) ? VTOI(vp) : 0)) #define FUSE_NULL_ID 0 +extern struct vop_vector fuse_fifoops; extern struct vop_vector fuse_vnops; +int fuse_vnode_cmp(struct vnode *vp, void *nidp); + static inline void fuse_vnode_setparent(struct vnode *vp, struct vnode *dvp) { if (dvp != NULL && vp->v_type == VDIR) { MPASS(dvp->v_type == VDIR); VTOFUD(vp)->parent_nid = VTOI(dvp); + VTOFUD(vp)->flag |= FN_PARENT_NID; } } +int fuse_vnode_size(struct vnode *vp, off_t *filesize, struct ucred *cred, + struct thread *td); + void fuse_vnode_destroy(struct vnode *vp); int fuse_vnode_get(struct mount *mp, struct fuse_entry_out *feo, @@ -123,10 +189,14 @@ void fuse_vnode_open(struct vnode *vp, int32_t fuse_open_flags, struct thread *td); -void fuse_vnode_refreshsize(struct vnode *vp, struct ucred *cred); +int fuse_vnode_savesize(struct vnode *vp, struct ucred *cred, pid_t pid); -int fuse_vnode_savesize(struct vnode *vp, struct ucred *cred); - int fuse_vnode_setsize(struct vnode *vp, off_t newsize); +void fuse_vnode_undirty_cached_timestamps(struct vnode *vp); + +void fuse_vnode_update(struct vnode *vp, int flags); + +void fuse_node_init(void); +void fuse_node_destroy(void); #endif /* _FUSE_NODE_H_ */ Index: sys/fs/fuse/fuse_node.c =================================================================== --- sys/fs/fuse/fuse_node.c +++ sys/fs/fuse/fuse_node.c @@ -33,6 +33,11 @@ * Copyright (C) 2005 Csaba Henk. * All rights reserved. * + * Copyright (c) 2019 The FreeBSD Foundation + * + * Portions of this software were developed by BFF Storage Systems, LLC under + * sponsorship from the FreeBSD Foundation. 
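The attr_cache_timeout field and the new VTOVA() above implement attribute caching by deadline: the relative validity period returned by the daemon (attr_valid seconds plus attr_valid_nsec) is converted to an absolute monotonic time when the reply is cached, and later lookups only compare the current uptime against it. The userspace sketch below shows the same policy with clock_gettime(CLOCK_MONOTONIC) standing in for the kernel's getbinuptime()/bintime; the names cache_arm and cache_valid are hypothetical and the overflow handling is simplified.

#include <stdbool.h>
#include <stdint.h>
#include <time.h>

struct attr_cache {
        struct timespec deadline;       /* analogous to attr_cache_timeout */
};

/* Arm the cache when a FUSE_GETATTR or FUSE_LOOKUP reply arrives. */
void
cache_arm(struct attr_cache *c, uint64_t attr_valid, uint32_t attr_valid_nsec)
{
        clock_gettime(CLOCK_MONOTONIC, &c->deadline);
        c->deadline.tv_sec += attr_valid;
        c->deadline.tv_nsec += attr_valid_nsec;
        if (c->deadline.tv_nsec >= 1000000000L) {
                c->deadline.tv_nsec -= 1000000000L;
                c->deadline.tv_sec++;
        }
}

/* VTOVA-style check: cached attributes are usable until the deadline. */
bool
cache_valid(const struct attr_cache *c)
{
        struct timespec now;

        clock_gettime(CLOCK_MONOTONIC, &now);
        if (now.tv_sec != c->deadline.tv_sec)
                return (now.tv_sec < c->deadline.tv_sec);
        return (now.tv_nsec < c->deadline.tv_nsec);
}

A getattr-style caller would test cache_valid() first and fall back to a fresh round trip only when it returns false, which mirrors how fuse_vnode_size() later in this change resorts to fuse_internal_do_getattr() only when the cached attributes are stale.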
+ * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -59,6 +64,7 @@ __FBSDID("$FreeBSD$"); #include +#include #include #include #include @@ -77,8 +83,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -89,65 +95,40 @@ #include "fuse_io.h" #include "fuse_ipc.h" -SDT_PROVIDER_DECLARE(fuse); +SDT_PROVIDER_DECLARE(fusefs); /* * Fuse trace probe: * arg0: verbosity. Higher numbers give more verbose messages * arg1: Textual message */ -SDT_PROBE_DEFINE2(fuse, , node, trace, "int", "char*"); +SDT_PROBE_DEFINE2(fusefs, , node, trace, "int", "char*"); MALLOC_DEFINE(M_FUSEVN, "fuse_vnode", "fuse vnode private data"); static int sysctl_fuse_cache_mode(SYSCTL_HANDLER_ARGS); -static int fuse_node_count = 0; +static counter_u64_t fuse_node_count; -SYSCTL_INT(_vfs_fusefs, OID_AUTO, node_count, CTLFLAG_RD, - &fuse_node_count, 0, "Count of FUSE vnodes"); +SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, node_count, CTLFLAG_RD, + &fuse_node_count, "Count of FUSE vnodes"); int fuse_data_cache_mode = FUSE_CACHE_WT; +/* + * DEPRECATED + * This sysctl is no longer needed as of fuse protocol 7.23. Individual + * servers can select the cache behavior they need for each mountpoint: + * - writethrough: the default + * - writeback: set FUSE_WRITEBACK_CACHE in fuse_init_out.flags + * - uncached: set FOPEN_DIRECT_IO for every file + * The sysctl is retained primarily for use by jails supporting older FUSE + * protocols. It may be removed entirely once FreeBSD 11.3 and 12.0 are EOL. + */ SYSCTL_PROC(_vfs_fusefs, OID_AUTO, data_cache_mode, CTLTYPE_INT|CTLFLAG_RW, &fuse_data_cache_mode, 0, sysctl_fuse_cache_mode, "I", "Zero: disable caching of FUSE file data; One: write-through caching " "(default); Two: write-back caching (generally unsafe)"); -int fuse_data_cache_invalidate = 0; - -SYSCTL_INT(_vfs_fusefs, OID_AUTO, data_cache_invalidate, CTLFLAG_RW, - &fuse_data_cache_invalidate, 0, - "If non-zero, discard cached clean file data when there are no active file" - " users"); - -int fuse_mmap_enable = 1; - -SYSCTL_INT(_vfs_fusefs, OID_AUTO, mmap_enable, CTLFLAG_RW, - &fuse_mmap_enable, 0, - "If non-zero, and data_cache_mode is also non-zero, enable mmap(2) of " - "FUSE files"); - -int fuse_refresh_size = 0; - -SYSCTL_INT(_vfs_fusefs, OID_AUTO, refresh_size, CTLFLAG_RW, - &fuse_refresh_size, 0, - "If non-zero, and no dirty file extension data is buffered, fetch file " - "size before write operations"); - -int fuse_sync_resize = 1; - -SYSCTL_INT(_vfs_fusefs, OID_AUTO, sync_resize, CTLFLAG_RW, - &fuse_sync_resize, 0, - "If a cached write extended a file, inform FUSE filesystem of the changed" - "size immediately subsequent to the issued writes"); - -int fuse_fix_broken_io = 0; - -SYSCTL_INT(_vfs_fusefs, OID_AUTO, fix_broken_io, CTLFLAG_RW, - &fuse_fix_broken_io, 0, - "If non-zero, print a diagnostic warning if a userspace filesystem returns" - " EIO on reads of recently extended portions of files"); - static int sysctl_fuse_cache_mode(SYSCTL_HANDLER_ARGS) { @@ -174,9 +155,8 @@ fuse_vnode_init(struct vnode *vp, struct fuse_vnode_data *fvdat, uint64_t nodeid, enum vtype vtyp) { - int i; - fvdat->nid = nodeid; + LIST_INIT(&fvdat->handles); vattr_null(&fvdat->cached_attrs); if (nodeid == FUSE_ROOT_ID) { vp->v_vflag |= VV_ROOT; @@ -184,10 +164,7 @@ vp->v_type = vtyp; vp->v_data = fvdat; - for (i = 0; i < FUFH_MAXTYPE; i++) - fvdat->fufh[i].fh_type = FUFH_INVALID; - - 
atomic_add_acq_int(&fuse_node_count, 1); + counter_u64_add(fuse_node_count, 1); } void @@ -196,23 +173,21 @@ struct fuse_vnode_data *fvdat = vp->v_data; vp->v_data = NULL; + KASSERT(LIST_EMPTY(&fvdat->handles), + ("Destroying fuse vnode with open files!")); free(fvdat, M_FUSEVN); - atomic_subtract_acq_int(&fuse_node_count, 1); + counter_u64_add(fuse_node_count, -1); } -static int +int fuse_vnode_cmp(struct vnode *vp, void *nidp) { return (VTOI(vp) != *((uint64_t *)nidp)); } -static uint32_t inline -fuse_vnode_hash(uint64_t id) -{ - return (fnv_32_buf(&id, sizeof(id), FNV1_32_INIT)); -} - +SDT_PROBE_DEFINE3(fusefs, , node, stale_vnode, "struct vnode*", "enum vtype", + "uint64_t"); static int fuse_vnode_alloc(struct mount *mp, struct thread *td, @@ -220,10 +195,12 @@ enum vtype vtyp, struct vnode **vpp) { + struct fuse_data *data; struct fuse_vnode_data *fvdat; struct vnode *vp2; int err = 0; + data = fuse_get_mpdata(mp); if (vtyp == VNON) { return EINVAL; } @@ -234,12 +211,34 @@ return (err); if (*vpp) { - MPASS((*vpp)->v_type == vtyp && (*vpp)->v_data != NULL); - SDT_PROBE2(fuse, , node, trace, 1, "vnode taken from hash"); + if ((*vpp)->v_type != vtyp) { + /* + * STALE vnode! This probably indicates a buggy + * server, but it could also be the result of a race + * between FUSE_LOOKUP and another client's + * FUSE_UNLINK/FUSE_CREATE + */ + SDT_PROBE3(fusefs, , node, stale_vnode, *vpp, vtyp, + nodeid); + fuse_internal_vnode_disappear(*vpp); + lockmgr((*vpp)->v_vnlock, LK_RELEASE, NULL); + *vpp = NULL; + return (EAGAIN); + } + MPASS((*vpp)->v_data != NULL); + MPASS(VTOFUD(*vpp)->nid == nodeid); + SDT_PROBE2(fusefs, , node, trace, 1, "vnode taken from hash"); return (0); } fvdat = malloc(sizeof(*fvdat), M_FUSEVN, M_WAITOK | M_ZERO); - err = getnewvnode("fuse", mp, &fuse_vnops, vpp); + switch (vtyp) { + case VFIFO: + err = getnewvnode("fuse", mp, &fuse_fifoops, vpp); + break; + default: + err = getnewvnode("fuse", mp, &fuse_vnops, vpp); + break; + } if (err) { free(fvdat, M_FUSEVN); return (err); @@ -249,14 +248,23 @@ err = insmntque(*vpp, mp); ASSERT_VOP_ELOCKED(*vpp, "fuse_vnode_alloc"); if (err) { + lockmgr((*vpp)->v_vnlock, LK_RELEASE, NULL); free(fvdat, M_FUSEVN); *vpp = NULL; return (err); } + /* Disallow async reads for fifos because UFS does. I don't know why */ + if (data->dataflags & FSESS_ASYNC_READ && vtyp != VFIFO) + VN_LOCK_ASHARE(*vpp); + err = vfs_hash_insert(*vpp, fuse_vnode_hash(nodeid), LK_EXCLUSIVE, td, &vp2, fuse_vnode_cmp, &nodeid); - if (err) + if (err) { + lockmgr((*vpp)->v_vnlock, LK_RELEASE, NULL); + free(fvdat, M_FUSEVN); + *vpp = NULL; return (err); + } if (vp2 != NULL) { *vpp = vp2; return (0); @@ -277,6 +285,11 @@ enum vtype vtyp) { struct thread *td = (cnp != NULL ? cnp->cn_thread : curthread); + /* + * feo should only be NULL for the root directory, which (when libfuse + * is used) always has generation 0 + */ + uint64_t generation = feo ? 
feo->generation : 0; int err = 0; err = fuse_vnode_alloc(mp, td, nodeid, vtyp, vpp); @@ -284,22 +297,28 @@ return err; } if (dvp != NULL) { - MPASS((cnp->cn_flags & ISDOTDOT) == 0); - MPASS(!(cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.')); + MPASS(cnp && (cnp->cn_flags & ISDOTDOT) == 0); + MPASS(cnp && + !(cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.')); fuse_vnode_setparent(*vpp, dvp); } if (dvp != NULL && cnp != NULL && (cnp->cn_flags & MAKEENTRY) != 0 && feo != NULL && (feo->entry_valid != 0 || feo->entry_valid_nsec != 0)) { + struct timespec timeout; + ASSERT_VOP_LOCKED(*vpp, "fuse_vnode_get"); ASSERT_VOP_LOCKED(dvp, "fuse_vnode_get"); - cache_enter(dvp, *vpp, cnp); + + fuse_validity_2_timespec(feo, &timeout); + cache_enter_time(dvp, *vpp, cnp, &timeout, NULL); } + VTOFUD(*vpp)->generation = generation; /* * In userland, libfuse uses cached lookups for dot and dotdot entries, * thus it does not really bump the nlookup counter for forget. - * Follow the same semantic and avoid tu bump it in order to keep + * Follow the same semantic and avoid the bump in order to keep * nlookup counters consistent. */ if (cnp == NULL || ((cnp->cn_flags & ISDOTDOT) == 0 && @@ -309,44 +328,19 @@ return 0; } +/* + * Called for every fusefs vnode open to initialize the vnode (not + * fuse_filehandle) for use + */ void fuse_vnode_open(struct vnode *vp, int32_t fuse_open_flags, struct thread *td) { - /* - * Funcation is called for every vnode open. - * Merge fuse_open_flags it may be 0 - */ - /* - * Ideally speaking, direct io should be enabled on - * fd's but do not see of any way of providing that - * this implementation. - * - * Also cannot think of a reason why would two - * different fd's on same vnode would like - * have DIRECT_IO turned on and off. But linux - * based implementation works on an fd not an - * inode and provides such a feature. - * - * XXXIP: Handle fd based DIRECT_IO - */ - if (fuse_open_flags & FOPEN_DIRECT_IO) { - ASSERT_VOP_ELOCKED(vp, __func__); - VTOFUD(vp)->flag |= FN_DIRECTIO; - fuse_io_invalbuf(vp, td); - } else { - if ((fuse_open_flags & FOPEN_KEEP_CACHE) == 0) - fuse_io_invalbuf(vp, td); - VTOFUD(vp)->flag &= ~FN_DIRECTIO; - } - - if (vnode_vtype(vp) == VREG) { - /* XXXIP prevent getattr, by using cached node size */ + if (vnode_vtype(vp) == VREG) vnode_create_vobject(vp, 0, td); - } } int -fuse_vnode_savesize(struct vnode *vp, struct ucred *cred) +fuse_vnode_savesize(struct vnode *vp, struct ucred *cred, pid_t pid) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct thread *td = curthread; @@ -375,10 +369,11 @@ fsai->valid = 0; /* Truncate to a new value. */ - fsai->size = fvdat->filesize; + MPASS((fvdat->flag & FN_SIZECHANGE) != 0); + fsai->size = fvdat->cached_attrs.va_size; fsai->valid |= FATTR_SIZE; - fuse_filehandle_getrw(vp, FUFH_WRONLY, &fufh); + fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid); if (fufh) { fsai->fh = fufh->fh_id; fsai->valid |= FATTR_FH; @@ -391,38 +386,116 @@ return err; } -void -fuse_vnode_refreshsize(struct vnode *vp, struct ucred *cred) -{ - - struct fuse_vnode_data *fvdat = VTOFUD(vp); - struct vattr va; - - if ((fvdat->flag & FN_SIZECHANGE) != 0 || - fuse_data_cache_mode == FUSE_CACHE_UC || - (fuse_refresh_size == 0 && fvdat->filesize != 0)) - return; - - VOP_GETATTR(vp, &va, cred); - SDT_PROBE2(fuse, , node, trace, 1, "refreshed file size"); -} - +/* + * Adjust the vnode's size to a new value, such as that provided by + * FUSE_GETATTR. 
+ */ int fuse_vnode_setsize(struct vnode *vp, off_t newsize) { struct fuse_vnode_data *fvdat = VTOFUD(vp); + struct vattr *attrs; off_t oldsize; + size_t iosize; + struct buf *bp = NULL; int err = 0; ASSERT_VOP_ELOCKED(vp, "fuse_vnode_setsize"); - oldsize = fvdat->filesize; - fvdat->filesize = newsize; - fvdat->flag |= FN_SIZECHANGE; + iosize = fuse_iosize(vp); + oldsize = fvdat->cached_attrs.va_size; + fvdat->cached_attrs.va_size = newsize; + if ((attrs = VTOVA(vp)) != NULL) + attrs->va_size = newsize; if (newsize < oldsize) { + daddr_t lbn; + err = vtruncbuf(vp, newsize, fuse_iosize(vp)); + if (err) + goto out; + if (newsize % iosize == 0) + goto out; + /* + * Zero the contents of the last partial block. + * Sure seems like vtruncbuf should do this for us. + */ + + lbn = newsize / iosize; + bp = getblk(vp, lbn, iosize, PCATCH, 0, 0); + if (!bp) { + err = EINTR; + goto out; + } + if (!(bp->b_flags & B_CACHE)) + goto out; /* Nothing to do */ + MPASS(bp->b_flags & B_VMIO); + vfs_bio_clrbuf(bp); + bp->b_dirtyend = MIN(bp->b_dirtyend, newsize - lbn * iosize); } +out: + if (bp) + brelse(bp); vnode_pager_setsize(vp, newsize); return err; +} + +/* Get the current, possibly dirty, size of the file */ +int +fuse_vnode_size(struct vnode *vp, off_t *filesize, struct ucred *cred, + struct thread *td) +{ + struct fuse_vnode_data *fvdat = VTOFUD(vp); + int error = 0; + + if (!(fvdat->flag & FN_SIZECHANGE) && + (VTOVA(vp) == NULL || fvdat->cached_attrs.va_size == VNOVAL)) + error = fuse_internal_do_getattr(vp, NULL, cred, td); + + if (!error) + *filesize = fvdat->cached_attrs.va_size; + + return error; +} + +void +fuse_vnode_undirty_cached_timestamps(struct vnode *vp) +{ + struct fuse_vnode_data *fvdat = VTOFUD(vp); + + fvdat->flag &= ~(FN_MTIMECHANGE | FN_CTIMECHANGE); +} + +/* Update a fuse file's cached timestamps */ +void +fuse_vnode_update(struct vnode *vp, int flags) +{ + struct fuse_vnode_data *fvdat = VTOFUD(vp); + struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp)); + struct timespec ts; + + vfs_timestamp(&ts); + + if (data->time_gran > 1) + ts.tv_nsec = rounddown(ts.tv_nsec, data->time_gran); + + if (flags & FN_MTIMECHANGE) + fvdat->cached_attrs.va_mtime = ts; + if (flags & FN_CTIMECHANGE) + fvdat->cached_attrs.va_ctime = ts; + + fvdat->flag |= flags; +} + +void +fuse_node_init(void) +{ + fuse_node_count = counter_u64_alloc(M_WAITOK); + counter_u64_zero(fuse_node_count); +} + +void +fuse_node_destroy(void) +{ + counter_u64_free(fuse_node_count); } Index: sys/fs/fuse/fuse_param.h =================================================================== --- sys/fs/fuse/fuse_param.h +++ /dev/null @@ -1,82 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-3-Clause - * - * Copyright (c) 2007-2009 Google Inc. and Amit Singh - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _FUSE_PARAM_H_ -#define _FUSE_PARAM_H_ - -/* - * This is the prefix ("fuse" by default) of the name of a FUSE device node - * in devfs. The suffix is the device number. "/dev/fuse0" is the first FUSE - * device by default. If you change the prefix from the default to something - * else, the user-space FUSE library will need to know about it too. - */ -#define FUSE_DEVICE_BASENAME "fuse" - -/* - * This is the number of /dev/fuse nodes we will create. goes from - * 0 to (FUSE_NDEVICES - 1). - */ -#define FUSE_NDEVICES 16 - -/* - * This is the default block size of the virtual storage devices that are - * implicitly implemented by the FUSE kernel extension. This can be changed - * on a per-mount basis (there's one such virtual device for each mount). - */ -#define FUSE_DEFAULT_BLOCKSIZE 4096 - -/* - * This is default I/O size used while accessing the virtual storage devices. - * This can be changed on a per-mount basis. - */ -#define FUSE_DEFAULT_IOSIZE 4096 - -#ifdef KERNEL - -/* - * This is the soft upper limit on the number of "request tickets" FUSE's - * user-kernel IPC layer can have for a given mount. This can be modified - * through the fuse.* sysctl interface. - */ -#define FUSE_DEFAULT_MAX_FREE_TICKETS 1024 - -#define FUSE_DEFAULT_IOV_PERMANENT_BUFSIZE (1L << 19) -#define FUSE_DEFAULT_IOV_CREDIT 16 - -#endif - -#define FUSE_LINK_MAX UINT32_MAX - -#endif /* _FUSE_PARAM_H_ */ Index: sys/fs/fuse/fuse_vfsops.c =================================================================== --- sys/fs/fuse/fuse_vfsops.c +++ sys/fs/fuse/fuse_vfsops.c @@ -33,6 +33,11 @@ * Copyright (C) 2005 Csaba Henk. * All rights reserved. * + * Copyright (c) 2019 The FreeBSD Foundation + * + * Portions of this software were developed by BFF Storage Systems, LLC under + * sponsorship from the FreeBSD Foundation. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -81,7 +86,6 @@ #include #include "fuse.h" -#include "fuse_param.h" #include "fuse_node.h" #include "fuse_ipc.h" #include "fuse_internal.h" @@ -89,13 +93,13 @@ #include #include -SDT_PROVIDER_DECLARE(fuse); +SDT_PROVIDER_DECLARE(fusefs); /* * Fuse trace probe: * arg0: verbosity. 
Higher numbers give more verbose messages * arg1: Textual message */ -SDT_PROBE_DEFINE2(fuse, , vfsops, trace, "int", "char*"); +SDT_PROBE_DEFINE2(fusefs, , vfsops, trace, "int", "char*"); /* This will do for privilege types for now */ #ifndef PRIV_VFS_FUSE_ALLOWOTHER @@ -108,30 +112,28 @@ #define PRIV_VFS_FUSE_SYNC_UNMOUNT PRIV_VFS_MOUNT_NONUSER #endif +static vfs_fhtovp_t fuse_vfsop_fhtovp; static vfs_mount_t fuse_vfsop_mount; static vfs_unmount_t fuse_vfsop_unmount; static vfs_root_t fuse_vfsop_root; static vfs_statfs_t fuse_vfsop_statfs; +static vfs_vget_t fuse_vfsop_vget; struct vfsops fuse_vfsops = { + .vfs_fhtovp = fuse_vfsop_fhtovp, .vfs_mount = fuse_vfsop_mount, .vfs_unmount = fuse_vfsop_unmount, .vfs_root = fuse_vfsop_root, .vfs_statfs = fuse_vfsop_statfs, + .vfs_vget = fuse_vfsop_vget, }; -SYSCTL_INT(_vfs_fusefs, OID_AUTO, init_backgrounded, CTLFLAG_RD, - SYSCTL_NULL_INT_PTR, 1, "indicate async handshake"); static int fuse_enforce_dev_perms = 0; SYSCTL_INT(_vfs_fusefs, OID_AUTO, enforce_dev_perms, CTLFLAG_RW, &fuse_enforce_dev_perms, 0, "enforce fuse device permissions for secondary mounts"); -static unsigned sync_unmount = 1; -SYSCTL_UINT(_vfs_fusefs, OID_AUTO, sync_unmount, CTLFLAG_RW, - &sync_unmount, 0, "specify when to use synchronous unmount"); - MALLOC_DEFINE(M_FUSEVFS, "fuse_filesystem", "buffer for fuse vfs layer"); static int @@ -208,11 +210,90 @@ vfs_flagopt(opts, "__" #fnam, &__mntopts, fval); \ } while (0) -SDT_PROBE_DEFINE1(fuse, , vfsops, mntopts, "uint64_t"); -SDT_PROBE_DEFINE4(fuse, , vfsops, mount_err, "char*", "struct fuse_data*", +SDT_PROBE_DEFINE1(fusefs, , vfsops, mntopts, "uint64_t"); +SDT_PROBE_DEFINE4(fusefs, , vfsops, mount_err, "char*", "struct fuse_data*", "struct mount*", "int"); static int +fuse_vfs_remount(struct mount *mp, struct thread *td, uint64_t mntopts, + uint32_t max_read, int daemon_timeout) +{ + int err = 0; + struct fuse_data *data = fuse_get_mpdata(mp); + /* Don't allow these options to be changed */ + const static unsigned long long cant_update_opts = + MNT_USER; /* Mount owner must be the user running the daemon */ + + FUSE_LOCK(); + + if ((mp->mnt_flag ^ data->mnt_flag) & cant_update_opts) { + err = EOPNOTSUPP; + SDT_PROBE4(fusefs, , vfsops, mount_err, + "Can't change these mount options during remount", + data, mp, err); + goto out; + } + if (((data->dataflags ^ mntopts) & FSESS_MNTOPTS_MASK) || + (data->max_read != max_read) || + (data->daemon_timeout != daemon_timeout)) { + // TODO: allow changing options where it makes sense + err = EOPNOTSUPP; + SDT_PROBE4(fusefs, , vfsops, mount_err, + "Can't change fuse mount options during remount", + data, mp, err); + goto out; + } + + if (fdata_get_dead(data)) { + err = ENOTCONN; + SDT_PROBE4(fusefs, , vfsops, mount_err, + "device is dead during mount", data, mp, err); + goto out; + } + + /* Sanity + permission checks */ + if (!data->daemoncred) + panic("fuse daemon found, but identity unknown"); + if (mntopts & FSESS_DAEMON_CAN_SPY) + err = priv_check(td, PRIV_VFS_FUSE_ALLOWOTHER); + if (err == 0 && td->td_ucred->cr_uid != data->daemoncred->cr_uid) + /* are we allowed to do the first mount? 
*/ + err = priv_check(td, PRIV_VFS_FUSE_MOUNT_NONUSER); + +out: + FUSE_UNLOCK(); + return err; +} + +static int +fuse_vfsop_fhtovp(struct mount *mp, struct fid *fhp, int flags, + struct vnode **vpp) +{ + struct fuse_fid *ffhp = (struct fuse_fid *)fhp; + struct fuse_vnode_data *fvdat; + struct vnode *nvp; + int error; + + if (!(fuse_get_mpdata(mp)->dataflags & FSESS_EXPORT_SUPPORT)) + return EOPNOTSUPP; + + error = VFS_VGET(mp, ffhp->nid, LK_EXCLUSIVE, &nvp); + if (error) { + *vpp = NULLVP; + return (error); + } + fvdat = VTOFUD(nvp); + if (fvdat->generation != ffhp->gen ) { + vput(nvp); + *vpp = NULLVP; + return (ESTALE); + } + *vpp = nvp; + vnode_create_vobject(*vpp, 0, curthread); + return (0); +} + +static int fuse_vfsop_mount(struct mount *mp) { int err; @@ -238,13 +319,6 @@ __mntopts = 0; td = curthread; - if (mp->mnt_flag & MNT_UPDATE) - return EOPNOTSUPP; - - MNT_ILOCK(mp); - mp->mnt_flag |= MNT_SYNCHRONOUS; - mp->mnt_data = NULL; - MNT_IUNLOCK(mp); /* Get the new options passed to mount */ opts = mp->mnt_optnew; @@ -255,19 +329,6 @@ if (!vfs_getopts(opts, "fspath", &err)) return err; - /* `from' contains the device name (eg. /dev/fuse0); REQUIRED */ - fspec = vfs_getopts(opts, "from", &err); - if (!fspec) - return err; - - /* `fd' contains the filedescriptor for this session; REQUIRED */ - if (vfs_scanopt(opts, "fd", "%d", &fd) != 1) - return EINVAL; - - err = fuse_getdevice(fspec, td, &fdev); - if (err != 0) - return err; - /* * With the help of underscored options the mount program * can inform us from the flags it sets by default @@ -275,12 +336,6 @@ FUSE_FLAGOPT(allow_other, FSESS_DAEMON_CAN_SPY); FUSE_FLAGOPT(push_symlinks_in, FSESS_PUSH_SYMLINKS_IN); FUSE_FLAGOPT(default_permissions, FSESS_DEFAULT_PERMISSIONS); - FUSE_FLAGOPT(no_attrcache, FSESS_NO_ATTRCACHE); - FUSE_FLAGOPT(no_readahed, FSESS_NO_READAHEAD); - FUSE_FLAGOPT(no_datacache, FSESS_NO_DATACACHE); - FUSE_FLAGOPT(no_namecache, FSESS_NO_NAMECACHE); - FUSE_FLAGOPT(no_mmap, FSESS_NO_MMAP); - FUSE_FLAGOPT(brokenio, FSESS_BROKENIO); (void)vfs_scanopt(opts, "max_read=", "%u", &max_read); if (vfs_scanopt(opts, "timeout=", "%u", &daemon_timeout) == 1) { @@ -293,11 +348,29 @@ } subtype = vfs_getopts(opts, "subtype=", &err); - SDT_PROBE1(fuse, , vfsops, mntopts, mntopts); + SDT_PROBE1(fusefs, , vfsops, mntopts, mntopts); + if (mp->mnt_flag & MNT_UPDATE) { + return fuse_vfs_remount(mp, td, mntopts, max_read, + daemon_timeout); + } + + /* `from' contains the device name (eg. 
/dev/fuse0); REQUIRED */ + fspec = vfs_getopts(opts, "from", &err); + if (!fspec) + return err; + + /* `fd' contains the filedescriptor for this session; REQUIRED */ + if (vfs_scanopt(opts, "fd", "%d", &fd) != 1) + return EINVAL; + + err = fuse_getdevice(fspec, td, &fdev); + if (err != 0) + return err; + err = fget(td, fd, &cap_read_rights, &fp); if (err != 0) { - SDT_PROBE2(fuse, , vfsops, trace, 1, + SDT_PROBE2(fusefs, , vfsops, trace, 1, "invalid or not opened device"); goto out; } @@ -307,16 +380,17 @@ td->td_fpop = fptmp; fdrop(fp, td); FUSE_LOCK(); - if (err != 0 || data == NULL || data->mp != NULL) { + + if (err != 0 || data == NULL) { err = ENXIO; - SDT_PROBE4(fuse, , vfsops, mount_err, + SDT_PROBE4(fusefs, , vfsops, mount_err, "invalid or not opened device", data, mp, err); FUSE_UNLOCK(); goto out; } if (fdata_get_dead(data)) { err = ENOTCONN; - SDT_PROBE4(fuse, , vfsops, mount_err, + SDT_PROBE4(fusefs, , vfsops, mount_err, "device is dead during mount", data, mp, err); FUSE_UNLOCK(); goto out; @@ -338,12 +412,17 @@ data->dataflags |= mntopts; data->max_read = max_read; data->daemon_timeout = daemon_timeout; + data->mnt_flag = mp->mnt_flag & MNT_UPDATEMASK; FUSE_UNLOCK(); vfs_getnewfsid(mp); MNT_ILOCK(mp); mp->mnt_data = data; - mp->mnt_flag |= MNT_LOCAL; + /* + * FUSE file systems can be either local or remote, but the kernel + * can't tell the difference. + */ + mp->mnt_flag &= ~MNT_LOCAL; mp->mnt_kern_flag |= MNTK_USES_BCACHE; MNT_IUNLOCK(mp); /* We need this here as this slot is used by getnewvnode() */ @@ -354,6 +433,7 @@ } copystr(fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &len); bzero(mp->mnt_stat.f_mntfromname + len, MNAMELEN - len); + mp->mnt_iosize_max = MAXPHYS; /* Now handshaking with daemon */ fuse_internal_send_init(data, td); @@ -366,9 +446,10 @@ * Destroy device only if we acquired reference to * it */ - SDT_PROBE4(fuse, , vfsops, mount_err, + SDT_PROBE4(fusefs, , vfsops, mount_err, "mount failed, destroy device", data, mp, err); data->mp = NULL; + mp->mnt_data = NULL; fdata_trydestroy(data); } FUSE_UNLOCK(); @@ -412,11 +493,13 @@ if (fdata_get_dead(data)) { goto alreadydead; } - fdisp_init(&fdi, 0); - fdisp_make(&fdi, FUSE_DESTROY, mp, 0, td, NULL); + if (fsess_isimpl(mp, FUSE_DESTROY)) { + fdisp_init(&fdi, 0); + fdisp_make(&fdi, FUSE_DESTROY, mp, 0, td, NULL); - err = fdisp_wait_answ(&fdi); - fdisp_destroy(&fdi); + (void)fdisp_wait_answ(&fdi); + fdisp_destroy(&fdi); + } fdata_set_dead(data); @@ -429,7 +512,6 @@ MNT_ILOCK(mp); mp->mnt_data = NULL; - mp->mnt_flag &= ~MNT_LOCAL; MNT_IUNLOCK(mp); dev_rel(fdev); @@ -437,7 +519,87 @@ return 0; } +SDT_PROBE_DEFINE1(fusefs, , vfsops, invalidate_without_export, + "struct mount*"); static int +fuse_vfsop_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp) +{ + struct fuse_data *data = fuse_get_mpdata(mp); + uint64_t nodeid = ino; + struct thread *td = curthread; + struct fuse_dispatcher fdi; + struct fuse_entry_out *feo; + struct fuse_vnode_data *fvdat; + const char dot[] = "."; + off_t filesize; + enum vtype vtyp; + int error; + + if (!(data->dataflags & FSESS_EXPORT_SUPPORT)) { + /* + * Unreachable unless you do something stupid, like export a + * nullfs mount of a fusefs file system. + */ + SDT_PROBE1(fusefs, , vfsops, invalidate_without_export, mp); + return (EOPNOTSUPP); + } + + error = fuse_internal_get_cached_vnode(mp, ino, flags, vpp); + if (error || *vpp != NULL) + return error; + + /* Do a LOOKUP, using nodeid as the parent and "." 
as filename */ + fdisp_init(&fdi, sizeof(dot)); + fdisp_make(&fdi, FUSE_LOOKUP, mp, nodeid, td, td->td_ucred); + memcpy(fdi.indata, dot, sizeof(dot)); + error = fdisp_wait_answ(&fdi); + + if (error) + return error; + + feo = (struct fuse_entry_out *)fdi.answ; + if (feo->nodeid == 0) { + /* zero nodeid means ENOENT and cache it */ + error = ENOENT; + goto out; + } + + vtyp = IFTOVT(feo->attr.mode); + error = fuse_vnode_get(mp, feo, nodeid, NULL, vpp, NULL, vtyp); + if (error) + goto out; + filesize = feo->attr.size; + + /* + * In the case where we are looking up a FUSE node represented by an + * existing cached vnode, and the true size reported by FUSE_LOOKUP + * doesn't match the vnode's cached size, then any cached writes beyond + * the file's current size are lost. + * + * We can get here: + * * following attribute cache expiration, or + * * due a bug in the daemon, or + */ + fvdat = VTOFUD(*vpp); + if (vnode_isreg(*vpp) && + filesize != fvdat->cached_attrs.va_size && + fvdat->flag & FN_SIZECHANGE) { + printf("%s: WB cache incoherent on %s!\n", __func__, + vnode_mount(*vpp)->mnt_stat.f_mntonname); + + fvdat->flag &= ~FN_SIZECHANGE; + } + + fuse_internal_cache_attrs(*vpp, &feo->attr, feo->attr_valid, + feo->attr_valid_nsec, NULL); + fuse_validity_2_bintime(feo->entry_valid, feo->entry_valid_nsec, + &fvdat->entry_cache_timeout); +out: + fdisp_destroy(&fdi); + return error; +} + +static int fuse_vfsop_root(struct mount *mp, int lkflags, struct vnode **vpp) { struct fuse_data *data = fuse_get_mpdata(mp); @@ -454,13 +616,13 @@ FUSE_LOCK(); MPASS(data->vroot == NULL || data->vroot == *vpp); if (data->vroot == NULL) { - SDT_PROBE2(fuse, , vfsops, trace, 1, + SDT_PROBE2(fusefs, , vfsops, trace, 1, "new root vnode"); data->vroot = *vpp; FUSE_UNLOCK(); vref(*vpp); } else if (data->vroot != *vpp) { - SDT_PROBE2(fuse, , vfsops, trace, 1, + SDT_PROBE2(fusefs, , vfsops, trace, 1, "root vnode race"); FUSE_UNLOCK(); VOP_UNLOCK(*vpp, 0); @@ -523,7 +685,7 @@ sbp->f_files = 0; sbp->f_ffree = 0; sbp->f_namemax = 0; - sbp->f_bsize = FUSE_DEFAULT_BLOCKSIZE; + sbp->f_bsize = S_BLKSIZE; return 0; } Index: sys/fs/fuse/fuse_vnops.c =================================================================== --- sys/fs/fuse/fuse_vnops.c +++ sys/fs/fuse/fuse_vnops.c @@ -33,6 +33,11 @@ * Copyright (C) 2005 Csaba Henk. * All rights reserved. * + * Copyright (c) 2019 The FreeBSD Foundation + * + * Portions of this software were developed by BFF Storage Systems, LLC under + * sponsorship from the FreeBSD Foundation. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -102,24 +107,30 @@ #include "fuse_internal.h" #include "fuse_ipc.h" #include "fuse_node.h" -#include "fuse_param.h" #include "fuse_io.h" #include -SDT_PROVIDER_DECLARE(fuse); +/* Maximum number of hardlinks to a single FUSE file */ +#define FUSE_LINK_MAX UINT32_MAX + +SDT_PROVIDER_DECLARE(fusefs); /* * Fuse trace probe: * arg0: verbosity. 
Higher numbers give more verbose messages * arg1: Textual message */ -SDT_PROBE_DEFINE2(fuse, , vnops, trace, "int", "char*"); +SDT_PROBE_DEFINE2(fusefs, , vnops, trace, "int", "char*"); /* vnode ops */ static vop_access_t fuse_vnop_access; +static vop_advlock_t fuse_vnop_advlock; +static vop_bmap_t fuse_vnop_bmap; +static vop_close_t fuse_fifo_close; static vop_close_t fuse_vnop_close; static vop_create_t fuse_vnop_create; static vop_deleteextattr_t fuse_vnop_deleteextattr; +static vop_fdatasync_t fuse_vnop_fdatasync; static vop_fsync_t fuse_vnop_fsync; static vop_getattr_t fuse_vnop_getattr; static vop_getextattr_t fuse_vnop_getextattr; @@ -144,19 +155,44 @@ static vop_symlink_t fuse_vnop_symlink; static vop_write_t fuse_vnop_write; static vop_getpages_t fuse_vnop_getpages; -static vop_putpages_t fuse_vnop_putpages; static vop_print_t fuse_vnop_print; +static vop_vptofh_t fuse_vnop_vptofh; +struct vop_vector fuse_fifoops = { + .vop_default = &fifo_specops, + .vop_access = fuse_vnop_access, + .vop_close = fuse_fifo_close, + .vop_fsync = fuse_vnop_fsync, + .vop_getattr = fuse_vnop_getattr, + .vop_inactive = fuse_vnop_inactive, + .vop_pathconf = fuse_vnop_pathconf, + .vop_print = fuse_vnop_print, + .vop_read = VOP_PANIC, + .vop_reclaim = fuse_vnop_reclaim, + .vop_setattr = fuse_vnop_setattr, + .vop_write = VOP_PANIC, + .vop_vptofh = fuse_vnop_vptofh, +}; + struct vop_vector fuse_vnops = { + .vop_allocate = VOP_EINVAL, .vop_default = &default_vnodeops, .vop_access = fuse_vnop_access, + .vop_advlock = fuse_vnop_advlock, + .vop_bmap = fuse_vnop_bmap, .vop_close = fuse_vnop_close, .vop_create = fuse_vnop_create, .vop_deleteextattr = fuse_vnop_deleteextattr, .vop_fsync = fuse_vnop_fsync, + .vop_fdatasync = fuse_vnop_fdatasync, .vop_getattr = fuse_vnop_getattr, .vop_getextattr = fuse_vnop_getextattr, .vop_inactive = fuse_vnop_inactive, + /* + * TODO: implement vop_ioctl after upgrading to protocol 7.16. + * FUSE_IOCTL was added in 7.11, but 32-bit compat is broken until + * 7.16. + */ .vop_link = fuse_vnop_link, .vop_listextattr = fuse_vnop_listextattr, .vop_lookup = fuse_vnop_lookup, @@ -164,6 +200,12 @@ .vop_mknod = fuse_vnop_mknod, .vop_open = fuse_vnop_open, .vop_pathconf = fuse_vnop_pathconf, + /* + * TODO: implement vop_poll after upgrading to protocol 7.21. 
+ * FUSE_POLL was added in protocol 7.11, but it's kind of broken until + * 7.21, which adds the ability for the client to choose which poll + * events it wants, and for a client to deregister a file handle + */ .vop_read = fuse_vnop_read, .vop_readdir = fuse_vnop_readdir, .vop_readlink = fuse_vnop_readlink, @@ -177,41 +219,103 @@ .vop_symlink = fuse_vnop_symlink, .vop_write = fuse_vnop_write, .vop_getpages = fuse_vnop_getpages, - .vop_putpages = fuse_vnop_putpages, .vop_print = fuse_vnop_print, + .vop_vptofh = fuse_vnop_vptofh, }; -static u_long fuse_lookup_cache_hits = 0; +uma_zone_t fuse_pbuf_zone; -SYSCTL_ULONG(_vfs_fusefs, OID_AUTO, lookup_cache_hits, CTLFLAG_RD, - &fuse_lookup_cache_hits, 0, "number of positive cache hits in lookup"); +#define fuse_vm_page_lock(m) vm_page_lock((m)); +#define fuse_vm_page_unlock(m) vm_page_unlock((m)); +#define fuse_vm_page_lock_queues() ((void)0) +#define fuse_vm_page_unlock_queues() ((void)0) -static u_long fuse_lookup_cache_misses = 0; +/* Check permission for extattr operations, much like extattr_check_cred */ +static int +fuse_extattr_check_cred(struct vnode *vp, int ns, struct ucred *cred, + struct thread *td, accmode_t accmode) +{ + struct mount *mp = vnode_mount(vp); + struct fuse_data *data = fuse_get_mpdata(mp); -SYSCTL_ULONG(_vfs_fusefs, OID_AUTO, lookup_cache_misses, CTLFLAG_RD, - &fuse_lookup_cache_misses, 0, "number of cache misses in lookup"); + /* + * Kernel-invoked always succeeds. + */ + if (cred == NOCRED) + return (0); -int fuse_lookup_cache_enable = 1; + /* + * Do not allow privileged processes in jail to directly manipulate + * system attributes. + */ + switch (ns) { + case EXTATTR_NAMESPACE_SYSTEM: + if (data->dataflags & FSESS_DEFAULT_PERMISSIONS) { + return (priv_check_cred(cred, PRIV_VFS_EXTATTR_SYSTEM)); + } + /* FALLTHROUGH */ + case EXTATTR_NAMESPACE_USER: + return (fuse_internal_access(vp, accmode, td, cred)); + default: + return (EPERM); + } +} -SYSCTL_INT(_vfs_fusefs, OID_AUTO, lookup_cache_enable, CTLFLAG_RW, - &fuse_lookup_cache_enable, 0, "if non-zero, enable lookup cache"); +/* Get a filehandle for a directory */ +static int +fuse_filehandle_get_dir(struct vnode *vp, struct fuse_filehandle **fufhp, + struct ucred *cred, pid_t pid) +{ + if (fuse_filehandle_get(vp, FREAD, fufhp, cred, pid) == 0) + return 0; + return fuse_filehandle_get(vp, FEXEC, fufhp, cred, pid); +} -/* - * XXX: This feature is highly experimental and can bring to instabilities, - * needs revisiting before to be enabled by default. - */ -static int fuse_reclaim_revoked = 0; +/* Send FUSE_FLUSH for this vnode */ +static int +fuse_flush(struct vnode *vp, struct ucred *cred, pid_t pid, int fflag) +{ + struct fuse_flush_in *ffi; + struct fuse_filehandle *fufh; + struct fuse_dispatcher fdi; + struct thread *td = curthread; + struct mount *mp = vnode_mount(vp); + int err; -SYSCTL_INT(_vfs_fusefs, OID_AUTO, reclaim_revoked, CTLFLAG_RW, - &fuse_reclaim_revoked, 0, ""); + if (!fsess_isimpl(vnode_mount(vp), FUSE_FLUSH)) + return 0; -uma_zone_t fuse_pbuf_zone; + err = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid); + if (err) + return err; -#define fuse_vm_page_lock(m) vm_page_lock((m)); -#define fuse_vm_page_unlock(m) vm_page_unlock((m)); -#define fuse_vm_page_lock_queues() ((void)0) -#define fuse_vm_page_unlock_queues() ((void)0) + fdisp_init(&fdi, sizeof(*ffi)); + fdisp_make_vp(&fdi, FUSE_FLUSH, vp, td, cred); + ffi = fdi.indata; + ffi->fh = fufh->fh_id; + /* + * If the file has a POSIX lock then we're supposed to set lock_owner. 
+ * If not, then lock_owner is undefined. So we may as well always set + * it. + */ + ffi->lock_owner = td->td_proc->p_pid; + err = fdisp_wait_answ(&fdi); + if (err == ENOSYS) { + fsess_set_notimpl(mp, FUSE_FLUSH); + err = 0; + } + fdisp_destroy(&fdi); + return err; +} + +/* Close wrapper for fifos. */ +static int +fuse_fifo_close(struct vop_close_args *ap) +{ + return (fifo_specops.vop_close(ap)); +} + /* struct vnop_access_args { struct vnode *a_vp; @@ -231,7 +335,6 @@ int accmode = ap->a_accmode; struct ucred *cred = ap->a_cred; - struct fuse_access_param facp; struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp)); int err; @@ -254,15 +357,192 @@ if (vnode_islnk(vp)) { return 0; } - bzero(&facp, sizeof(facp)); - err = fuse_internal_access(vp, accmode, &facp, ap->a_td, ap->a_cred); + err = fuse_internal_access(vp, accmode, ap->a_td, ap->a_cred); return err; } /* - struct vnop_close_args { + * struct vop_advlock_args { + * struct vop_generic_args a_gen; + * struct vnode *a_vp; + * void *a_id; + * int a_op; + * struct flock *a_fl; + * int a_flags; + * } + */ +static int +fuse_vnop_advlock(struct vop_advlock_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct flock *fl = ap->a_fl; + struct thread *td = curthread; + struct ucred *cred = td->td_ucred; + pid_t pid = td->td_proc->p_pid; + struct fuse_filehandle *fufh; + struct fuse_dispatcher fdi; + struct fuse_lk_in *fli; + struct fuse_lk_out *flo; + enum fuse_opcode op; + int dataflags, err; + int flags = ap->a_flags; + + dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags; + + if (fuse_isdeadfs(vp)) { + return ENXIO; + } + + if (!(dataflags & FSESS_POSIX_LOCKS)) + return vop_stdadvlock(ap); + /* FUSE doesn't properly support flock until protocol 7.17 */ + if (flags & F_FLOCK) + return vop_stdadvlock(ap); + + err = fuse_filehandle_get_anyflags(vp, &fufh, cred, pid); + if (err) + return err; + + fdisp_init(&fdi, sizeof(*fli)); + + switch(ap->a_op) { + case F_GETLK: + op = FUSE_GETLK; + break; + case F_SETLK: + op = FUSE_SETLK; + break; + case F_SETLKW: + op = FUSE_SETLKW; + break; + default: + return EINVAL; + } + + fdisp_make_vp(&fdi, op, vp, td, cred); + fli = fdi.indata; + fli->fh = fufh->fh_id; + fli->owner = fl->l_pid; + fli->lk.start = fl->l_start; + if (fl->l_len != 0) + fli->lk.end = fl->l_start + fl->l_len - 1; + else + fli->lk.end = INT64_MAX; + fli->lk.type = fl->l_type; + fli->lk.pid = fl->l_pid; + + err = fdisp_wait_answ(&fdi); + fdisp_destroy(&fdi); + + if (err == 0 && op == FUSE_GETLK) { + flo = fdi.answ; + fl->l_type = flo->lk.type; + fl->l_pid = flo->lk.pid; + if (flo->lk.type != F_UNLCK) { + fl->l_start = flo->lk.start; + if (flo->lk.end == INT64_MAX) + fl->l_len = 0; + else + fl->l_len = flo->lk.end - flo->lk.start + 1; + fl->l_start = flo->lk.start; + } + } + + return err; +} + +/* { struct vnode *a_vp; + daddr_t a_bn; + struct bufobj **a_bop; + daddr_t *a_bnp; + int *a_runp; + int *a_runb; +} */ +static int +fuse_vnop_bmap(struct vop_bmap_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct bufobj **bo = ap->a_bop; + struct thread *td = curthread; + struct mount *mp; + struct fuse_dispatcher fdi; + struct fuse_bmap_in *fbi; + struct fuse_bmap_out *fbo; + struct fuse_data *data; + uint64_t biosize; + off_t filesize; + daddr_t lbn = ap->a_bn; + daddr_t *pbn = ap->a_bnp; + int *runp = ap->a_runp; + int *runb = ap->a_runb; + int error = 0; + int maxrun; + + if (fuse_isdeadfs(vp)) { + return ENXIO; + } + + mp = vnode_mount(vp); + data = fuse_get_mpdata(mp); + biosize = fuse_iosize(vp); + maxrun = 
MIN(vp->v_mount->mnt_iosize_max / biosize - 1, + data->max_readahead_blocks); + + if (bo != NULL) + *bo = &vp->v_bufobj; + + /* + * The FUSE_BMAP operation does not include the runp and runb + * variables, so we must guess. Report nonzero contiguous runs so + * cluster_read will combine adjacent reads. It's worthwhile to reduce + * upcalls even if we don't know the true physical layout of the file. + * + * FUSE file systems may opt out of read clustering in two ways: + * * mounting with -onoclusterr + * * Setting max_readahead <= maxbcachebuf during FUSE_INIT + */ + if (runb != NULL) + *runb = MIN(lbn, maxrun); + if (runp != NULL) { + error = fuse_vnode_size(vp, &filesize, td->td_ucred, td); + if (error == 0) + *runp = MIN(MAX(0, filesize / biosize - lbn - 1), + maxrun); + else + *runp = 0; + } + + if (fsess_isimpl(mp, FUSE_BMAP)) { + fdisp_init(&fdi, sizeof(*fbi)); + fdisp_make_vp(&fdi, FUSE_BMAP, vp, td, td->td_ucred); + fbi = fdi.indata; + fbi->block = lbn; + fbi->blocksize = biosize; + error = fdisp_wait_answ(&fdi); + if (error == ENOSYS) { + fdisp_destroy(&fdi); + fsess_set_notimpl(mp, FUSE_BMAP); + error = 0; + } else { + fbo = fdi.answ; + if (error == 0 && pbn != NULL) + *pbn = fbo->block; + fdisp_destroy(&fdi); + return error; + } + } + + /* If the daemon doesn't support BMAP, make up a sensible default */ + if (pbn != NULL) + *pbn = lbn * btodb(biosize); + return (error); +} + +/* + struct vop_close_args { + struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct thread *a_td; @@ -274,39 +554,48 @@ struct vnode *vp = ap->a_vp; struct ucred *cred = ap->a_cred; int fflag = ap->a_fflag; - fufh_type_t fufh_type; + struct thread *td = ap->a_td; + pid_t pid = td->td_proc->p_pid; + int err = 0; - if (fuse_isdeadfs(vp)) { + if (fuse_isdeadfs(vp)) return 0; - } - if (vnode_isdir(vp)) { - if (fuse_filehandle_valid(vp, FUFH_RDONLY)) { - fuse_filehandle_close(vp, FUFH_RDONLY, NULL, cred); - } + if (vnode_isdir(vp)) return 0; - } - if (fflag & IO_NDELAY) { + if (fflag & IO_NDELAY) return 0; - } - fufh_type = fuse_filehandle_xlate_from_fflags(fflag); - if (!fuse_filehandle_valid(vp, fufh_type)) { - int i; - - for (i = 0; i < FUFH_MAXTYPE; i++) - if (fuse_filehandle_valid(vp, i)) - break; - if (i == FUFH_MAXTYPE) - panic("FUSE: fufh type %d found to be invalid in close" - " (fflag=0x%x)\n", - fufh_type, fflag); - } + err = fuse_flush(vp, cred, pid, fflag); + /* TODO: close the file handle, if we're sure it's no longer used */ if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0) { - fuse_vnode_savesize(vp, cred); + fuse_vnode_savesize(vp, cred, td->td_proc->p_pid); } - return 0; + return err; } +static void +fdisp_make_mknod_for_fallback( + struct fuse_dispatcher *fdip, + struct componentname *cnp, + struct vnode *dvp, + uint64_t parentnid, + struct thread *td, + struct ucred *cred, + mode_t mode, + enum fuse_opcode *op) +{ + struct fuse_mknod_in *fmni; + + fdisp_init(fdip, sizeof(*fmni) + cnp->cn_namelen + 1); + *op = FUSE_MKNOD; + fdisp_make(fdip, *op, vnode_mount(dvp), parentnid, td, cred); + fmni = fdip->indata; + fmni->mode = mode; + fmni->rdev = 0; + memcpy((char *)fdip->indata + sizeof(*fmni), cnp->cn_nameptr, + cnp->cn_namelen); + ((char *)fdip->indata)[sizeof(*fmni) + cnp->cn_namelen] = '\0'; +} /* struct vnop_create_args { struct vnode *a_dvp; @@ -325,107 +614,169 @@ struct thread *td = cnp->cn_thread; struct ucred *cred = cnp->cn_cred; - struct fuse_open_in *foi; + struct fuse_data *data; + struct fuse_create_in *fci; struct fuse_entry_out *feo; - struct fuse_dispatcher fdi; + struct 
fuse_open_out *foo; + struct fuse_dispatcher fdi, fdi2; struct fuse_dispatcher *fdip = &fdi; + struct fuse_dispatcher *fdip2 = NULL; int err; struct mount *mp = vnode_mount(dvp); + data = fuse_get_mpdata(mp); uint64_t parentnid = VTOFUD(dvp)->nid; mode_t mode = MAKEIMODE(vap->va_type, vap->va_mode); - uint64_t x_fh_id; - uint32_t x_open_flags; + enum fuse_opcode op; + int flags; - if (fuse_isdeadfs(dvp)) { + if (fuse_isdeadfs(dvp)) return ENXIO; - } + + /* FUSE expects sockets to be created with FUSE_MKNOD */ + if (vap->va_type == VSOCK) + return fuse_internal_mknod(dvp, vpp, cnp, vap); + + /* + * VOP_CREATE doesn't tell us the open(2) flags, so we guess. Only a + * writable mode makes sense, and we might as well include readability + * too. + */ + flags = O_RDWR; + bzero(&fdi, sizeof(fdi)); - /* XXX: Will we ever want devices ? */ - if ((vap->va_type != VREG)) { - printf("fuse_vnop_create: unsupported va_type %d\n", - vap->va_type); + if (vap->va_type != VREG) return (EINVAL); - } - fdisp_init(fdip, sizeof(*foi) + cnp->cn_namelen + 1); - if (!fsess_isimpl(mp, FUSE_CREATE)) { - SDT_PROBE2(fuse, , vnops, trace, 1, - "eh, daemon doesn't implement create?"); - return (EINVAL); - } - fdisp_make(fdip, FUSE_CREATE, vnode_mount(dvp), parentnid, td, cred); + if (!fsess_isimpl(mp, FUSE_CREATE) || vap->va_type == VSOCK) { + /* Fallback to FUSE_MKNOD/FUSE_OPEN */ + fdisp_make_mknod_for_fallback(fdip, cnp, dvp, parentnid, td, + cred, mode, &op); + } else { + /* Use FUSE_CREATE */ + size_t insize; - foi = fdip->indata; - foi->mode = mode; - foi->flags = O_CREAT | O_RDWR; + op = FUSE_CREATE; + fdisp_init(fdip, sizeof(*fci) + cnp->cn_namelen + 1); + fdisp_make(fdip, op, vnode_mount(dvp), parentnid, td, cred); + fci = fdip->indata; + fci->mode = mode; + fci->flags = O_CREAT | flags; + if (fuse_libabi_geq(data, 7, 12)) { + insize = sizeof(*fci); + fci->umask = td->td_proc->p_fd->fd_cmask; + } else { + insize = sizeof(struct fuse_open_in); + } - memcpy((char *)fdip->indata + sizeof(*foi), cnp->cn_nameptr, - cnp->cn_namelen); - ((char *)fdip->indata)[sizeof(*foi) + cnp->cn_namelen] = '\0'; + memcpy((char *)fdip->indata + insize, cnp->cn_nameptr, + cnp->cn_namelen); + ((char *)fdip->indata)[insize + cnp->cn_namelen] = '\0'; + } err = fdisp_wait_answ(fdip); if (err) { - if (err == ENOSYS) + if (err == ENOSYS && op == FUSE_CREATE) { fsess_set_notimpl(mp, FUSE_CREATE); - goto out; + fdisp_destroy(fdip); + fdisp_make_mknod_for_fallback(fdip, cnp, dvp, + parentnid, td, cred, mode, &op); + err = fdisp_wait_answ(fdip); + } + if (err) + goto out; } feo = fdip->answ; - if ((err = fuse_internal_checkentry(feo, VREG))) { + if ((err = fuse_internal_checkentry(feo, vap->va_type))) { goto out; } - err = fuse_vnode_get(mp, feo, feo->nodeid, dvp, vpp, cnp, VREG); + + if (op == FUSE_CREATE) { + foo = (struct fuse_open_out*)(feo + 1); + } else { + /* Issue a separate FUSE_OPEN */ + struct fuse_open_in *foi; + + fdip2 = &fdi2; + fdisp_init(fdip2, sizeof(*foi)); + fdisp_make(fdip2, FUSE_OPEN, vnode_mount(dvp), feo->nodeid, td, + cred); + foi = fdip2->indata; + foi->flags = flags; + err = fdisp_wait_answ(fdip2); + if (err) + goto out; + foo = fdip2->answ; + } + err = fuse_vnode_get(mp, feo, feo->nodeid, dvp, vpp, cnp, vap->va_type); if (err) { struct fuse_release_in *fri; uint64_t nodeid = feo->nodeid; - uint64_t fh_id = ((struct fuse_open_out *)(feo + 1))->fh; + uint64_t fh_id = foo->fh; fdisp_init(fdip, sizeof(*fri)); fdisp_make(fdip, FUSE_RELEASE, mp, nodeid, td, cred); fri = fdip->indata; fri->fh = fh_id; - fri->flags = 
OFLAGS(mode); + fri->flags = flags; fuse_insert_callback(fdip->tick, fuse_internal_forget_callback); - fuse_insert_message(fdip->tick); - return err; + fuse_insert_message(fdip->tick, false); + goto out; } ASSERT_VOP_ELOCKED(*vpp, "fuse_vnop_create"); + fuse_internal_cache_attrs(*vpp, &feo->attr, feo->attr_valid, + feo->attr_valid_nsec, NULL); - fdip->answ = feo + 1; - - x_fh_id = ((struct fuse_open_out *)(feo + 1))->fh; - x_open_flags = ((struct fuse_open_out *)(feo + 1))->open_flags; - fuse_filehandle_init(*vpp, FUFH_RDWR, NULL, x_fh_id); - fuse_vnode_open(*vpp, x_open_flags, td); + fuse_filehandle_init(*vpp, FUFH_RDWR, NULL, td, cred, foo); + fuse_vnode_open(*vpp, foo->open_flags, td); + /* + * Purge the parent's attribute cache because the daemon should've + * updated its mtime and ctime + */ + fuse_vnode_clear_attr_cache(dvp); cache_purge_negative(dvp); out: + if (fdip2) + fdisp_destroy(fdip2); fdisp_destroy(fdip); return err; } /* - * Our vnop_fsync roughly corresponds to the FUSE_FSYNC method. The Linux - * version of FUSE also has a FUSE_FLUSH method. - * - * On Linux, fsync() synchronizes a file's complete in-core state with that - * on disk. The call is not supposed to return until the system has completed - * that action or until an error is detected. - * - * Linux also has an fdatasync() call that is similar to fsync() but is not - * required to update the metadata such as access time and modification time. - */ + struct vnop_fdatasync_args { + struct vop_generic_args a_gen; + struct vnode * a_vp; + struct thread * a_td; + }; +*/ +static int +fuse_vnop_fdatasync(struct vop_fdatasync_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct thread *td = ap->a_td; + int waitfor = MNT_WAIT; + int err = 0; + + if (fuse_isdeadfs(vp)) { + return 0; + } + if ((err = vop_stdfdatasync_buf(ap))) + return err; + + return fuse_internal_fsync(vp, td, waitfor, true); +} + /* struct vnop_fsync_args { - struct vnodeop_desc *a_desc; + struct vop_generic_args a_gen; struct vnode * a_vp; - struct ucred * a_cred; int a_waitfor; struct thread * a_td; }; @@ -435,31 +786,16 @@ { struct vnode *vp = ap->a_vp; struct thread *td = ap->a_td; + int waitfor = ap->a_waitfor; + int err = 0; - struct fuse_filehandle *fufh; - struct fuse_vnode_data *fvdat = VTOFUD(vp); - - int type, err = 0; - if (fuse_isdeadfs(vp)) { return 0; } if ((err = vop_stdfsync(ap))) return err; - if (!fsess_isimpl(vnode_mount(vp), - (vnode_vtype(vp) == VDIR ? 
FUSE_FSYNCDIR : FUSE_FSYNC))) { - goto out; - } - for (type = 0; type < FUFH_MAXTYPE; type++) { - fufh = &(fvdat->fufh[type]); - if (FUFH_IS_VALID(fufh)) { - fuse_internal_fsync(vp, td, NULL, fufh); - } - } - -out: - return 0; + return fuse_internal_fsync(vp, td, waitfor, false); } /* @@ -477,12 +813,9 @@ struct vattr *vap = ap->a_vap; struct ucred *cred = ap->a_cred; struct thread *td = curthread; - struct fuse_vnode_data *fvdat = VTOFUD(vp); - struct fuse_attr_out *fao; int err = 0; int dataflags; - struct fuse_dispatcher fdi; dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags; @@ -497,48 +830,14 @@ goto fake; } } - fdisp_init(&fdi, 0); - if ((err = fdisp_simple_putget_vp(&fdi, FUSE_GETATTR, vp, td, cred))) { - if ((err == ENOTCONN) && vnode_isvroot(vp)) { - /* see comment in fuse_vfsop_statfs() */ - fdisp_destroy(&fdi); - goto fake; - } - if (err == ENOENT) { - fuse_internal_vnode_disappear(vp); - } - goto out; + err = fuse_internal_getattr(vp, vap, cred, td); + if (err == ENOTCONN && vnode_isvroot(vp)) { + /* see comment in fuse_vfsop_statfs() */ + goto fake; + } else { + return err; } - fao = (struct fuse_attr_out *)fdi.answ; - fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid, - fao->attr_valid_nsec, vap); - if (vap->va_type != vnode_vtype(vp)) { - fuse_internal_vnode_disappear(vp); - err = ENOENT; - goto out; - } - if ((fvdat->flag & FN_SIZECHANGE) != 0) - vap->va_size = fvdat->filesize; - - if (vnode_isreg(vp) && (fvdat->flag & FN_SIZECHANGE) == 0) { - /* - * This is for those cases when the file size changed without us - * knowing, and we want to catch up. - */ - off_t new_filesize = ((struct fuse_attr_out *) - fdi.answ)->attr.size; - - if (fvdat->filesize != new_filesize) { - fuse_vnode_setsize(vp, new_filesize); - fvdat->flag &= ~FN_SIZECHANGE; - } - } - -out: - fdisp_destroy(&fdi); - return err; - fake: bzero(vap, sizeof(*vap)); vap->va_type = vnode_vtype(vp); @@ -559,31 +858,27 @@ struct thread *td = ap->a_td; struct fuse_vnode_data *fvdat = VTOFUD(vp); - struct fuse_filehandle *fufh = NULL; + struct fuse_filehandle *fufh, *fufh_tmp; - int type, need_flush = 1; + int need_flush = 1; - for (type = 0; type < FUFH_MAXTYPE; type++) { - fufh = &(fvdat->fufh[type]); - if (FUFH_IS_VALID(fufh)) { - if (need_flush && vp->v_type == VREG) { - if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0) { - fuse_vnode_savesize(vp, NULL); - } - if (fuse_data_cache_invalidate || - (fvdat->flag & FN_REVOKED) != 0) - fuse_io_invalbuf(vp, td); - else - fuse_io_flushbuf(vp, MNT_WAIT, td); - need_flush = 0; + LIST_FOREACH_SAFE(fufh, &fvdat->handles, next, fufh_tmp) { + if (need_flush && vp->v_type == VREG) { + if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0) { + fuse_vnode_savesize(vp, NULL, 0); } - fuse_filehandle_close(vp, type, td, NULL); + if ((fvdat->flag & FN_REVOKED) != 0) + fuse_io_invalbuf(vp, td); + else + fuse_io_flushbuf(vp, MNT_WAIT, td); + need_flush = 0; } + fuse_filehandle_close(vp, fufh, td, NULL); } - if ((fvdat->flag & FN_REVOKED) != 0 && fuse_reclaim_revoked) { + if ((fvdat->flag & FN_REVOKED) != 0) vrecycle(vp); - } + return 0; } @@ -635,11 +930,39 @@ feo = fdi.answ; err = fuse_internal_checkentry(feo, vnode_vtype(vp)); + if (!err) { + /* + * Purge the parent's attribute cache because the daemon + * should've updated its mtime and ctime + */ + fuse_vnode_clear_attr_cache(tdvp); + fuse_internal_cache_attrs(vp, &feo->attr, feo->attr_valid, + feo->attr_valid_nsec, NULL); + } out: fdisp_destroy(&fdi); return err; } +struct fuse_lookup_alloc_arg { + struct fuse_entry_out *feo; + struct 
componentname *cnp; + uint64_t nid; + enum vtype vtyp; +}; + +/* Callback for vn_get_ino */ +static int +fuse_lookup_alloc(struct mount *mp, void *arg, int lkflags, struct vnode **vpp) +{ + struct fuse_lookup_alloc_arg *flaa = arg; + + return fuse_vnode_get(mp, flaa->feo, flaa->nid, NULL, vpp, flaa->cnp, + flaa->vtyp); +} + +SDT_PROBE_DEFINE3(fusefs, , vnops, cache_lookup, + "int", "struct timespec*", "struct timespec*"); /* struct vnop_lookup_args { struct vnodeop_desc *a_desc; @@ -668,268 +991,146 @@ struct vnode *vp = NULL; struct fuse_dispatcher fdi; - enum fuse_opcode op; + bool did_lookup = false; + struct fuse_entry_out *feo = NULL; + enum vtype vtyp; /* vnode type of target */ + off_t filesize; /* filesize of target */ uint64_t nid; - struct fuse_access_param facp; if (fuse_isdeadfs(dvp)) { *vpp = NULL; return ENXIO; } - if (!vnode_isdir(dvp)) { + if (!vnode_isdir(dvp)) return ENOTDIR; - } - if (islastcn && vfs_isrdonly(mp) && (nameiop != LOOKUP)) { + + if (islastcn && vfs_isrdonly(mp) && (nameiop != LOOKUP)) return EROFS; - } - /* - * We do access check prior to doing anything else only in the case - * when we are at fs root (we'd like to say, "we are at the first - * component", but that's not exactly the same... nevermind). - * See further comments at further access checks. - */ - bzero(&facp, sizeof(facp)); - if (vnode_isvroot(dvp)) { /* early permission check hack */ - if ((err = fuse_internal_access(dvp, VEXEC, &facp, td, cred))) { - return err; - } - } + if ((err = fuse_internal_access(dvp, VEXEC, td, cred))) + return err; + if (flags & ISDOTDOT) { + KASSERT(VTOFUD(dvp)->flag & FN_PARENT_NID, + ("Looking up .. is TODO")); nid = VTOFUD(dvp)->parent_nid; - if (nid == 0) { + if (nid == 0) return ENOENT; - } - fdisp_init(&fdi, 0); - op = FUSE_GETATTR; - goto calldaemon; + /* .. is obviously a directory */ + vtyp = VDIR; + filesize = 0; } else if (cnp->cn_namelen == 1 && *(cnp->cn_nameptr) == '.') { nid = VTOI(dvp); - fdisp_init(&fdi, 0); - op = FUSE_GETATTR; - goto calldaemon; - } else if (fuse_lookup_cache_enable) { - err = cache_lookup(dvp, vpp, cnp, NULL, NULL); - switch (err) { + /* . 
is obviously a directory */ + vtyp = VDIR; + filesize = 0; + } else { + struct timespec now, timeout; + err = cache_lookup(dvp, vpp, cnp, &timeout, NULL); + getnanouptime(&now); + SDT_PROBE3(fusefs, , vnops, cache_lookup, err, &timeout, &now); + switch (err) { case -1: /* positive match */ - atomic_add_acq_long(&fuse_lookup_cache_hits, 1); + if (timespeccmp(&timeout, &now, >)) { + counter_u64_add(fuse_lookup_cache_hits, 1); + } else { + /* Cache timeout */ + counter_u64_add(fuse_lookup_cache_misses, 1); + bintime_clear( + &VTOFUD(*vpp)->entry_cache_timeout); + cache_purge(*vpp); + if (dvp != *vpp) + vput(*vpp); + else + vrele(*vpp); + *vpp = NULL; + break; + } return 0; case 0: /* no match in cache */ - atomic_add_acq_long(&fuse_lookup_cache_misses, 1); + counter_u64_add(fuse_lookup_cache_misses, 1); break; case ENOENT: /* negative match */ + getnanouptime(&now); + if (timespeccmp(&timeout, &now, <=)) { + /* Cache timeout */ + cache_purge_negative(dvp); + break; + } /* fall through */ default: return err; } - } - nid = VTOI(dvp); - fdisp_init(&fdi, cnp->cn_namelen + 1); - op = FUSE_LOOKUP; -calldaemon: - fdisp_make(&fdi, op, mp, nid, td, cred); + nid = VTOI(dvp); + fdisp_init(&fdi, cnp->cn_namelen + 1); + fdisp_make(&fdi, FUSE_LOOKUP, mp, nid, td, cred); - if (op == FUSE_LOOKUP) { memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen); ((char *)fdi.indata)[cnp->cn_namelen] = '\0'; - } - lookup_err = fdisp_wait_answ(&fdi); + lookup_err = fdisp_wait_answ(&fdi); + did_lookup = true; - if ((op == FUSE_LOOKUP) && !lookup_err) { /* lookup call succeeded */ - nid = ((struct fuse_entry_out *)fdi.answ)->nodeid; - if (!nid) { - /* - * zero nodeid is the same as "not found", - * but it's also cacheable (which we keep - * keep on doing not as of writing this) - */ - lookup_err = ENOENT; - } else if (nid == FUSE_ROOT_ID) { - lookup_err = EINVAL; + if (!lookup_err) { + /* lookup call succeeded */ + feo = (struct fuse_entry_out *)fdi.answ; + nid = feo->nodeid; + if (nid == 0) { + /* zero nodeid means ENOENT and cache it */ + struct timespec timeout; + + fdi.answ_stat = ENOENT; + lookup_err = ENOENT; + if (cnp->cn_flags & MAKEENTRY) { + fuse_validity_2_timespec(feo, &timeout); + cache_enter_time(dvp, *vpp, cnp, + &timeout, NULL); + } + } else if (nid == FUSE_ROOT_ID) { + lookup_err = EINVAL; + } + vtyp = IFTOVT(feo->attr.mode); + filesize = feo->attr.size; } + if (lookup_err && (!fdi.answ_stat || lookup_err != ENOENT)) { + fdisp_destroy(&fdi); + return lookup_err; + } } - if (lookup_err && - (!fdi.answ_stat || lookup_err != ENOENT || op != FUSE_LOOKUP)) { - fdisp_destroy(&fdi); - return lookup_err; - } /* lookup_err, if non-zero, must be ENOENT at this point */ if (lookup_err) { + /* Entry not found */ + if ((nameiop == CREATE || nameiop == RENAME) && islastcn) { + err = fuse_internal_access(dvp, VWRITE, td, cred); + if (!err) { + /* + * Set the SAVENAME flag to hold onto the + * pathname for use later in VOP_CREATE or + * VOP_RENAME. + */ + cnp->cn_flags |= SAVENAME; - if ((nameiop == CREATE || nameiop == RENAME) && islastcn - /* && directory dvp has not been removed */ ) { - - if (vfs_isrdonly(mp)) { - err = EROFS; - goto out; + err = EJUSTRETURN; } -#if 0 /* THINK_ABOUT_THIS */ - if ((err = fuse_internal_access(dvp, VWRITE, cred, td, &facp))) { - goto out; - } -#endif - - /* - * Possibly record the position of a slot in the - * directory large enough for the new component name. - * This can be recorded in the vnode private data for - * dvp. 
Set the SAVENAME flag to hold onto the - * pathname for use later in VOP_CREATE or VOP_RENAME. - */ - cnp->cn_flags |= SAVENAME; - - err = EJUSTRETURN; - goto out; - } - /* Consider inserting name into cache. */ - - /* - * No we can't use negative caching, as the fs - * changes are out of our control. - * False positives' falseness turns out just as things - * go by, but false negatives' falseness doesn't. - * (and aiding the caching mechanism with extra control - * mechanisms comes quite close to beating the whole purpose - * caching...) - */ -#if 0 - if ((cnp->cn_flags & MAKEENTRY) != 0) { - SDT_PROBE2(fuse, , vnops, trace, 1, - "inserting NULL into cache"); - cache_enter(dvp, NULL, cnp); - } -#endif - err = ENOENT; - goto out; - - } else { - - /* !lookup_err */ - - struct fuse_entry_out *feo = NULL; - struct fuse_attr *fattr = NULL; - - if (op == FUSE_GETATTR) { - fattr = &((struct fuse_attr_out *)fdi.answ)->attr; } else { - feo = (struct fuse_entry_out *)fdi.answ; - fattr = &(feo->attr); + err = ENOENT; } - - /* - * If deleting, and at end of pathname, return parameters - * which can be used to remove file. If the wantparent flag - * isn't set, we return only the directory, otherwise we go on - * and lock the inode, being careful with ".". - */ - if (nameiop == DELETE && islastcn) { - /* - * Check for write access on directory. - */ - facp.xuid = fattr->uid; - facp.facc_flags |= FACCESS_STICKY; - err = fuse_internal_access(dvp, VWRITE, &facp, td, cred); - facp.facc_flags &= ~FACCESS_XQUERIES; - - if (err) { - goto out; - } - if (nid == VTOI(dvp)) { - vref(dvp); - *vpp = dvp; - } else { - err = fuse_vnode_get(dvp->v_mount, feo, nid, - dvp, &vp, cnp, IFTOVT(fattr->mode)); - if (err) - goto out; - *vpp = vp; - } - - /* - * Save the name for use in VOP_RMDIR and VOP_REMOVE - * later. - */ - cnp->cn_flags |= SAVENAME; - goto out; - - } - /* - * If rewriting (RENAME), return the inode and the - * information required to rewrite the present directory - * Must get inode of directory entry to verify it's a - * regular file, or empty directory. - */ - if (nameiop == RENAME && wantparent && islastcn) { - -#if 0 /* THINK_ABOUT_THIS */ - if ((err = fuse_internal_access(dvp, VWRITE, cred, td, &facp))) { - goto out; - } -#endif - - /* - * Check for "." - */ - if (nid == VTOI(dvp)) { - err = EISDIR; - goto out; - } - err = fuse_vnode_get(vnode_mount(dvp), feo, nid, dvp, - &vp, cnp, IFTOVT(fattr->mode)); - if (err) { - goto out; - } - *vpp = vp; - /* - * Save the name for use in VOP_RENAME later. - */ - cnp->cn_flags |= SAVENAME; - - goto out; - } + } else { + /* Entry was found */ if (flags & ISDOTDOT) { - struct mount *mp; - int ltype; + struct fuse_lookup_alloc_arg flaa; - /* - * Expanded copy of vn_vget_ino() so that - * fuse_vnode_get() can be used. 
- */ - mp = dvp->v_mount; - ltype = VOP_ISLOCKED(dvp); - err = vfs_busy(mp, MBF_NOWAIT); - if (err != 0) { - vfs_ref(mp); - VOP_UNLOCK(dvp, 0); - err = vfs_busy(mp, 0); - vn_lock(dvp, ltype | LK_RETRY); - vfs_rel(mp); - if (err) - goto out; - if ((dvp->v_iflag & VI_DOOMED) != 0) { - err = ENOENT; - vfs_unbusy(mp); - goto out; - } - } - VOP_UNLOCK(dvp, 0); - err = fuse_vnode_get(vnode_mount(dvp), feo, nid, NULL, - &vp, cnp, IFTOVT(fattr->mode)); - vfs_unbusy(mp); - vn_lock(dvp, ltype | LK_RETRY); - if ((dvp->v_iflag & VI_DOOMED) != 0) { - if (err == 0) - vput(vp); - err = ENOENT; - } - if (err) - goto out; + flaa.nid = nid; + flaa.feo = feo; + flaa.cnp = cnp; + flaa.vtyp = vtyp; + err = vn_vget_ino_gen(dvp, fuse_lookup_alloc, &flaa, 0, + &vp); *vpp = vp; } else if (nid == VTOI(dvp)) { vref(dvp); @@ -938,25 +1139,26 @@ struct fuse_vnode_data *fvdat; err = fuse_vnode_get(vnode_mount(dvp), feo, nid, dvp, - &vp, cnp, IFTOVT(fattr->mode)); - if (err) { + &vp, cnp, vtyp); + if (err) goto out; - } - fuse_vnode_setparent(vp, dvp); + *vpp = vp; /* * In the case where we are looking up a FUSE node * represented by an existing cached vnode, and the * true size reported by FUSE_LOOKUP doesn't match - * the vnode's cached size, fix the vnode cache to - * match the real object size. + * the vnode's cached size, then any cached writes + * beyond the file's current size are lost. * - * This can occur via FUSE distributed filesystems, - * irregular files, etc. + * We can get here: + * * following attribute cache expiration, or + * * due a bug in the daemon, or */ fvdat = VTOFUD(vp); if (vnode_isreg(vp) && - fattr->size != fvdat->filesize) { + filesize != fvdat->cached_attrs.va_size && + fvdat->flag & FN_SIZECHANGE) { /* * The FN_SIZECHANGE flag reflects a dirty * append. If userspace lets us know our cache @@ -966,131 +1168,64 @@ * * XXX: Maybe disable WB caching on this mount. */ - if (fvdat->flag & FN_SIZECHANGE) - printf("%s: WB cache incoherent on " - "%s!\n", __func__, - vnode_mount(vp)->mnt_stat.f_mntonname); + printf("%s: WB cache incoherent on %s!\n", + __func__, + vnode_mount(vp)->mnt_stat.f_mntonname); - (void)fuse_vnode_setsize(vp, fattr->size); fvdat->flag &= ~FN_SIZECHANGE; } - *vpp = vp; - } - if (op == FUSE_GETATTR) { - struct fuse_attr_out *fao = - (struct fuse_attr_out*)fdi.answ; - fuse_internal_cache_attrs(*vpp, - &fao->attr, fao->attr_valid, - fao->attr_valid_nsec, NULL); - } else { - struct fuse_entry_out *feo = - (struct fuse_entry_out*)fdi.answ; - fuse_internal_cache_attrs(*vpp, - &feo->attr, feo->attr_valid, - feo->attr_valid_nsec, NULL); - } + MPASS(feo != NULL); + fuse_internal_cache_attrs(*vpp, &feo->attr, + feo->attr_valid, feo->attr_valid_nsec, NULL); + fuse_validity_2_bintime(feo->entry_valid, + feo->entry_valid_nsec, + &fvdat->entry_cache_timeout); - /* Insert name into cache if appropriate. */ + if ((nameiop == DELETE || nameiop == RENAME) && + islastcn) + { + struct vattr dvattr; - /* - * Nooo, caching is evil. With caching, we can't avoid stale - * information taking over the playground (cached info is not - * just positive/negative, it does have qualitative aspects, - * too). And a (VOP/FUSE)_GETATTR is always thrown anyway, when - * walking down along cached path components, and that's not - * any cheaper than FUSE_LOOKUP. This might change with - * implementing kernel side attr caching, but... In Linux, - * lookup results are not cached, and the daemon is bombarded - * with FUSE_LOOKUPS on and on. 
This shows that by design, the - * daemon is expected to handle frequent lookup queries - * efficiently, do its caching in userspace, and so on. - * - * So just leave the name cache alone. - */ - - /* - * Well, now I know, Linux caches lookups, but with a - * timeout... So it's the same thing as attribute caching: - * we can deal with it when implement timeouts. - */ -#if 0 - if (cnp->cn_flags & MAKEENTRY) { - cache_enter(dvp, *vpp, cnp); - } -#endif - } -out: - if (!lookup_err) { - - /* No lookup error; need to clean up. */ - - if (err) { /* Found inode; exit with no vnode. */ - if (op == FUSE_LOOKUP) { - fuse_internal_forget_send(vnode_mount(dvp), td, cred, - nid, 1); - } - fdisp_destroy(&fdi); - return err; - } else { -#ifndef NO_EARLY_PERM_CHECK_HACK - if (!islastcn) { - /* - * We have the attributes of the next item - * *now*, and it's a fact, and we do not - * have to do extra work for it (ie, beg the - * daemon), and it neither depends on such - * accidental things like attr caching. So - * the big idea: check credentials *now*, - * not at the beginning of the next call to - * lookup. - * - * The first item of the lookup chain (fs root) - * won't be checked then here, of course, as - * its never "the next". But go and see that - * the root is taken care about at the very - * beginning of this function. - * - * Now, given we want to do the access check - * this way, one might ask: so then why not - * do the access check just after fetching - * the inode and its attributes from the - * daemon? Why bother with producing the - * corresponding vnode at all if something - * is not OK? We know what's the deal as - * soon as we get those attrs... There is - * one bit of info though not given us by - * the daemon: whether his response is - * authoritative or not... His response should - * be ignored if something is mounted over - * the dir in question. But that can be - * known only by having the vnode... + err = fuse_internal_access(dvp, VWRITE, td, + cred); + if (err != 0) + goto out; + /* + * if the parent's sticky bit is set, check + * whether we're allowed to remove the file. + * Need to figure out the vnode locking to make + * this work. 
*/ - int tmpvtype = vnode_vtype(*vpp); - - bzero(&facp, sizeof(facp)); - /*the early perm check hack */ - facp.facc_flags |= FACCESS_VA_VALID; - - if ((tmpvtype != VDIR) && (tmpvtype != VLNK)) { - err = ENOTDIR; + fuse_internal_getattr(dvp, &dvattr, cred, td); + if ((dvattr.va_mode & S_ISTXT) && + fuse_internal_access(dvp, VADMIN, td, + cred) && + fuse_internal_access(*vpp, VADMIN, td, + cred)) { + err = EPERM; + goto out; } - if (!err && !vnode_mountedhere(*vpp)) { - err = fuse_internal_access(*vpp, VEXEC, &facp, td, cred); - } - if (err) { - if (tmpvtype == VLNK) - SDT_PROBE2(fuse, , vnops, trace, - 1, "weird, permission " - "error with a symlink?"); - vput(*vpp); - *vpp = NULL; - } } -#endif + + if (islastcn && ( + (nameiop == DELETE) || + (nameiop == RENAME && wantparent))) { + cnp->cn_flags |= SAVENAME; + } + } } - fdisp_destroy(&fdi); +out: + if (err) { + if (vp != NULL && dvp != vp) + vput(vp); + else if (vp != NULL) + vrele(vp); + *vpp = NULL; + } + if (did_lookup) + fdisp_destroy(&fdi); return err; } @@ -1117,6 +1252,7 @@ return ENXIO; } fmdi.mode = MAKEIMODE(vap->va_type, vap->va_mode); + fmdi.umask = curthread->td_proc->p_fd->fd_cmask; return (fuse_internal_newentry(dvp, vpp, cnp, FUSE_MKDIR, &fmdi, sizeof(fmdi), VDIR)); @@ -1134,12 +1270,19 @@ fuse_vnop_mknod(struct vop_mknod_args *ap) { - return (EINVAL); -} + struct vnode *dvp = ap->a_dvp; + struct vnode **vpp = ap->a_vpp; + struct componentname *cnp = ap->a_cnp; + struct vattr *vap = ap->a_vap; + if (fuse_isdeadfs(dvp)) + return ENXIO; + return fuse_internal_mknod(dvp, vpp, cnp, vap); +} + /* - struct vnop_open_args { + struct vop_open_args { struct vnode *a_vp; int a_mode; struct ucred *a_cred; @@ -1151,50 +1294,27 @@ fuse_vnop_open(struct vop_open_args *ap) { struct vnode *vp = ap->a_vp; - int mode = ap->a_mode; + int a_mode = ap->a_mode; struct thread *td = ap->a_td; struct ucred *cred = ap->a_cred; - - fufh_type_t fufh_type; + pid_t pid = td->td_proc->p_pid; struct fuse_vnode_data *fvdat; - int error, isdir = 0; - int32_t fuse_open_flags; - - if (fuse_isdeadfs(vp)) { + if (fuse_isdeadfs(vp)) return ENXIO; - } - if ((mode & (FREAD | FWRITE)) == 0) + if (vp->v_type == VCHR || vp->v_type == VBLK || vp->v_type == VFIFO) + return (EOPNOTSUPP); + if ((a_mode & (FREAD | FWRITE | FEXEC)) == 0) return EINVAL; fvdat = VTOFUD(vp); - if (vnode_isdir(vp)) { - isdir = 1; - } - fuse_open_flags = 0; - if (isdir) { - fufh_type = FUFH_RDONLY; - } else { - fufh_type = fuse_filehandle_xlate_from_fflags(mode); - /* - * For WRONLY opens, force DIRECT_IO. This is necessary - * since writing a partial block through the buffer cache - * will result in a read of the block and that read won't - * be allowed by the WRONLY open. 
- */ - if (fufh_type == FUFH_WRONLY || - (fvdat->flag & FN_DIRECTIO) != 0) - fuse_open_flags = FOPEN_DIRECT_IO; - } - - if (fuse_filehandle_validrw(vp, fufh_type) != FUFH_INVALID) { - fuse_vnode_open(vp, fuse_open_flags, td); + if (fuse_filehandle_validrw(vp, a_mode, cred, pid)) { + fuse_vnode_open(vp, 0, td); return 0; } - error = fuse_filehandle_open(vp, fufh_type, NULL, td, cred); - return error; + return fuse_filehandle_open(vp, a_mode, NULL, td, cred); } static int @@ -1237,6 +1357,7 @@ struct uio *uio = ap->a_uio; int ioflag = ap->a_ioflag; struct ucred *cred = ap->a_cred; + pid_t pid = curthread->td_proc->p_pid; if (fuse_isdeadfs(vp)) { return ENXIO; @@ -1246,7 +1367,7 @@ ioflag |= IO_DIRECT; } - return fuse_io_dispatch(vp, uio, ioflag, cred); + return fuse_io_dispatch(vp, uio, ioflag, cred, pid); } /* @@ -1255,7 +1376,7 @@ struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; - int *ncookies; + int *a_ncookies; u_long **a_cookies; }; */ @@ -1265,13 +1386,18 @@ struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; struct ucred *cred = ap->a_cred; - struct fuse_filehandle *fufh = NULL; struct fuse_iov cookediov; - int err = 0; - int freefufh = 0; + u_long *cookies; + off_t startoff; + ssize_t tresid; + int ncookies; + bool closefufh = false; + pid_t pid = curthread->td_proc->p_pid; + if (ap->a_eofflag) + *ap->a_eofflag = 0; if (fuse_isdeadfs(vp)) { return ENXIO; } @@ -1280,26 +1406,61 @@ return EINVAL; } - if (!fuse_filehandle_valid(vp, FUFH_RDONLY)) { - SDT_PROBE2(fuse, , vnops, trace, 1, - "calling readdir() before open()"); - err = fuse_filehandle_open(vp, FUFH_RDONLY, &fufh, NULL, cred); - freefufh = 1; - } else { - err = fuse_filehandle_get(vp, FUFH_RDONLY, &fufh); + tresid = uio->uio_resid; + startoff = uio->uio_offset; + err = fuse_filehandle_get_dir(vp, &fufh, cred, pid); + if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) { + /* + * nfsd will do VOP_READDIR without first doing VOP_OPEN. We + * must implicitly open the directory here + */ + err = fuse_filehandle_open(vp, FREAD, &fufh, curthread, cred); + if (err == 0) { + /* + * When a directory is opened, it must be read from + * the beginning. Hopefully, the "startoff" still + * exists as an offset cookie for the directory. + * If not, it will read the entire directory without + * returning any entries and just return eof. 
+ */ + uio->uio_offset = 0; + } + closefufh = true; } - if (err) { + if (err) return (err); + if (ap->a_ncookies != NULL) { + ncookies = uio->uio_resid / + (offsetof(struct dirent, d_name) + 4) + 1; + cookies = malloc(ncookies * sizeof(*cookies), M_TEMP, M_WAITOK); + *ap->a_ncookies = ncookies; + *ap->a_cookies = cookies; + } else { + ncookies = 0; + cookies = NULL; } #define DIRCOOKEDSIZE FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + MAXNAMLEN + 1) fiov_init(&cookediov, DIRCOOKEDSIZE); - err = fuse_internal_readdir(vp, uio, fufh, &cookediov); + err = fuse_internal_readdir(vp, uio, startoff, fufh, &cookediov, + &ncookies, cookies); fiov_teardown(&cookediov); - if (freefufh) { - fuse_filehandle_close(vp, FUFH_RDONLY, NULL, cred); + if (closefufh) + fuse_filehandle_close(vp, fufh, curthread, cred); + + if (ap->a_ncookies != NULL) { + if (err == 0) { + *ap->a_ncookies -= ncookies; + } else { + free(*ap->a_cookies, M_TEMP); + *ap->a_ncookies = 0; + *ap->a_cookies = NULL; + } } + if (err == 0 && tresid == uio->uio_resid) + *ap->a_eofflag = 1; + return err; } @@ -1356,22 +1517,16 @@ { struct vnode *vp = ap->a_vp; struct thread *td = ap->a_td; - struct fuse_vnode_data *fvdat = VTOFUD(vp); - struct fuse_filehandle *fufh = NULL; + struct fuse_filehandle *fufh, *fufh_tmp; - int type; - if (!fvdat) { panic("FUSE: no vnode data during recycling"); } - for (type = 0; type < FUFH_MAXTYPE; type++) { - fufh = &(fvdat->fufh[type]); - if (FUFH_IS_VALID(fufh)) { - printf("FUSE: vnode being reclaimed but fufh (type=%d) is valid", - type); - fuse_filehandle_close(vp, type, td, NULL); - } + LIST_FOREACH_SAFE(fufh, &fvdat->handles, next, fufh_tmp) { + printf("FUSE: vnode being reclaimed with open fufh " + "(type=%#x)", fufh->fufh_type); + fuse_filehandle_close(vp, fufh, td, NULL); } if ((!fuse_isdeadfs(vp)) && (fvdat->nlookup)) { @@ -1409,12 +1564,9 @@ if (vnode_isdir(vp)) { return EPERM; } - cache_purge(vp); err = fuse_internal_remove(dvp, vp, cnp, FUSE_UNLINK); - if (err == 0) - fuse_internal_vnode_disappear(vp); return err; } @@ -1438,7 +1590,8 @@ struct vnode *tvp = ap->a_tvp; struct componentname *tcnp = ap->a_tcnp; struct fuse_data *data; - + bool newparent = fdvp != tdvp; + bool isdir = fvp->v_type == VDIR; int err = 0; if (fuse_isdeadfs(fdvp)) { @@ -1446,7 +1599,7 @@ } if (fvp->v_mount != tdvp->v_mount || (tvp && fvp->v_mount != tvp->v_mount)) { - SDT_PROBE2(fuse, , vnops, trace, 1, "cross-device rename"); + SDT_PROBE2(fusefs, , vnops, trace, 1, "cross-device rename"); err = EXDEV; goto out; } @@ -1457,7 +1610,17 @@ * under the source directory in the file system tree. * Linux performs this check at VFS level. */ + /* + * If source is a directory, and it will get a new parent, user must + * have write permission to it, so ".." can be modified. 
+ */ data = fuse_get_mpdata(vnode_mount(tdvp)); + if (data->dataflags & FSESS_DEFAULT_PERMISSIONS && isdir && newparent) { + err = fuse_internal_access(fvp, VWRITE, + tcnp->cn_thread, tcnp->cn_cred); + if (err) + goto out; + } sx_xlock(&data->rename_lock); err = fuse_internal_rename(fdvp, fcnp, tdvp, tcnp); if (err == 0) { @@ -1515,8 +1678,6 @@ } err = fuse_internal_remove(dvp, vp, ap->a_cnp, FUSE_RMDIR); - if (err == 0) - fuse_internal_vnode_disappear(vp); return err; } @@ -1535,129 +1696,137 @@ struct vattr *vap = ap->a_vap; struct ucred *cred = ap->a_cred; struct thread *td = curthread; + struct mount *mp; + struct fuse_data *data; + struct vattr old_va; + int dataflags; + int err = 0, err2; + accmode_t accmode = 0; + bool checkperm; + bool drop_suid = false; + gid_t cr_gid; - struct fuse_dispatcher fdi; - struct fuse_setattr_in *fsai; - struct fuse_access_param facp; + mp = vnode_mount(vp); + data = fuse_get_mpdata(mp); + dataflags = data->dataflags; + checkperm = dataflags & FSESS_DEFAULT_PERMISSIONS; + if (cred->cr_ngroups > 0) + cr_gid = cred->cr_groups[0]; + else + cr_gid = 0; - int err = 0; - enum vtype vtyp; - int sizechanged = 0; - uint64_t newsize = 0; - if (fuse_isdeadfs(vp)) { return ENXIO; } - fdisp_init(&fdi, sizeof(*fsai)); - fdisp_make_vp(&fdi, FUSE_SETATTR, vp, td, cred); - fsai = fdi.indata; - fsai->valid = 0; - bzero(&facp, sizeof(facp)); - - facp.xuid = vap->va_uid; - facp.xgid = vap->va_gid; - if (vap->va_uid != (uid_t)VNOVAL) { - facp.facc_flags |= FACCESS_CHOWN; - fsai->uid = vap->va_uid; - fsai->valid |= FATTR_UID; + if (checkperm) { + /* Only root may change a file's owner */ + err = priv_check_cred(cred, PRIV_VFS_CHOWN); + if (err) { + /* As a special case, allow the null chown */ + err2 = fuse_internal_getattr(vp, &old_va, cred, + td); + if (err2) + return (err2); + if (vap->va_uid != old_va.va_uid) + return err; + else + accmode |= VADMIN; + drop_suid = true; + } else + accmode |= VADMIN; + } else + accmode |= VADMIN; } if (vap->va_gid != (gid_t)VNOVAL) { - facp.facc_flags |= FACCESS_CHOWN; - fsai->gid = vap->va_gid; - fsai->valid |= FATTR_GID; + if (checkperm && priv_check_cred(cred, PRIV_VFS_CHOWN)) + drop_suid = true; + if (checkperm && !groupmember(vap->va_gid, cred)) + { + /* + * Non-root users may only chgrp to one of their own + * groups + */ + err = priv_check_cred(cred, PRIV_VFS_CHOWN); + if (err) { + /* As a special case, allow the null chgrp */ + err2 = fuse_internal_getattr(vp, &old_va, cred, + td); + if (err2) + return (err2); + if (vap->va_gid != old_va.va_gid) + return err; + accmode |= VADMIN; + } else + accmode |= VADMIN; + } else + accmode |= VADMIN; } if (vap->va_size != VNOVAL) { - - struct fuse_filehandle *fufh = NULL; - - /*Truncate to a new value. */ - fsai->size = vap->va_size; - sizechanged = 1; - newsize = vap->va_size; - fsai->valid |= FATTR_SIZE; - - fuse_filehandle_getrw(vp, FUFH_WRONLY, &fufh); - if (fufh) { - fsai->fh = fufh->fh_id; - fsai->valid |= FATTR_FH; + switch (vp->v_type) { + case VDIR: + return (EISDIR); + case VLNK: + case VREG: + if (vfs_isrdonly(mp)) + return (EROFS); + break; + default: + /* + * According to POSIX, the result is unspecified + * for file types other than regular files, + * directories and shared memory objects. We + * don't support shared memory objects in the file + * system, and have dubious support for truncating + * symlinks. Just ignore the request in other cases. + */ + return (0); } + /* Don't set accmode. 
Permission to trunc is checked upstack */ } - if (vap->va_atime.tv_sec != VNOVAL) { - fsai->atime = vap->va_atime.tv_sec; - fsai->atimensec = vap->va_atime.tv_nsec; - fsai->valid |= FATTR_ATIME; + if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { + if (vap->va_vaflags & VA_UTIMES_NULL) + accmode |= VWRITE; + else + accmode |= VADMIN; } - if (vap->va_mtime.tv_sec != VNOVAL) { - fsai->mtime = vap->va_mtime.tv_sec; - fsai->mtimensec = vap->va_mtime.tv_nsec; - fsai->valid |= FATTR_MTIME; + if (drop_suid) { + if (vap->va_mode != (mode_t)VNOVAL) + vap->va_mode &= ~(S_ISUID | S_ISGID); + else { + err = fuse_internal_getattr(vp, &old_va, cred, td); + if (err) + return (err); + vap->va_mode = old_va.va_mode & ~(S_ISUID | S_ISGID); + } } if (vap->va_mode != (mode_t)VNOVAL) { - fsai->mode = vap->va_mode & ALLPERMS; - fsai->valid |= FATTR_MODE; + /* Only root may set the sticky bit on non-directories */ + if (checkperm && vp->v_type != VDIR && (vap->va_mode & S_ISTXT) + && priv_check_cred(cred, PRIV_VFS_STICKYFILE)) + return EFTYPE; + if (checkperm && (vap->va_mode & S_ISGID)) { + err = fuse_internal_getattr(vp, &old_va, cred, td); + if (err) + return (err); + if (!groupmember(old_va.va_gid, cred)) { + err = priv_check_cred(cred, PRIV_VFS_SETGID); + if (err) + return (err); + } + } + accmode |= VADMIN; } - if (!fsai->valid) { - goto out; - } - vtyp = vnode_vtype(vp); - if (fsai->valid & FATTR_SIZE && vtyp == VDIR) { - err = EISDIR; - goto out; - } - if (vfs_isrdonly(vnode_mount(vp)) && (fsai->valid & ~FATTR_SIZE || vtyp == VREG)) { - err = EROFS; - goto out; - } - if (fsai->valid & ~FATTR_SIZE) { - /*err = fuse_internal_access(vp, VADMIN, context, &facp); */ - /*XXX */ - err = 0; - } - facp.facc_flags &= ~FACCESS_XQUERIES; + if (vfs_isrdonly(mp)) + return EROFS; - if (err && !(fsai->valid & ~(FATTR_ATIME | FATTR_MTIME)) && - vap->va_vaflags & VA_UTIMES_NULL) { - err = fuse_internal_access(vp, VWRITE, &facp, td, cred); - } + err = fuse_internal_access(vp, accmode, td, cred); if (err) - goto out; - if ((err = fdisp_wait_answ(&fdi))) - goto out; - vtyp = IFTOVT(((struct fuse_attr_out *)fdi.answ)->attr.mode); - - if (vnode_vtype(vp) != vtyp) { - if (vnode_vtype(vp) == VNON && vtyp != VNON) { - SDT_PROBE2(fuse, , vnops, trace, 1, "FUSE: Dang! " - "vnode_vtype is VNON and vtype isn't."); - } else { - /* - * STALE vnode, ditch - * - * The vnode has changed its type "behind our back". - * There's nothing really we can do, so let us just - * force an internal revocation and tell the caller to - * try again, if interested. - */ - fuse_internal_vnode_disappear(vp); - err = EAGAIN; - } - } - if (err == 0) { - struct fuse_attr_out *fao = (struct fuse_attr_out*)fdi.answ; - fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid, - fao->attr_valid_nsec, NULL); - } - -out: - fdisp_destroy(&fdi); - if (!err && sizechanged) { - fuse_vnode_setsize(vp, newsize); - VTOFUD(vp)->flag &= ~FN_SIZECHANGE; - } - return err; + return err; + else + return fuse_internal_setattr(vp, vap, td, cred); } /* @@ -1676,22 +1845,15 @@ bp->b_ioflags |= BIO_ERROR; bp->b_error = ENXIO; bufdone(bp); - return ENXIO; + return 0; } - if (bp->b_iocmd == BIO_WRITE) - fuse_vnode_refreshsize(vp, NOCRED); - (void)fuse_io_strategy(vp, bp); - /* - * This is a dangerous function. If returns error, that might mean a - * panic. We prefer pretty much anything over being forced to panic - * by a malicious daemon (a demon?). So we just return 0 anyway. 
You - * should never mind this: this function has its own error - * propagation mechanism via the argument buffer, so - * not-that-melodramatic residents of the call chain still will be - * able to know what to do. + * VOP_STRATEGY always returns zero and signals error via bp->b_ioflags. + * fuse_io_strategy sets bp's error fields */ + (void)fuse_io_strategy(vp, bp); + return 0; } @@ -1757,237 +1919,70 @@ struct uio *uio = ap->a_uio; int ioflag = ap->a_ioflag; struct ucred *cred = ap->a_cred; + pid_t pid = curthread->td_proc->p_pid; if (fuse_isdeadfs(vp)) { return ENXIO; } - fuse_vnode_refreshsize(vp, cred); if (VTOFUD(vp)->flag & FN_DIRECTIO) { ioflag |= IO_DIRECT; } - return fuse_io_dispatch(vp, uio, ioflag, cred); + return fuse_io_dispatch(vp, uio, ioflag, cred, pid); } -SDT_PROBE_DEFINE1(fuse, , vnops, vnop_getpages_error, "int"); -/* - struct vnop_getpages_args { - struct vnode *a_vp; - vm_page_t *a_m; - int a_count; - int a_reqpage; - }; -*/ -static int -fuse_vnop_getpages(struct vop_getpages_args *ap) +static daddr_t +fuse_gbp_getblkno(struct vnode *vp, vm_ooffset_t off) { - int i, error, nextoff, size, toff, count, npages; - struct uio uio; - struct iovec iov; - vm_offset_t kva; - struct buf *bp; - struct vnode *vp; - struct thread *td; - struct ucred *cred; - vm_page_t *pages; + const int biosize = fuse_iosize(vp); - vp = ap->a_vp; - KASSERT(vp->v_object, ("objectless vp passed to getpages")); - td = curthread; /* XXX */ - cred = curthread->td_ucred; /* XXX */ - pages = ap->a_m; - npages = ap->a_count; + return (off / biosize); +} - if (!fsess_opt_mmap(vnode_mount(vp))) { - SDT_PROBE2(fuse, , vnops, trace, 1, - "called on non-cacheable vnode??\n"); - return (VM_PAGER_ERROR); - } +static int +fuse_gbp_getblksz(struct vnode *vp, daddr_t lbn) +{ + off_t filesize; + int blksz, err; + const int biosize = fuse_iosize(vp); - /* - * If the last page is partially valid, just return it and allow - * the pager to zero-out the blanks. Partially valid pages can - * only occur at the file EOF. - * - * XXXGL: is that true for FUSE, which is a local filesystem, - * but still somewhat disconnected from the kernel? - */ - VM_OBJECT_WLOCK(vp->v_object); - if (pages[npages - 1]->valid != 0 && --npages == 0) - goto out; - VM_OBJECT_WUNLOCK(vp->v_object); + err = fuse_vnode_size(vp, &filesize, NULL, NULL); + KASSERT(err == 0, ("vfs_bio_getpages can't handle errors here")); + if (err) + return biosize; - /* - * We use only the kva address for the buffer, but this is extremely - * convenient and fast. - */ - bp = uma_zalloc(fuse_pbuf_zone, M_WAITOK); - - kva = (vm_offset_t)bp->b_data; - pmap_qenter(kva, pages, npages); - VM_CNT_INC(v_vnodein); - VM_CNT_ADD(v_vnodepgsin, npages); - - count = npages << PAGE_SHIFT; - iov.iov_base = (caddr_t)kva; - iov.iov_len = count; - uio.uio_iov = &iov; - uio.uio_iovcnt = 1; - uio.uio_offset = IDX_TO_OFF(pages[0]->pindex); - uio.uio_resid = count; - uio.uio_segflg = UIO_SYSSPACE; - uio.uio_rw = UIO_READ; - uio.uio_td = td; - - error = fuse_io_dispatch(vp, &uio, IO_DIRECT, cred); - pmap_qremove(kva, npages); - - uma_zfree(fuse_pbuf_zone, bp); - - if (error && (uio.uio_resid == count)) { - SDT_PROBE1(fuse, , vnops, vnop_getpages_error, error); - return VM_PAGER_ERROR; + if ((off_t)lbn * biosize >= filesize) { + blksz = 0; + } else if ((off_t)(lbn + 1) * biosize > filesize) { + blksz = filesize - (off_t)lbn *biosize; + } else { + blksz = biosize; } - /* - * Calculate the number of bytes read and validate only that number - * of bytes. 
Note that due to pending writes, size may be 0. This - * does not mean that the remaining data is invalid! - */ - - size = count - uio.uio_resid; - VM_OBJECT_WLOCK(vp->v_object); - fuse_vm_page_lock_queues(); - for (i = 0, toff = 0; i < npages; i++, toff = nextoff) { - vm_page_t m; - - nextoff = toff + PAGE_SIZE; - m = pages[i]; - - if (nextoff <= size) { - /* - * Read operation filled an entire page - */ - m->valid = VM_PAGE_BITS_ALL; - KASSERT(m->dirty == 0, - ("fuse_getpages: page %p is dirty", m)); - } else if (size > toff) { - /* - * Read operation filled a partial page. - */ - m->valid = 0; - vm_page_set_valid_range(m, 0, size - toff); - KASSERT(m->dirty == 0, - ("fuse_getpages: page %p is dirty", m)); - } else { - /* - * Read operation was short. If no error occurred - * we may have hit a zero-fill section. We simply - * leave valid set to 0. - */ - ; - } - } - fuse_vm_page_unlock_queues(); -out: - VM_OBJECT_WUNLOCK(vp->v_object); - if (ap->a_rbehind) - *ap->a_rbehind = 0; - if (ap->a_rahead) - *ap->a_rahead = 0; - return (VM_PAGER_OK); + return (blksz); } /* - struct vnop_putpages_args { + struct vnop_getpages_args { struct vnode *a_vp; vm_page_t *a_m; int a_count; - int a_sync; - int *a_rtvals; - vm_ooffset_t a_offset; + int a_reqpage; }; */ static int -fuse_vnop_putpages(struct vop_putpages_args *ap) +fuse_vnop_getpages(struct vop_getpages_args *ap) { - struct uio uio; - struct iovec iov; - vm_offset_t kva; - struct buf *bp; - int i, error, npages, count; - off_t offset; - int *rtvals; - struct vnode *vp; - struct thread *td; - struct ucred *cred; - vm_page_t *pages; - vm_ooffset_t fsize; + struct vnode *vp = ap->a_vp; - vp = ap->a_vp; - KASSERT(vp->v_object, ("objectless vp passed to putpages")); - fsize = vp->v_object->un_pager.vnp.vnp_size; - td = curthread; /* XXX */ - cred = curthread->td_ucred; /* XXX */ - pages = ap->a_m; - count = ap->a_count; - rtvals = ap->a_rtvals; - npages = btoc(count); - offset = IDX_TO_OFF(pages[0]->pindex); - if (!fsess_opt_mmap(vnode_mount(vp))) { - SDT_PROBE2(fuse, , vnops, trace, 1, + SDT_PROBE2(fusefs, , vnops, trace, 1, "called on non-cacheable vnode??\n"); + return (VM_PAGER_ERROR); } - for (i = 0; i < npages; i++) - rtvals[i] = VM_PAGER_AGAIN; - /* - * When putting pages, do not extend file past EOF. - */ - - if (offset + count > fsize) { - count = fsize - offset; - if (count < 0) - count = 0; - } - /* - * We use only the kva address for the buffer, but this is extremely - * convenient and fast. 
- */ - bp = uma_zalloc(fuse_pbuf_zone, M_WAITOK); - - kva = (vm_offset_t)bp->b_data; - pmap_qenter(kva, pages, npages); - VM_CNT_INC(v_vnodeout); - VM_CNT_ADD(v_vnodepgsout, count); - - iov.iov_base = (caddr_t)kva; - iov.iov_len = count; - uio.uio_iov = &iov; - uio.uio_iovcnt = 1; - uio.uio_offset = offset; - uio.uio_resid = count; - uio.uio_segflg = UIO_SYSSPACE; - uio.uio_rw = UIO_WRITE; - uio.uio_td = td; - - error = fuse_io_dispatch(vp, &uio, IO_DIRECT, cred); - - pmap_qremove(kva, npages); - uma_zfree(fuse_pbuf_zone, bp); - - if (!error) { - int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE; - - for (i = 0; i < nwritten; i++) { - rtvals[i] = VM_PAGER_OK; - VM_OBJECT_WLOCK(pages[i]->object); - vm_page_undirty(pages[i]); - VM_OBJECT_WUNLOCK(pages[i]->object); - } - } - return rtvals[0]; + return (vfs_bio_getpages(vp, ap->a_m, ap->a_count, ap->a_rbehind, + ap->a_rahead, fuse_gbp_getblkno, fuse_gbp_getblksz)); } static const char extattr_namespace_separator = '.'; @@ -2023,6 +2018,13 @@ if (fuse_isdeadfs(vp)) return (ENXIO); + if (!fsess_isimpl(mp, FUSE_GETXATTR)) + return EOPNOTSUPP; + + err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD); + if (err) + return err; + /* Default to looking for user attributes. */ if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) prefix = EXTATTR_NAMESPACE_SYSTEM_STRING; @@ -2053,8 +2055,10 @@ err = fdisp_wait_answ(&fdi); if (err != 0) { - if (err == ENOSYS) + if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_GETXATTR); + err = EOPNOTSUPP; + } goto out; } @@ -2100,6 +2104,29 @@ if (fuse_isdeadfs(vp)) return (ENXIO); + if (!fsess_isimpl(mp, FUSE_SETXATTR)) + return EOPNOTSUPP; + + if (vfs_isrdonly(mp)) + return EROFS; + + /* Deleting xattrs must use VOP_DELETEEXTATTR instead */ + if (ap->a_uio == NULL) { + /* + * If we got here as fallback from VOP_DELETEEXTATTR, then + * return EOPNOTSUPP. + */ + if (!fsess_isimpl(mp, FUSE_REMOVEXATTR)) + return (EOPNOTSUPP); + else + return (EINVAL); + } + + err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, + VWRITE); + if (err) + return err; + /* Default to looking for user attributes. */ if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) prefix = EXTATTR_NAMESPACE_SYSTEM_STRING; @@ -2127,11 +2154,14 @@ err = fdisp_wait_answ(&fdi); - if (err != 0) { - if (err == ENOSYS) - fsess_set_notimpl(mp, FUSE_SETXATTR); - goto out; + if (err == ENOSYS) { + fsess_set_notimpl(mp, FUSE_SETXATTR); + err = EOPNOTSUPP; } + if (err == ERESTART) { + /* Can't restart after calling uiomove */ + err = EINTR; + } out: fdisp_destroy(&fdi); @@ -2227,6 +2257,13 @@ if (fuse_isdeadfs(vp)) return (ENXIO); + if (!fsess_isimpl(mp, FUSE_LISTXATTR)) + return EOPNOTSUPP; + + err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD); + if (err) + return err; + /* * Add space for a NUL and the period separator if enabled. * Default to looking for user attributes. @@ -2251,8 +2288,10 @@ err = fdisp_wait_answ(&fdi); if (err != 0) { - if (err == ENOSYS) + if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_LISTXATTR); + err = EOPNOTSUPP; + } goto out; } @@ -2267,7 +2306,7 @@ /* * Retrieve Linux / FUSE compatible list values. 
*/ - fdisp_make_vp(&fdi, FUSE_LISTXATTR, vp, td, cred); + fdisp_refresh_vp(&fdi, FUSE_LISTXATTR, vp, td, cred); list_xattr_in = fdi.indata; list_xattr_in->size = linux_list_len + sizeof(*list_xattr_out); attr_str = (char *)fdi.indata + sizeof(*list_xattr_in); @@ -2330,6 +2369,17 @@ if (fuse_isdeadfs(vp)) return (ENXIO); + if (!fsess_isimpl(mp, FUSE_REMOVEXATTR)) + return EOPNOTSUPP; + + if (vfs_isrdonly(mp)) + return EROFS; + + err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, + VWRITE); + if (err) + return err; + /* Default to looking for user attributes. */ if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) prefix = EXTATTR_NAMESPACE_SYSTEM_STRING; @@ -2347,9 +2397,9 @@ ap->a_name); err = fdisp_wait_answ(&fdi); - if (err != 0) { - if (err == ENOSYS) - fsess_set_notimpl(mp, FUSE_REMOVEXATTR); + if (err == ENOSYS) { + fsess_set_notimpl(mp, FUSE_REMOVEXATTR); + err = EOPNOTSUPP; } fdisp_destroy(&fdi); @@ -2373,3 +2423,48 @@ return 0; } + +/* + * Get an NFS filehandle for a FUSE file. + * + * This will only work for FUSE file systems that guarantee the uniqueness of + * nodeid:generation, which most don't. + */ +/* +vop_vptofh { + IN struct vnode *a_vp; + IN struct fid *a_fhp; +}; +*/ +static int +fuse_vnop_vptofh(struct vop_vptofh_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct fuse_vnode_data *fvdat = VTOFUD(vp); + struct fuse_fid *fhp = (struct fuse_fid *)(ap->a_fhp); + _Static_assert(sizeof(struct fuse_fid) <= sizeof(struct fid), + "FUSE fid type is too big"); + struct mount *mp = vnode_mount(vp); + struct fuse_data *data = fuse_get_mpdata(mp); + struct vattr va; + int err; + + if (!(data->dataflags & FSESS_EXPORT_SUPPORT)) + return EOPNOTSUPP; + + err = fuse_internal_getattr(vp, &va, curthread->td_ucred, curthread); + if (err) + return err; + + /*ip = VTOI(ap->a_vp);*/ + /*ufhp = (struct ufid *)ap->a_fhp;*/ + fhp->len = sizeof(struct fuse_fid); + fhp->nid = fvdat->nid; + if (fvdat->generation <= UINT32_MAX) + fhp->gen = fvdat->generation; + else + return EOVERFLOW; + return (0); +} + + Index: sys/kern/kern_sig.c =================================================================== --- sys/kern/kern_sig.c +++ sys/kern/kern_sig.c @@ -929,6 +929,23 @@ #endif #endif /* COMPAT_43 */ +/* Would this signal be fatal to the current process, if it were caught ? */ +bool +sig_isfatal(struct proc *p, int sig) +{ + intptr_t act; + int prop; + + mtx_assert(&p->p_sigacts->ps_mtx, MA_OWNED); + act = (intptr_t)p->p_sigacts->ps_sigact[_SIG_IDX(sig)]; + if ((intptr_t)SIG_DFL == act) { + prop = sigprop(sig); + return (0 != (prop & (SIGPROP_KILL | SIGPROP_CORE))); + } else { + return (false); + } +} + /* * Initialize signal state for process 0; * set to ignore signals that are ignored by default. Index: sys/kern/vfs_cache.c =================================================================== --- sys/kern/vfs_cache.c +++ sys/kern/vfs_cache.c @@ -1964,7 +1964,7 @@ } /* - * Invalidate all entries to a particular vnode. + * Invalidate all entries from and to a particular vnode. 
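+ * ("from" covers entries naming children of the vnode; "to" covers entries
+ * that resolve to the vnode itself.)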
*/ void cache_purge(struct vnode *vp) Index: sys/kern/vfs_subr.c =================================================================== --- sys/kern/vfs_subr.c +++ sys/kern/vfs_subr.c @@ -118,6 +118,8 @@ static void vfs_knl_assert_unlocked(void *arg); static void vnlru_return_batches(struct vfsops *mnt_op); static void destroy_vpollinfo(struct vpollinfo *vi); +static int v_inval_buf_range1(struct vnode *vp, struct bufobj *bo, + daddr_t startlbn, daddr_t endlbn); /* * These fences are intended for cases where some synchronization is @@ -945,6 +947,12 @@ * desirable to reuse such vnodes. These conditions may cause the * number of vnodes to reach some minimum value regardless of what * you set kern.maxvnodes to. Do not set kern.maxvnodes too low. + * + * @param mp Try to reclaim vnodes from this mountpoint + * @param reclaim_nc_src Only reclaim directories with outgoing namecache + * entries if this argument is strue + * @param reclaim_free Only reclaim free vnodes if this is set. + * @return The number of vnodes that were reclaimed. */ static int vlrureclaim(struct mount *mp, int reclaim_nc_src, int trigger) @@ -1954,9 +1962,8 @@ vtruncbuf(struct vnode *vp, off_t length, int blksize) { struct buf *bp, *nbp; - int anyfreed; - daddr_t trunclbn; struct bufobj *bo; + daddr_t startlbn; CTR4(KTR_VFS, "%s: vp %p with block %d:%ju", __func__, vp, blksize, (uintmax_t)length); @@ -1964,22 +1971,114 @@ /* * Round up to the *next* lbn. */ - trunclbn = howmany(length, blksize); + startlbn = howmany(length, blksize); ASSERT_VOP_LOCKED(vp, "vtruncbuf"); + restart: bo = &vp->v_bufobj; BO_LOCK(bo); + if (v_inval_buf_range1(vp, bo, startlbn, INT64_MAX) == EAGAIN) + goto restart; + + if (length > 0) { +restartsync: + TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { + if (bp->b_lblkno > 0) + continue; + /* + * Since we hold the vnode lock this should only + * fail if we're racing with the buf daemon. + */ + if (BUF_LOCK(bp, + LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, + BO_LOCKPTR(bo)) == ENOLCK) { + goto restart; + } + VNASSERT((bp->b_flags & B_DELWRI), vp, + ("buf(%p) on dirty queue without DELWRI", bp)); + + bremfree(bp); + bawrite(bp); + BO_LOCK(bo); + goto restartsync; + } + } + + bufobj_wwait(bo, 0, 0); + BO_UNLOCK(bo); + vnode_pager_setsize(vp, length); + + return (0); +} + +/* + * Invalidate the cached pages of a file's buffer within the range of block + * numbers [startlbn, endlbn). Every buffer that overlaps that range will be + * invalidated. This must not result in any dirty data being lost. + */ +void +v_inval_buf_range(struct vnode *vp, off_t start, off_t end, int blksize) +{ + struct bufobj *bo; + daddr_t startlbn, endlbn; + vm_pindex_t startp, endp; + + /* Round "outwards" */ + startlbn = start / blksize; + endlbn = howmany(end, blksize); + startp = OFF_TO_IDX(start); + endp = OFF_TO_IDX(end + PAGE_SIZE - 1); + + ASSERT_VOP_LOCKED(vp, "v_inval_buf_range"); + +restart: + bo = &vp->v_bufobj; + BO_LOCK(bo); + +#ifdef INVARIANTS + struct buf *bp, *nbp; + + TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { + /* + * Disallow invalidating dirty data outside of the requested + * offsets. Assume that data within the requested offsets is + * being invalidated for a good reason. 
+ */ + off_t blkstart, blkend; + + blkstart = bp->b_offset; + blkend = bp->b_offset + bp->b_bcount; + KASSERT(blkstart >= start && blkend <= end, + ("Invalidating extra dirty data!")); + } +#endif + + if (v_inval_buf_range1(vp, bo, startlbn, endlbn) == EAGAIN) + goto restart; + + BO_UNLOCK(bo); + vn_pages_remove(vp, startp, endp); +} + +/* Like v_inval_buf_range, but operates on whole buffers instead of offsets */ +static int +v_inval_buf_range1(struct vnode *vp, struct bufobj *bo, + daddr_t startlbn, daddr_t endlbn) +{ + struct buf *bp, *nbp; + int anyfreed; + anyfreed = 1; for (;anyfreed;) { anyfreed = 0; TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, nbp) { - if (bp->b_lblkno < trunclbn) + if (bp->b_lblkno < startlbn || bp->b_lblkno >= endlbn) continue; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) - goto restart; + return EAGAIN; bremfree(bp); bp->b_flags |= (B_INVAL | B_RELBUF); @@ -1993,17 +2092,17 @@ (nbp->b_vp != vp) || (nbp->b_flags & B_DELWRI))) { BO_UNLOCK(bo); - goto restart; + return EAGAIN; } } TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { - if (bp->b_lblkno < trunclbn) + if (bp->b_lblkno < startlbn || bp->b_lblkno >= endlbn) continue; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) - goto restart; + return EAGAIN; bremfree(bp); bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~B_ASYNC; @@ -2016,40 +2115,11 @@ (nbp->b_vp != vp) || (nbp->b_flags & B_DELWRI) == 0)) { BO_UNLOCK(bo); - goto restart; + return EAGAIN; } } } - - if (length > 0) { -restartsync: - TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { - if (bp->b_lblkno > 0) - continue; - /* - * Since we hold the vnode lock this should only - * fail if we're racing with the buf daemon. - */ - if (BUF_LOCK(bp, - LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, - BO_LOCKPTR(bo)) == ENOLCK) { - goto restart; - } - VNASSERT((bp->b_flags & B_DELWRI), vp, - ("buf(%p) on dirty queue without DELWRI", bp)); - - bremfree(bp); - bawrite(bp); - BO_LOCK(bo); - goto restartsync; - } - } - - bufobj_wwait(bo, 0, 0); - BO_UNLOCK(bo); - vnode_pager_setsize(vp, length); - - return (0); + return 0; } static void Index: sys/sys/signalvar.h =================================================================== --- sys/sys/signalvar.h +++ sys/sys/signalvar.h @@ -384,6 +384,7 @@ void sigexit(struct thread *td, int sig) __dead2; int sigev_findtd(struct proc *p, struct sigevent *sigev, struct thread **); int sig_ffs(sigset_t *set); +bool sig_isfatal(struct proc *p, int sig); void siginit(struct proc *p); void signotify(struct thread *td); void sigqueue_delete(struct sigqueue *queue, int sig); Index: sys/sys/vnode.h =================================================================== --- sys/sys/vnode.h +++ sys/sys/vnode.h @@ -659,6 +659,8 @@ void vinactive(struct vnode *, struct thread *); int vinvalbuf(struct vnode *vp, int save, int slpflag, int slptimeo); int vtruncbuf(struct vnode *vp, off_t length, int blksize); +void v_inval_buf_range(struct vnode *vp, off_t start, off_t end, + int blksize); void vunref(struct vnode *); void vn_printf(struct vnode *vp, const char *fmt, ...) __printflike(2,3); int vrecycle(struct vnode *vp); Index: sys/vm/vnode_pager.c =================================================================== --- sys/vm/vnode_pager.c +++ sys/vm/vnode_pager.c @@ -464,8 +464,7 @@ * File has shrunk. Toss any cached pages beyond the new EOF. 
*/ if (nobjsize < object->size) - vm_object_page_remove(object, nobjsize, object->size, - 0); + vm_object_page_remove(object, nobjsize, 0, 0); /* * this gets rid of garbage at the end of a page that is now * only partially backed by the vnode. Index: tests/sys/fs/Makefile =================================================================== --- tests/sys/fs/Makefile +++ tests/sys/fs/Makefile @@ -1,5 +1,7 @@ # $FreeBSD$ +.include + PACKAGE= tests TESTSDIR= ${TESTSBASE}/sys/fs @@ -7,6 +9,9 @@ TESTSRC= ${SRCTOP}/contrib/netbsd-tests/fs #TESTS_SUBDIRS+= nullfs # XXX: needs rump +.if ${COMPILER_FEATURES:Mc++14} +TESTS_SUBDIRS+= fusefs +.endif TESTS_SUBDIRS+= tmpfs ${PACKAGE}FILES+= h_funcs.subr
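
The following is a minimal, hypothetical sketch of how a file system might use
the v_inval_buf_range() interface added by this change.  The caller name, the
use of f_iosize as the block size, and the surrounding locking context are
illustrative assumptions only; they are not part of this patch.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mount.h>
#include <sys/vnode.h>

/*
 * Hypothetical caller (not part of this patch): drop the cached buffers and
 * pages backing a byte range of a vnode, for example after a remote file
 * system learns that the data changed on the server side.
 */
static void
example_invalidate_range(struct vnode *vp, off_t start, off_t end)
{
	/* f_iosize is one plausible source for the logical block size. */
	int blksize = vp->v_mount->mnt_stat.f_iosize;

	/* v_inval_buf_range() asserts that the vnode lock is held. */
	ASSERT_VOP_LOCKED(vp, "example_invalidate_range");

	/*
	 * Invalidate every buffer overlapping [start, end) and remove the
	 * backing pages.  Under INVARIANTS the implementation asserts that
	 * no dirty buffer extends outside the requested range.
	 */
	v_inval_buf_range(vp, start, end, blksize);
}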