Index: etc/mtree/BSD.include.dist
===================================================================
--- etc/mtree/BSD.include.dist
+++ etc/mtree/BSD.include.dist
@@ -193,6 +193,8 @@
     ..
     lib80211
     ..
+    lib9p
+    ..
     libipt
     ..
     libmilter
Index: lib/Makefile
===================================================================
--- lib/Makefile
+++ lib/Makefile
@@ -27,6 +27,7 @@
 SUBDIR=	${SUBDIR_BOOTSTRAP} \
 	.WAIT \
 	geom \
+	lib9p \
 	libalias \
 	libarchive \
 	libauditd \
Index: lib/lib9p/COPYRIGHT
===================================================================
--- /dev/null
+++ lib/lib9p/COPYRIGHT
@@ -0,0 +1,47 @@
+Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+All rights reserved
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted providing that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+Some parts of the code are based on libixp (http://libs.suckless.org/libixp)
+library code released under following license:
+
+© 2005-2006 Anselm R. Garbe <garbeam@gmail.com>
+© 2006-2010 Kris Maglione <maglione.k at Gmail>
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
Index: lib/lib9p/GNUmakefile
===================================================================
--- /dev/null
+++ lib/lib9p/GNUmakefile
@@ -0,0 +1,76 @@
+CC_VERSION := $(shell $(CC) --version | \
+    sed -n -e '/clang-/s/.*clang-\([0-9][0-9]*\).*/\1/p')
+ifeq ($(CC_VERSION),)
+# no "clang-NNN" tag in the --version output: probably not clang
+CC_VERSION := 0
+endif
+
+WFLAGS :=
+
+# Warning flags are version-dependent, unfortunately, so test the
+# detected version before adding a -W flag (0 means "not clang").
+# Note: GNU make requires $(shell test ...) for "a > b" type tests.
+ifeq ($(shell test $(CC_VERSION) -gt 0; echo $$?),0)
+WFLAGS += -Weverything
+WFLAGS += -Wno-padded
+WFLAGS += -Wno-gnu-zero-variadic-macro-arguments
+WFLAGS += -Wno-format-nonliteral
+WFLAGS += -Wno-unused-macros
+WFLAGS += -Wno-disabled-macro-expansion
+WFLAGS += -Werror
+endif
+
+ifeq ($(shell test $(CC_VERSION) -gt 600; echo $$?),0)
+WFLAGS += -Wno-reserved-id-macro
+endif
+
+CFLAGS := $(WFLAGS) \
+	-g \
+	-O0 \
+	-DL9P_DEBUG=L9P_DEBUG
+# Note: debug is turned on by -DL9P_DEBUG=L9P_DEBUG (as above); also
+# set the env variable LIB9P_LOGGING to stderr or to the (preferably
+# full) path name of the debug log file.
+
+LIB_SRCS := \
+	pack.c \
+	connection.c \
+	request.c \
+	genacl.c \
+	log.c \
+	hashtable.c \
+	utils.c \
+	rfuncs.c \
+	threadpool.c \
+	sbuf/sbuf.c \
+	transport/socket.c \
+	backend/fs.c
+
+SERVER_SRCS := \
+	example/server.c
+
+BUILD_DIR := build
+LIB_OBJS := $(addprefix $(BUILD_DIR)/,$(LIB_SRCS:.c=.o))
+SERVER_OBJS := $(SERVER_SRCS:.c=.o)
+LIB := $(BUILD_DIR)/lib9p.dylib
+SERVER := $(BUILD_DIR)/server
+
+# lib9p.dylib and server remain as aliases for the files in $(BUILD_DIR).
+.PHONY: all clean lib9p.dylib server
+all lib9p.dylib: $(LIB)
+all server: $(SERVER)
+
+$(LIB): $(LIB_OBJS)
+	$(CC) -dynamiclib $^ -o $@
+
+$(SERVER): $(SERVER_OBJS) $(LIB)
+	$(CC) $(SERVER_OBJS) -o $@ -L$(BUILD_DIR) -l9p
+
+clean:
+	rm -rf $(BUILD_DIR)
+	rm -f $(SERVER_OBJS)
+
+# Object directories are created on demand, which is safe under -j.
+$(BUILD_DIR)/%.o: %.c
+	mkdir -p $(@D)
+	$(CC) $(CFLAGS) -c $< -o $@
Index: lib/lib9p/Makefile
===================================================================
--- /dev/null
+++ lib/lib9p/Makefile
@@ -0,0 +1,33 @@
+# $FreeBSD$
+# Note: to turn on debug, use -DL9P_DEBUG=L9P_DEBUG (see DEBUG_FLAGS),
+# and set env variable LIB9P_LOGGING to stderr or to the
+# (preferably full) path name of the debug log file.
+
+CFLAGS+=	-DWITH_CASPER -I${.CURDIR}
+DEBUG_FLAGS=	-DL9P_DEBUG=L9P_DEBUG -DACE_DEBUG -g -O0
+
+LIB=		9p
+PACKAGE=	lib${LIB}
+SHLIB_MAJOR=	1
+SHLIBDIR?=	/lib
+SRCS=		connection.c \
+		genacl.c \
+		hashtable.c \
+		log.c \
+		pack.c \
+		request.c \
+		rfuncs.c \
+		threadpool.c \
+		utils.c \
+		backend/fs.c \
+		transport/socket.c
+# backend/fs.h includes "backend.h", so that header must be installed too.
+INCSDIR=	${INCLUDEDIR}/lib9p
+INCS=		fid.h lib9p.h backend/backend.h backend/fs.h
+
+LIBADD=		sbuf
+
+cscope: .PHONY
+	cd ${.CURDIR}; cscope -buq $$(find . -name '*.[ch]' -print)
+
+.include <bsd.lib.mk>
Index: lib/lib9p/README.md
===================================================================
--- /dev/null
+++ lib/lib9p/README.md
@@ -0,0 +1,20 @@
+# lib9p
+
+lib9p is a server library implementing the 9p2000, 9p2000.u and 9p2000.L
+revisions of the 9P protocol. It is being developed primarily as a backend for
+virtio-9p in bhyve, the FreeBSD hypervisor.
+
+# Features
+
+* 9p2000, 9p2000.u and 9p2000.L protocol support
+* Built-in TCP transport
+
+# Supported operating systems
+
+* FreeBSD (>=10)
+* macOS (>=10.9)
+
+# Authors
+
+* Jakub Klama [jceel](https://github.com/jceel)
+* Chris Torek [chris3torek](https://github.com/chris3torek)
Index: lib/lib9p/apple_endian.h
===================================================================
--- /dev/null
+++ lib/lib9p/apple_endian.h
@@ -0,0 +1,27 @@
+#ifndef _APPLE_ENDIAN_H
+#define _APPLE_ENDIAN_H
+
+/*
+ * Shims providing the <sys/endian.h> names (byte-order constants and
+ * 32-bit conversion macros) on top of Apple's <libkern/OSByteOrder.h>.
+ */
+
+# include <libkern/OSByteOrder.h>
+
+# define _LITTLE_ENDIAN 0x12345678
+# define _BIG_ENDIAN 0x87654321
+
+# ifdef __LITTLE_ENDIAN__
+#  define _BYTE_ORDER _LITTLE_ENDIAN
+# endif
+# ifdef __BIG_ENDIAN__
+#  define _BYTE_ORDER _BIG_ENDIAN
+# endif
+
+# define htole32(x)	OSSwapHostToLittleInt32(x)
+# define le32toh(x)	OSSwapLittleToHostInt32(x)
+
+# define htobe32(x)	OSSwapHostToBigInt32(x)
+# define be32toh(x)	OSSwapBigToHostInt32(x)
+
+#endif /* _APPLE_ENDIAN_H */
Index: lib/lib9p/backend/backend.h
===================================================================
--- /dev/null
+++ lib/lib9p/backend/backend.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+
+#ifndef LIB9P_BACKEND_H
+#define LIB9P_BACKEND_H
+
+struct l9p_backend {	/* table of operations implemented by a backend */
+	void *softc;	/* backend-private state; presumably the void * passed to each op -- confirm */
+	void (*freefid)(void *, struct l9p_fid *);	/* fid-based: gets the fid, not a request */
+	int (*attach)(void *, struct l9p_request *);
+	int (*clunk)(void *, struct l9p_fid *);		/* fid-based */
+	int (*create)(void *, struct l9p_request *);
+	int (*open)(void *, struct l9p_request *);
+	int (*read)(void *, struct l9p_request *);
+	int (*remove)(void *, struct l9p_fid *);	/* fid-based */
+	int (*stat)(void *, struct l9p_request *);
+	int (*walk)(void *, struct l9p_request *);
+	int (*write)(void *, struct l9p_request *);
+	int (*wstat)(void *, struct l9p_request *);
+	int (*statfs)(void *, struct l9p_request *);
+	int (*lopen)(void *, struct l9p_request *);
+	int (*lcreate)(void *, struct l9p_request *);
+	int (*symlink)(void *, struct l9p_request *);
+	int (*mknod)(void *, struct l9p_request *);
+	int (*rename)(void *, struct l9p_request *);
+	int (*readlink)(void *, struct l9p_request *);
+	int (*getattr)(void *, struct l9p_request *);
+	int (*setattr)(void *, struct l9p_request *);
+	int (*xattrwalk)(void *, struct l9p_request *);
+	int (*xattrcreate)(void *, struct l9p_request *);
+	int (*xattrread)(void *, struct l9p_request *);
+	int (*xattrwrite)(void *, struct l9p_request *);
+	int (*xattrclunk)(void *, struct l9p_fid *);	/* fid-based */
+	int (*readdir)(void *, struct l9p_request *);
+	int (*fsync)(void *, struct l9p_request *);
+	int (*lock)(void *, struct l9p_request *);
+	int (*getlock)(void *, struct l9p_request *);
+	int (*link)(void *, struct l9p_request *);
+	int (*mkdir)(void *, struct l9p_request *);
+	int (*renameat)(void *, struct l9p_request *);
+	int (*unlinkat)(void *, struct l9p_request *);
+};
+
+#endif  /* LIB9P_BACKEND_H */
Index: lib/lib9p/backend/fs.h
===================================================================
--- /dev/null
+++ lib/lib9p/backend/fs.h
@@ -0,0 +1,37 @@
+
+/*
+ * Copyright 2016 Chris Torek <torek@ixsystems.com>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_BACKEND_FS_H
+#define LIB9P_BACKEND_FS_H
+
+#include <stdbool.h>
+#include "backend.h"
+
+int l9p_backend_fs_init(struct l9p_backend **backendp, int rootfd, bool ro);
+
+#endif  /* LIB9P_BACKEND_FS_H */
Index: lib/lib9p/backend/fs.c
===================================================================
--- /dev/null
+++ lib/lib9p/backend/fs.c
@@ -0,0 +1,3031 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * Based on libixp code: ©2007-2010 Kris Maglione <maglione.k at Gmail>
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <dirent.h>
+#include <pwd.h>
+#include <grp.h>
+#include <libgen.h>
+#include <pthread.h>
+#include "../lib9p.h"
+#include "../lib9p_impl.h"
+#include "../fid.h"
+#include "../log.h"
+#include "../rfuncs.h"
+#include "../genacl.h"
+#include "backend.h"
+#include "fs.h"
+
+#if defined(WITH_CASPER)
+  #include <libcasper.h>
+  #include <casper/cap_pwd.h>
+  #include <casper/cap_grp.h>
+#endif
+
+#if defined(__FreeBSD__)
+  #include <sys/param.h>
+  #if __FreeBSD_version >= 1000000
+    #define	HAVE_BINDAT
+  #endif
+#endif
+
+#if defined(__FreeBSD__)
+  #define	HAVE_BIRTHTIME
+#endif
+
+#if defined(__APPLE__)
+  #include "Availability.h"
+  #define ACL_TYPE_NFS4 ACL_TYPE_EXTENDED
+#endif
+
+struct fs_softc {
+	int 	fs_rootfd;	/* NOTE(review): presumably the rootfd given to l9p_backend_fs_init() -- confirm */
+	bool	fs_readonly;	/* when set, fs_nde() refuses creation with EROFS */
+#if defined(WITH_CASPER)
+	cap_channel_t *fs_cappwd;	/* channel used by fs_getpwuid() */
+	cap_channel_t *fs_capgrp;	/* channel used by fs_getgrgid() */
+#endif
+};
+
+struct fs_fid {
+	DIR	*ff_dir;
+	int	ff_dirfd;	/* directory fd that ff_name is resolved against (fstatat) */
+	int	ff_fd;		/* set to -1 by open_fid() */
+	int	ff_flags;	/* FF_* bits, see below */
+	char	*ff_name;	/* strdup()ed copy of the path given to open_fid() */
+	struct fs_authinfo *ff_ai;	/* shared authinfo; open_fid() takes a reference */
+	pthread_mutex_t ff_mtx;	/* initialized in open_fid() */
+	struct l9p_acl *ff_acl; /* cached ACL if any */
+};
+
+#define	FF_NO_NFSV4_ACL	0x01	/* don't go looking for NFSv4 ACLs */
+/*	FF_NO_POSIX_ACL	0x02	-- not yet */
+
+/*
+ * Our authinfo consists of:
+ *
+ *  - a reference count
+ *  - a uid
+ *  - a gid-set
+ *
+ * The "default" gid is the first gid in the gid-set, provided the
+ * set size is at least 1.  The set-size may be zero, though.
+ *
+ * Adjustments to the ref-count must be atomic, once it's shared.
+ * It would be nice to use C11 atomics here but they are not common
+ * enough to all systems just yet; for now, we use a mutex.
+ *
+ * Note that some ops (Linux style ones) pass an effective gid for
+ * the op, in which case, that gid may override.  To achieve this
+ * effect, permissions testing functions also take an extra gid.
+ * If this gid is (gid_t)-1 it is not used and only the remaining
+ * gids take part.
+ *
+ * The uid may also be (uid_t)-1, meaning "no uid was available
+ * at all at attach time".  In this case, new files inherit parent
+ * directory uids.
+ *
+ * The refcount is simply the number of "openfile"s using this
+ * authinfo (so that when the last ref goes away, we can free it).
+ *
+ * There are also master ACL flags (same as in ff_flags).
+ */
+struct fs_authinfo {
+	pthread_mutex_t ai_mtx;	/* lock for refcnt */
+	uint32_t ai_refcnt;	/* incremented by open_fid() under ai_mtx */
+	int	ai_flags;	/* master ACL flags (same bits as ff_flags) */
+	uid_t	ai_uid;		/* (uid_t)-1 when no uid was available at attach */
+	int	ai_ngids;	/* number of entries in ai_gids[]; may be zero */
+	gid_t	ai_gids[];	/* NB: flexible array member */
+};
+
+/*
+ * We have a global-static mutex for single-threading Tattach
+ * requests, which use getpwnam (and indirectly, getgr* functions)
+ * which are not reentrant.
+ */
+static bool fs_attach_mutex_inited;
+static pthread_mutex_t fs_attach_mutex;
+
+/*
+ * Internal functions (except inline functions).
+ */
+static struct passwd *fs_getpwuid(struct fs_softc *, uid_t, struct r_pgdata *);
+static struct group *fs_getgrgid(struct fs_softc *, gid_t, struct r_pgdata *);
+static int fs_buildname(struct l9p_fid *, char *, char *, size_t);
+static int fs_pdir(struct fs_softc *, struct l9p_fid *, char *, size_t,
+    struct stat *st);
+static int fs_dpf(char *, char *, size_t);
+static int fs_oflags_dotu(int, int *);
+static int fs_oflags_dotl(uint32_t, int *, enum l9p_omode *);
+static int fs_nde(struct fs_softc *, struct l9p_fid *, bool, gid_t,
+    struct stat *, uid_t *, gid_t *);
+static struct fs_fid *open_fid(int, const char *, struct fs_authinfo *, bool);
+static void dostat(struct fs_softc *, struct l9p_stat *, char *,
+    struct stat *, bool dotu);
+static void dostatfs(struct l9p_statfs *, struct statfs *, long);
+static void fillacl(struct fs_fid *ff);
+static struct l9p_acl *getacl(struct fs_fid *ff, int fd, const char *path);
+static void dropacl(struct fs_fid *ff);
+static struct l9p_acl *look_for_nfsv4_acl(struct fs_fid *ff, int fd,
+    const char *path);
+static int check_access(int32_t,
+    struct l9p_acl *, struct stat *, struct l9p_acl *, struct stat *,
+    struct fs_authinfo *, gid_t);
+static void generate_qid(struct stat *, struct l9p_qid *);
+
+static int fs_icreate(void *, struct l9p_fid *, char *, int,
+    bool, mode_t, gid_t, struct stat *);
+static int fs_iopen(void *, struct l9p_fid *, int, enum l9p_omode,
+    gid_t, struct stat *);
+static int fs_imkdir(void *, struct l9p_fid *, char *,
+    bool, mode_t, gid_t, struct stat *);
+static int fs_imkfifo(void *, struct l9p_fid *, char *,
+    bool, mode_t, gid_t, struct stat *);
+static int fs_imknod(void *, struct l9p_fid *, char *,
+    bool, mode_t, dev_t, gid_t, struct stat *);
+static int fs_imksocket(void *, struct l9p_fid *, char *,
+    bool, mode_t, gid_t, struct stat *);
+static int fs_isymlink(void *, struct l9p_fid *, char *, char *,
+    gid_t, struct stat *);
+
+/*
+ * Internal functions implementing backend.
+ */
+static int fs_attach(void *, struct l9p_request *);
+static int fs_clunk(void *, struct l9p_fid *);
+static int fs_create(void *, struct l9p_request *);
+static int fs_open(void *, struct l9p_request *);
+static int fs_read(void *, struct l9p_request *);
+static int fs_remove(void *, struct l9p_fid *);
+static int fs_stat(void *, struct l9p_request *);
+static int fs_walk(void *, struct l9p_request *);
+static int fs_write(void *, struct l9p_request *);
+static int fs_wstat(void *, struct l9p_request *);
+static int fs_statfs(void *, struct l9p_request *);
+static int fs_lopen(void *, struct l9p_request *);
+static int fs_lcreate(void *, struct l9p_request *);
+static int fs_symlink(void *, struct l9p_request *);
+static int fs_mknod(void *, struct l9p_request *);
+static int fs_rename(void *, struct l9p_request *);
+static int fs_readlink(void *, struct l9p_request *);
+static int fs_getattr(void *, struct l9p_request *);
+static int fs_setattr(void *, struct l9p_request *);
+static int fs_xattrwalk(void *, struct l9p_request *);
+static int fs_xattrcreate(void *, struct l9p_request *);
+static int fs_readdir(void *, struct l9p_request *);
+static int fs_fsync(void *, struct l9p_request *);
+static int fs_lock(void *, struct l9p_request *);
+static int fs_getlock(void *, struct l9p_request *);
+static int fs_link(void *, struct l9p_request *);
+static int fs_renameat(void *, struct l9p_request *);
+static int fs_unlinkat(void *, struct l9p_request *);
+static void fs_freefid(void *, struct l9p_fid *);
+
+/*
+ * Convert from 9p2000 open/create mode to Unix-style O_* flags.
+ * This includes 9p2000.u extensions, but not 9p2000.L protocol,
+ * which has entirely different open, create, etc., flag bits.
+ *
+ * The <mode> given here is the one-byte (uint8_t) "mode"
+ * argument to Tcreate or Topen, so it can have at most 8 bits.
+ *
+ * https://swtch.com/plan9port/man/man9/open.html and
+ * http://plan9.bell-labs.com/magic/man2html/5/open
+ * both say:
+ *
+ *   The [low two bits of the] mode field determines the
+ *   type of I/O ... [I]f mode has the OTRUNC (0x10) bit
+ *   set, the file is to be truncated, which requires write
+ *   permission ...; if the mode has the ORCLOSE (0x40) bit
+ *   set, the file is to be removed when the fid is clunked,
+ *   which requires permission to remove the file from its
+ *   directory.  All other bits in mode should be zero.  It
+ *   is illegal to write a directory, truncate it, or
+ *   attempt to remove it on close.
+ *
+ * 9P2000.u may add ODIRECT (0x80); this is not completely clear.
+ * The fcall.h header defines OCEXEC (0x20) as well, but it makes
+ * no sense to send this to a server.  There seem to be no bits
+ * 0x04 and 0x08.
+ *
+ * We always turn on O_NOCTTY since as a server, we never want
+ * to gain a controlling terminal.  We always turn on O_NOFOLLOW
+ * for reasons described elsewhere.
+ */
+static int
+fs_oflags_dotu(int mode, int *aflags)
+{
+	const int accmode = mode & L9P_OACCMODE;
+	int oflags;
+
+	/*
+	 * The access mode lives in the L9P_OACCMODE bits of <mode>;
+	 * translate it first.  Open-for-exec becomes a read-only
+	 * open and may not be combined with truncation; any value
+	 * other than write, read/write or exec is handled as a
+	 * plain read.
+	 */
+	if (accmode == L9P_OWRITE) {
+		oflags = O_WRONLY;
+	} else if (accmode == L9P_ORDWR) {
+		oflags = O_RDWR;
+	} else if (accmode == L9P_OEXEC) {
+		if (mode & L9P_OTRUNC)
+			return (EINVAL);
+		oflags = O_RDONLY;
+	} else {
+		oflags = O_RDONLY;	/* L9P_OREAD and anything else */
+	}
+
+	/*
+	 * A server never wants to gain a controlling terminal, and
+	 * O_NOFOLLOW is always set (for reasons described elsewhere).
+	 */
+	oflags |= O_NOCTTY | O_NOFOLLOW;
+
+	/* OTRUNC is the only modifier bit translated to an O_* flag. */
+	if (mode & L9P_OTRUNC) {
+		mode &= ~(L9P_OTRUNC);
+		oflags |= O_TRUNC;
+	}
+
+	/*
+	 * Strip the bits that need no translation here:
+	 *   the access mode (handled above)
+	 *   ORCLOSE - left to the caller
+	 *   OCEXEC - makes no sense in a server
+	 *   ODIRECT - not applicable here
+	 * Whatever is still set is a bit we could not translate;
+	 * for now that is rejected with EINVAL so that such
+	 * problems get noticed.
+	 */
+	mode &= ~(L9P_OACCMODE | L9P_ORCLOSE | L9P_OCEXEC | L9P_ODIRECT);
+	/* Everything was translated: hand the result back. */
+	if (mode == 0) {
+		*aflags = oflags;
+		return (0);
+	}
+
+	/* Leftover bits: log them and refuse the request. */
+	L9P_LOG(L9P_INFO,
+	    "fs_oflags_dotu: untranslated bits: %#x",
+	    (unsigned)mode);
+	return (EINVAL);
+}
+
+/*
+ * Convert from 9P2000.L (Linux) open mode bits to O_* flags.
+ * See fs_oflags_dotu above.
+ *
+ * Linux currently does not have open-for-exec, but there is a
+ * proposal for it using O_PATH|O_NOFOLLOW, now handled here.
+ *
+ * We may eventually also set L9P_ORCLOSE for L_O_TMPFILE.
+ */
+static int
+fs_oflags_dotl(uint32_t l_mode, int *aflags, enum l9p_omode *ap9)
+{
+	/* Linux open bits that map directly onto a local O_* flag. */
+	static const struct {
+		uint32_t theirs;
+		int ours;
+	} xlate[] = {
+		{ L9P_L_O_EXCL, O_EXCL },
+		{ L9P_L_O_TRUNC, O_TRUNC },
+		{ L9P_L_O_DIRECTORY, O_DIRECTORY },
+		{ L9P_L_O_APPEND, O_APPEND },
+		{ L9P_L_O_NONBLOCK, O_NONBLOCK },
+	};
+	const uint32_t execbits = (uint32_t)(L9P_L_O_PATH | L9P_L_O_NOFOLLOW);
+	enum l9p_omode p9;
+	size_t i;
+	int oflags;
+
+	/*
+	 * Linux O_RDONLY, O_WRONLY, O_RDWR (0,1,2) match BSD/MacOS,
+	 * so the access mode carries over as-is; 3 is not valid.
+	 */
+	oflags = l_mode & O_ACCMODE;
+	if (oflags == 3)
+		return (EINVAL);
+	l_mode &= ~(uint32_t)(O_ACCMODE);
+
+	/*
+	 * O_PATH together with O_NOFOLLOW is the proposed Linux
+	 * spelling of open-for-exec.  Otherwise the access mode
+	 * doubles as the 9P open mode -- slightly dirty, but the
+	 * same dirt, really, as taking oflags from l_mode above.
+	 */
+	if ((l_mode & execbits) != execbits) {
+		p9 = (enum l9p_omode)oflags;
+	} else {
+		l_mode &= ~execbits;
+		p9 = L9P_OEXEC;
+	}
+
+	/* turn L_O_TMPFILE into L9P_ORCLOSE in p9? */
+	if (l_mode & L9P_L_O_TRUNC)
+		p9 |= L9P_OTRUNC;	/* L_O_TRUNC itself is cleared below */
+
+	oflags |= O_NOCTTY | O_NOFOLLOW;
+
+	/*
+	 * L_O_CREAT seems to be noise, since open and create are
+	 * separate requests, yet it is actually set sometimes.  It
+	 * is simply dropped here: create ops must set it themselves
+	 * and open ops have no permission bits, hence cannot create.
+	 *
+	 * L_O_EXCL does make sense on create ops (with or without
+	 * it), so it is passed through like the other table bits.
+	 */
+	l_mode &= ~(uint32_t)(L9P_L_O_CREAT);
+	for (i = 0; i < sizeof(xlate) / sizeof(xlate[0]); i++) {
+		if ((l_mode & xlate[i].theirs) == 0)
+			continue;
+		l_mode &= ~xlate[i].theirs;
+		oflags |= xlate[i].ours;
+	}
+
+	/*
+	 * The remaining known bits are useless noise at our (server)
+	 * end and are discarded.  (NOATIME might be useful but we can
+	 * only set it on a per-mount basis.)
+	 */
+	l_mode &= ~(uint32_t)(L9P_L_O_CLOEXEC | L9P_L_O_DIRECT |
+	    L9P_L_O_DSYNC | L9P_L_O_FASYNC | L9P_L_O_LARGEFILE |
+	    L9P_L_O_NOATIME | L9P_L_O_NOCTTY | L9P_L_O_NOFOLLOW |
+	    L9P_L_O_SYNC);
+
+	if (l_mode != 0) {
+		L9P_LOG(L9P_INFO,
+		    "fs_oflags_dotl: untranslated bits: %#x",
+		    (unsigned)l_mode);
+		return (EINVAL);
+	}
+
+	*aflags = oflags;
+	*ap9 = p9;
+	return (0);
+}
+
+static struct passwd *
+fs_getpwuid(struct fs_softc *sc, uid_t uid, struct r_pgdata *pg)
+{
+#if !defined(WITH_CASPER)
+	(void)sc;	/* only the Casper variant needs the softc */
+	return (r_getpwuid(uid, pg));
+#else
+	return (r_cap_getpwuid(sc->fs_cappwd, uid, pg));
+#endif
+}
+
+static struct group *
+fs_getgrgid(struct fs_softc *sc, gid_t gid, struct r_pgdata *pg)
+{
+#if !defined(WITH_CASPER)
+	(void)sc;	/* only the Casper variant needs the softc */
+	return (r_getgrgid(gid, pg));
+#else
+	return (r_cap_getgrgid(sc->fs_capgrp, gid, pg));
+#endif
+}
+
+/*
+ * Build full name of file by appending given name to directory name.
+ */
+static int
+fs_buildname(struct l9p_fid *dir, char *name, char *buf, size_t size)
+{
+	struct fs_fid *dirf = dir->lo_aux;
+	size_t dirlen, namesz;
+
+	assert(dirf != NULL);
+	dirlen = strlen(dirf->ff_name);
+	namesz = strlen(name) + 1;	/* name plus its terminating NUL */
+	if (size < dirlen + 1 + namesz)	/* dir + '/' + name + NUL */
+		return (ENAMETOOLONG);
+	memcpy(buf, dirf->ff_name, dirlen);
+	buf[dirlen] = '/';
+	memcpy(&buf[dirlen + 1], name, namesz);
+	return (0);
+}
+
+/*
+ * Build parent name of file by splitting it off.  Return an error
+ * if the given fid represents the root, so that there is no such
+ * parent, or if the discovered parent is not a directory.
+ */
+static int
+fs_pdir(struct fs_softc *sc __unused, struct l9p_fid *fid, char *buf,
+    size_t size, struct stat *st)
+{
+	struct fs_fid *ff = fid->lo_aux;
+	char *parent;
+
+	assert(ff != NULL);
+
+	/* A NULL result from r_dirname() is reported as ENAMETOOLONG. */
+	parent = r_dirname(ff->ff_name, buf, size);
+	if (parent == NULL)
+		return (ENAMETOOLONG);
+
+	/* Stat the parent itself (symlinks not followed); must be a dir. */
+	if (fstatat(ff->ff_dirfd, parent, st, AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+	return (S_ISDIR(st->st_mode) ? 0 : ENOTDIR);
+}
+
+/*
+ * Like fs_buildname() but for adding a file name to a buffer
+ * already holding a directory name.  Essentially does
+ *     strcat(dbuf, "/");
+ *     strcat(dbuf, fname);
+ * but with size checking and an ENAMETOOLONG error as needed.
+ *
+ * (Think of the function name as "directory plus-equals file".)
+ */
+static int
+fs_dpf(char *dbuf, char *fname, size_t size)
+{
+	const size_t dirlen = strlen(dbuf);
+	const size_t fsize = strlen(fname) + 1;	/* includes the NUL */
+
+	/* Need room for the directory, a '/', the file name and its NUL. */
+	if (size < dirlen + 1 + fsize)
+		return (ENAMETOOLONG);
+	dbuf[dirlen] = '/';
+	memcpy(&dbuf[dirlen + 1], fname, fsize);
+	return (0);
+}
+
+/*
+ * Prepare to create a new directory entry (open with O_CREAT,
+ * mkdir, etc -- any operation that creates a new inode),
+ * operating in parent data <dir>, based on authinfo <ai> and
+ * effective gid <egid>.
+ *
+ * The new entity should be owned by user/group <*nuid, *ngid>,
+ * if it's really a new entity.  It will be a directory if isdir.
+ *
+ * Returns an error number if the entry should not be created
+ * (e.g., read-only file system or no permission to write in
+ * parent directory).  Always sets *nuid and *ngid on success:
+ * in the worst case, when there is no available ID, this will
+ * use the parent directory's IDs.  Fills in <*st> on success.
+ */
+static int
+fs_nde(struct fs_softc *sc, struct l9p_fid *dir, bool isdir, gid_t egid,
+    struct stat *st, uid_t *nuid, gid_t *ngid)
+{
+	struct fs_fid *dirf;
+	struct fs_authinfo *auth;
+	int32_t wanted;
+
+	if (sc->fs_readonly)
+		return (EROFS);
+	dirf = dir->lo_aux;
+	assert(dirf != NULL);
+	if (fstatat(dirf->ff_dirfd, dirf->ff_name, st,
+	    AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+	if (!S_ISDIR(st->st_mode))
+		return (ENOTDIR);
+
+	/* Any failure of the access check is reported as EPERM. */
+	auth = dirf->ff_ai;
+	fillacl(dirf);
+	wanted = isdir ? L9P_ACE_ADD_SUBDIRECTORY : L9P_ACE_ADD_FILE;
+	if (check_access(wanted, dirf->ff_acl, st, NULL, NULL, auth, egid))
+		return (EPERM);
+
+	/* Owner/group: egid or attach-time ids if any, else the parent's. */
+	*nuid = auth->ai_uid == (uid_t)-1 ? st->st_uid : auth->ai_uid;
+	*ngid = egid == (gid_t)-1 ?
+	    (auth->ai_ngids > 0 ? auth->ai_gids[0] : st->st_gid) : egid;
+	return (0);
+}
+
+/*
+ * Allocate new open-file data structure to attach to a fid.
+ *
+ * The new file's authinfo is the same as the old one's, and
+ * we gain a reference.
+ */
+static struct fs_fid *
+open_fid(int dirfd, const char *path, struct fs_authinfo *ai, bool creating)
+{
+	struct fs_fid *ff;
+	uint32_t newcount;
+
+	ff = l9p_calloc(1, sizeof(*ff));
+	if (pthread_mutex_init(&ff->ff_mtx, NULL) != 0) {
+		free(ff);
+		return (NULL);
+	}
+	ff->ff_fd = -1;		/* -1: not a valid descriptor */
+	ff->ff_dirfd = dirfd;
+	ff->ff_name = strdup(path);
+	if (ff->ff_name == NULL) {
+		pthread_mutex_destroy(&ff->ff_mtx);
+		free(ff);
+		return (NULL);
+	}
+
+	/* Take our reference on the shared authinfo under its lock. */
+	pthread_mutex_lock(&ai->ai_mtx);
+	newcount = ++ai->ai_refcnt;
+	pthread_mutex_unlock(&ai->ai_mtx);
+
+	/*
+	 * A count of exactly 1 means we hold the *first* reference,
+	 * which is legitimate only while the authinfo is being
+	 * created.  This will not catch every bad (re)use of a freed
+	 * authinfo, but it may catch a few.
+	 */
+	assert(newcount > 1 || creating);
+	L9P_LOG(L9P_DEBUG, "authinfo %p now used by %lu",
+	    (void *)ai, (u_long)newcount);
+	ff->ff_ai = ai;
+	return (ff);
+}
+
+static void
+dostat(struct fs_softc *sc, struct l9p_stat *s, char *name,
+    struct stat *buf, bool dotu)
+{
+	struct passwd *user;
+	struct group *group;
+
+	/*
+	 * Fill <*s> from the stat(2) data in <*buf> for the file <name>;
+	 * <dotu> selects numeric ids and the 9P2000.u extension string.
+	 */
+	memset(s, 0, sizeof(struct l9p_stat));
+	generate_qid(buf, &s->qid);
+
+	s->type = 0;
+	s->dev = 0;
+	s->mode = buf->st_mode & 0777;
+
+	if (S_ISDIR(buf->st_mode))
+		s->mode |= L9P_DMDIR;
+	if (S_ISLNK(buf->st_mode) && dotu)
+		s->mode |= L9P_DMSYMLINK;
+	if (S_ISCHR(buf->st_mode) || S_ISBLK(buf->st_mode))
+		s->mode |= L9P_DMDEVICE;
+	if (S_ISSOCK(buf->st_mode))
+		s->mode |= L9P_DMSOCKET;
+	if (S_ISFIFO(buf->st_mode))
+		s->mode |= L9P_DMNAMEDPIPE;
+
+	s->atime = (uint32_t)buf->st_atime;
+	s->mtime = (uint32_t)buf->st_mtime;
+	s->length = (uint64_t)buf->st_size;
+
+	s->name = r_basename(name, NULL, 0);
+
+	if (!dotu) {
+		struct r_pgdata udata, gdata;
+
+		user = fs_getpwuid(sc, buf->st_uid, &udata);
+		group = fs_getgrgid(sc, buf->st_gid, &gdata);
+		s->uid = user != NULL ? strdup(user->pw_name) : NULL;
+		s->gid = group != NULL ? strdup(group->gr_name) : NULL;
+		s->muid = user != NULL ? strdup(user->pw_name) : NULL;
+		r_pgfree(&udata);
+		r_pgfree(&gdata);
+	} else {
+		/*
+		 * When using 9P2000.u, we don't need to bother about
+		 * providing user and group names in textual form.
+		 *
+		 * NB: a failed asprintf() need not leave s->extension
+		 * usable, so it is reset to NULL (unset) explicitly.
+		 */
+		s->n_uid = buf->st_uid;
+		s->n_gid = buf->st_gid;
+		s->n_muid = buf->st_uid;
+
+		if (S_ISLNK(buf->st_mode)) {
+			/* NOTE(review): <name> is resolved against the cwd here -- confirm */
+			char target[MAXPATHLEN];
+			ssize_t ret = readlink(name, target, MAXPATHLEN);
+
+			if (ret < 0) {
+				s->extension = NULL;
+				return;
+			}
+
+			s->extension = strndup(target, (size_t)ret);
+		}
+
+		if (S_ISBLK(buf->st_mode) &&
+		    asprintf(&s->extension, "b %d %d", major(buf->st_rdev),
+		    minor(buf->st_rdev)) < 0)
+			s->extension = NULL;
+
+		if (S_ISCHR(buf->st_mode) &&
+		    asprintf(&s->extension, "c %d %d", major(buf->st_rdev),
+		    minor(buf->st_rdev)) < 0)
+			s->extension = NULL;
+	}
+}
+
+/*
+ * Convert the system statfs data <in> into the 9P statfs reply
+ * <out>.  <namelen> is the maximum file name length to report.
+ */
+static void
+dostatfs(struct l9p_statfs *out, struct statfs *in, long namelen)
+{
+
+	out->type = L9P_FSTYPE;
+	out->bsize = in->f_bsize;
+	out->blocks = in->f_blocks;
+	out->bfree = in->f_bfree;
+	out->bavail = in->f_bavail;
+	out->files = in->f_files;
+	out->ffree = in->f_ffree;
+	out->namelen = (uint32_t)namelen;
+	/*
+	 * The two fsid words are signed 32-bit values.  Go through
+	 * uint32_t first so that a negative val[1] is not
+	 * sign-extended over the half that holds val[0].
+	 */
+	out->fsid = ((uint64_t)(uint32_t)in->f_fsid.val[0] << 32) |
+	    (uint64_t)(uint32_t)in->f_fsid.val[1];
+}
+
+/*
+ * Build the 9P qid <qid> for the file described by <buf>: the
+ * path is the inode number, the version is always 0, and the
+ * type reflects whether the file is a regular file, directory
+ * or symbolic link.
+ */
+static void
+generate_qid(struct stat *buf, struct l9p_qid *qid)
+{
+	qid->path = buf->st_ino;
+	qid->version = 0;
+	/*
+	 * Start from a clean type: the bits below are OR-ed in, and
+	 * the caller's qid storage is not necessarily zeroed.
+	 */
+	qid->type = 0;
+
+	if (S_ISREG(buf->st_mode))
+		qid->type |= L9P_QTFILE;
+
+	if (S_ISDIR(buf->st_mode))
+		qid->type |= L9P_QTDIR;
+
+	if (S_ISLNK(buf->st_mode))
+		qid->type |= L9P_QTSYMLINK;
+}
+
+/*
+ * Make sure ff->ff_acl is populated, looking the ACL up through
+ * ff_fd / ff_name if necessary.  Nothing is done when an ACL is
+ * already cached or when FF_NO_NFSV4_ACL is set; a lookup that
+ * comes back empty sets that flag so it is not repeated.
+ */
+static void
+fillacl(struct fs_fid *ff)
+{
+
+	/* Already cached: nothing to do. */
+	if (ff->ff_acl != NULL)
+		return;
+
+	/* Told (or learned earlier) not to look. */
+	if ((ff->ff_flags & FF_NO_NFSV4_ACL) != 0)
+		return;
+
+	ff->ff_acl = look_for_nfsv4_acl(ff, ff->ff_fd, ff->ff_name);
+	if (ff->ff_acl == NULL)
+		ff->ff_flags |= FF_NO_NFSV4_ACL;
+}
+
+/*
+ * Fetch an ACL by descriptor and/or path name on behalf of <ff>.
+ * Returns NULL straight away if <ff> has FF_NO_NFSV4_ACL set.
+ * Unlike fillacl(), this neither caches the result in <ff> nor
+ * records a failure in its flags, and it works on the <fd> and
+ * <path> supplied rather than on ff_fd / ff_name.
+ *
+ * (Used, for instance, to get the ACL of a parent directory.)
+ */
+static struct l9p_acl *
+getacl(struct fs_fid *ff, int fd, const char *path)
+{
+
+	return ((ff->ff_flags & FF_NO_NFSV4_ACL) != 0 ?
+	    NULL : look_for_nfsv4_acl(ff, fd, path));
+}
+
+/*
+ * Forget the ACL cached in <ff> and reset ff_flags to the
+ * authinfo's flags, e.g. after the file moved to a different
+ * directory where inherited ACLs may differ.
+ */
+static void
+dropacl(struct fs_fid *ff)
+{
+	struct l9p_acl *stale;
+
+	stale = ff->ff_acl;
+	ff->ff_acl = NULL;
+	ff->ff_flags = ff->ff_ai->ai_flags;
+	l9p_acl_free(stale);
+}
+
+/*
+ * Check to see if we can find NFSv4 ACLs for the given file.
+ * If we have an open fd (fd >= 0) we use that; otherwise <path>
+ * is opened relative to ff->ff_dirfd for the duration of the
+ * call.
+ *
+ * Returns the converted ACL, or NULL if the file cannot be
+ * opened, has no NFSv4 ACL, or the ACL cannot be retrieved or
+ * converted.
+ */
+static struct l9p_acl *
+look_for_nfsv4_acl(struct fs_fid *ff, int fd, const char *path)
+{
+	struct l9p_acl *acl;
+	acl_t sysacl;
+	int doclose = 0;
+
+	if (fd < 0) {
+		fd = openat(ff->ff_dirfd, path, 0);
+		if (fd < 0) {
+			/*
+			 * Report the real reason here rather than
+			 * handing -1 to acl_get_fd_np() and close().
+			 */
+			L9P_LOG(L9P_ERROR,
+			    "error opening %s to retrieve NFSv4 ACL: %s",
+			    path, strerror(errno));
+			return (NULL);
+		}
+		doclose = 1;
+	}
+
+	sysacl = acl_get_fd_np(fd, ACL_TYPE_NFS4);
+	if (sysacl == NULL) {
+		/*
+		 * EINVAL means no NFSv4 ACLs apply for this file.
+		 * Other error numbers indicate some kind of problem.
+		 */
+		if (errno != EINVAL) {
+			L9P_LOG(L9P_ERROR,
+			    "error retrieving NFSv4 ACL from "
+			    "fdesc %d (%s): %s", fd,
+			    path, strerror(errno));
+		}
+
+		if (doclose)
+			close(fd);
+
+		return (NULL);
+	}
+#if defined(HAVE_FREEBSD_ACLS)
+	acl = l9p_freebsd_nfsv4acl_to_acl(sysacl);
+#else
+	acl = NULL; /* XXX need a l9p_darwin_acl_to_acl */
+#endif
+	acl_free(sysacl);
+
+	/* Only close descriptors we opened ourselves. */
+	if (doclose)
+		close(fd);
+
+	return (acl);
+}
+
+/*
+ * Decide whether the user described by authinfo <ai>, acting
+ * with effective group ID <egid> ((gid_t)-1 means none was
+ * supplied), may perform the operation(s) in <opmask>.
+ *
+ * <opmask> uses NFSv4 style operation bits.  The caller passes
+ * the ACL and stat data of the parent (pacl, pst) and/or of the
+ * child (cacl, cst).  At most one of pacl+pst and cacl+cst can
+ * be NULL, unless ACLs are not supported; then pacl and cacl can
+ * both be NULL, but pst or cst must be non-NULL depending on the
+ * operation.
+ *
+ * If any ACL is supplied, ACLs are used exclusively and Unix
+ * permissions are ignored; otherwise the check falls back on
+ * the stat st_mode bits.  Super-user is allowed as well.
+ *
+ * Returns whatever l9p_acl_check_access() returns.
+ */
+static int
+check_access(int32_t opmask,
+    struct l9p_acl *pacl, struct stat *pst,
+    struct l9p_acl *cacl, struct stat *cst,
+    struct fs_authinfo *ai, gid_t egid)
+{
+	struct l9p_acl_check_args args;
+	bool have_acl;
+
+	have_acl = (pacl != NULL || cacl != NULL);
+
+	/* Who is asking. */
+	args.aca_uid = ai->ai_uid;
+	args.aca_gid = egid;
+	args.aca_groups = ai->ai_gids;
+	args.aca_ngroups = (size_t)ai->ai_ngids;
+	args.aca_superuser = true;
+
+	/* What is being asked about. */
+	args.aca_parent = pacl;
+	args.aca_pstat = pst;
+	args.aca_child = cacl;
+	args.aca_cstat = cst;
+
+	/* Which rules apply. */
+	if (have_acl)
+		args.aca_aclmode = L9P_ACM_NFS_ACL | L9P_ACM_ZFS_ACL;
+	else
+		args.aca_aclmode = L9P_ACM_STAT_MODE;
+
+	return (l9p_acl_check_access(opmask, &args));
+}
+
+/*
+ * Handle Tattach: work out which user is attaching, check that
+ * the root directory is usable, build a fresh authinfo (uid plus
+ * group list) and hang a new fs_fid for the root directory on
+ * req->lr_fid.
+ *
+ * Returns 0 on success with the root qid in the response;
+ * otherwise ENOENT or ENOTDIR for a bad root, EPERM for any
+ * other denial, ENOMEM on allocation failure, or the error from
+ * pthread_mutex_init().
+ */
+static int
+fs_attach(void *softc, struct l9p_request *req)
+{
+	struct fs_authinfo *ai;
+	struct fs_softc *sc = (struct fs_softc *)softc;
+	struct fs_fid *file;
+	struct passwd *pwd;
+	struct stat st;
+	struct r_pgdata udata;
+	uint32_t n_uname;
+	gid_t *gids;
+	uid_t uid;
+	int error;
+	int ngroups;
+	bool have_udata = false;	/* udata was handed to fs_getpwuid */
+
+	assert(req->lr_fid != NULL);
+
+	/*
+	 * Single-thread pwd/group related items.  We have a reentrant
+	 * r_getpwuid but not a reentrant r_getpwnam, and l9p_getgrlist
+	 * may use non-reentrant C library getgr* routines.
+	 */
+	pthread_mutex_lock(&fs_attach_mutex);
+
+	n_uname = req->lr_req.tattach.n_uname;
+	if (n_uname != L9P_NONUNAME) {
+		uid = (uid_t)n_uname;
+		pwd = fs_getpwuid(sc, uid, &udata);
+		have_udata = true;
+		if (pwd == NULL)
+			L9P_LOG(L9P_DEBUG,
+			    "Tattach: uid %ld: no such user", (long)uid);
+	} else {
+		uid = (uid_t)-1;
+#if defined(WITH_CASPER)
+		pwd = cap_getpwnam(sc->fs_cappwd, req->lr_req.tattach.uname);
+#else
+		pwd = getpwnam(req->lr_req.tattach.uname);
+#endif
+		if (pwd == NULL)
+			L9P_LOG(L9P_DEBUG,
+			    "Tattach: %s: no such user",
+			    req->lr_req.tattach.uname);
+	}
+
+	/*
+	 * If caller didn't give a numeric UID, pick it up from pwd
+	 * if possible.  If that doesn't work we can't continue.
+	 *
+	 * Note that pwd also supplies the group set.  This assumes
+	 * the server has the right mapping; this needs improvement.
+	 * We do at least support ai->ai_ngids==0 properly now though.
+	 */
+	if (uid == (uid_t)-1 && pwd != NULL)
+		uid = pwd->pw_uid;
+	if (uid == (uid_t)-1)
+		error = EPERM;
+	else {
+		error = 0;
+		if (fstat(sc->fs_rootfd, &st) != 0)
+			error = errno;
+		else if (!S_ISDIR(st.st_mode))
+			error = ENOTDIR;
+	}
+	if (error) {
+		/* pwd is not used past this point; release its storage. */
+		if (have_udata)
+			r_pgfree(&udata);
+		pthread_mutex_unlock(&fs_attach_mutex);
+		L9P_LOG(L9P_DEBUG,
+		    "Tattach: denying uid=%ld access to rootdir: %s",
+		    (long)uid, strerror(error));
+		/*
+		 * Pass ENOENT and ENOTDIR through for diagnosis;
+		 * others become EPERM.  This should not leak too
+		 * much security.
+		 */
+		return (error == ENOENT || error == ENOTDIR ? error : EPERM);
+	}
+
+	if (pwd != NULL) {
+		/*
+		 * This either succeeds and fills in ngroups and
+		 * returns non-NULL, or fails and sets ngroups to 0
+		 * and returns NULL.  Either way ngroups is correct.
+		 */
+		gids = l9p_getgrlist(pwd->pw_name, pwd->pw_gid, &ngroups);
+	} else {
+		gids = NULL;
+		ngroups = 0;
+	}
+
+	/*
+	 * This was the last use of pwd, so the lookup data that
+	 * fs_getpwuid() filled in can go now.
+	 */
+	if (have_udata)
+		r_pgfree(&udata);
+
+	/*
+	 * Done with pwd and group related items that may use
+	 * non-reentrant C library routines; allow other threads in.
+	 */
+	pthread_mutex_unlock(&fs_attach_mutex);
+
+	ai = malloc(sizeof(*ai) + (size_t)ngroups * sizeof(gid_t));
+	if (ai == NULL) {
+		free(gids);
+		return (ENOMEM);
+	}
+	error = pthread_mutex_init(&ai->ai_mtx, NULL);
+	if (error) {
+		free(gids);
+		free(ai);
+		return (error);
+	}
+	ai->ai_refcnt = 0;
+	ai->ai_uid = uid;
+	ai->ai_flags = 0;	/* XXX for now */
+	ai->ai_ngids = ngroups;
+	/* gids is NULL when ngroups is 0; never hand that to memcpy(). */
+	if (ngroups > 0)
+		memcpy(ai->ai_gids, gids, (size_t)ngroups * sizeof(gid_t));
+	free(gids);
+
+	/* The "true" marks this as the creating reference on ai. */
+	file = open_fid(sc->fs_rootfd, ".", ai, true);
+	if (file == NULL) {
+		pthread_mutex_destroy(&ai->ai_mtx);
+		free(ai);
+		return (ENOMEM);
+	}
+
+	req->lr_fid->lo_aux = file;
+	generate_qid(&st, &req->lr_resp.rattach.qid);
+	return (0);
+}
+
+/*
+ * Handle Tclunk: close whatever the fid has open.  An open
+ * directory stream takes precedence; otherwise a plain file
+ * descriptor is closed if there is one.  Always returns 0.
+ */
+static int
+fs_clunk(void *softc __unused, struct l9p_fid *fid)
+{
+	struct fs_fid *ff = fid->lo_aux;
+
+	assert(ff != NULL);
+
+	if (ff->ff_dir != NULL) {
+		closedir(ff->ff_dir);
+		ff->ff_dir = NULL;
+		return (0);
+	}
+
+	if (ff->ff_fd != -1) {
+		close(ff->ff_fd);
+		ff->ff_fd = -1;
+	}
+
+	return (0);
+}
+
+/*
+ * Create ops.
+ *
+ * We are to create a new file under some existing path,
+ * where the new file's name is in the Tcreate request and the
+ * existing path is due to a fid-based file (req->lr_fid).
+ *
+ * The kind of object is chosen from the L9P_DM* bits in the
+ * requested permissions: directory, symlink, named pipe, socket,
+ * device node, or (none of those) a regular file.  One op
+ * (create regular file) sets file->fd, the rest do not.
+ *
+ * Returns 0 with the new object's qid in the response, or an
+ * error number.
+ */
+static int
+fs_create(void *softc, struct l9p_request *req)
+{
+	struct l9p_fid *dir;
+	struct stat st;
+	uint32_t dmperm;
+	mode_t perm;
+	char *name;
+	int error;
+
+	dir = req->lr_fid;
+	name = req->lr_req.tcreate.name;
+	dmperm = req->lr_req.tcreate.perm;
+	perm = (mode_t)(dmperm & 0777);
+
+	if (dmperm & L9P_DMDIR)
+		error = fs_imkdir(softc, dir, name, true,
+		    perm, (gid_t)-1, &st);
+	else if (dmperm & L9P_DMSYMLINK)
+		error = fs_isymlink(softc, dir, name,
+		    req->lr_req.tcreate.extension, (gid_t)-1, &st);
+	else if (dmperm & L9P_DMNAMEDPIPE)
+		error = fs_imkfifo(softc, dir, name, true,
+		    perm, (gid_t)-1, &st);
+	else if (dmperm & L9P_DMSOCKET)
+		error = fs_imksocket(softc, dir, name, true,
+		    perm, (gid_t)-1, &st);
+	else if (dmperm & L9P_DMDEVICE) {
+		unsigned int devmajor, devminor;
+		char type;
+		char *ext;
+		dev_t dev;
+
+		/*
+		 * The device description comes from the client in the
+		 * extension string ("b maj min" or "c maj min").
+		 * Refuse a missing string instead of passing NULL to
+		 * sscanf().
+		 *
+		 * ??? Should this be testing < 3?  For now, allow a single
+		 * integer mode with minor==0 implied.
+		 */
+		ext = req->lr_req.tcreate.extension;
+		if (ext == NULL)
+			return (EINVAL);
+		devminor = 0;
+		if (sscanf(ext, "%c %u %u",
+		    &type, &devmajor, &devminor) < 2) {
+			return (EINVAL);
+		}
+
+		switch (type) {
+		case 'b':
+			perm |= S_IFBLK;
+			break;
+		case 'c':
+			perm |= S_IFCHR;
+			break;
+		default:
+			return (EINVAL);
+		}
+		dev = makedev(devmajor, devminor);
+		error = fs_imknod(softc, dir, name, true, perm, dev,
+		    (gid_t)-1, &st);
+	} else {
+		enum l9p_omode p9;
+		int flags;
+
+		/* Regular file: translate the 9P open mode first. */
+		p9 = req->lr_req.tcreate.mode;
+		error = fs_oflags_dotu(p9, &flags);
+		if (error)
+			return (error);
+		error = fs_icreate(softc, dir, name, flags,
+		    true, perm, (gid_t)-1, &st);
+		req->lr_resp.rcreate.iounit = req->lr_conn->lc_max_io_size;
+	}
+
+	if (error == 0)
+		generate_qid(&st, &req->lr_resp.rcreate.qid);
+
+	return (error);
+}
+
+/*
+ * Apply the Plan 9 create-permission rule.
+ *
+ * https://swtch.com/plan9port/man/man9/open.html and
+ * http://plan9.bell-labs.com/magic/man2html/5/open
+ * say that the permissions actually given are
+ *     perm & (~0666 | (dir.perm & 0666))
+ * for files, and
+ *     perm & (~0777 | (dir.perm & 0777))
+ * for directories.  In other words, the parent directory may
+ * take away permissions that the operation asked for.
+ *
+ * This seems a bit restrictive; probably
+ * there should be a control knob for this.
+ */
+static inline mode_t
+fs_p9perm(mode_t perm, mode_t dir_perm, bool isdir)
+{
+	/* Bits that the parent directory is allowed to mask off. */
+	mode_t inherit = isdir ? 0777 : 0666;
+
+	return (perm & (~inherit | (dir_perm & inherit)));
+}
+
+/*
+ * Internal form of create (plain file).
+ *
+ * Our caller takes care of splitting off all the special
+ * types of create (mknod, etc), so this is purely for files.
+ * We receive the fs_softc <softc>, the directory fid <dir>
+ * in which the new file is to be created, the name of the
+ * new file, the open(2) <flags>, a flag <isp9> indicating
+ * whether to do plan9 style permissions or Linux style
+ * permissions, the permissions <perm>, an effective group id
+ * <egid>, and a pointer to a stat structure <st> to fill in
+ * describing the final result on success.
+ *
+ * On successful create, the fid switches to the newly created
+ * file, which is now open; its associated file-name changes too,
+ * and any ACL state cached for the directory is dropped.
+ *
+ * Note that the original (dir) fid is never currently open,
+ * so there is nothing to close.
+ *
+ * Returns 0 on success or an error number; on failure the fid
+ * is left unchanged.
+ */
+static int
+fs_icreate(void *softc, struct l9p_fid *dir, char *name, int flags,
+    bool isp9, mode_t perm, gid_t egid, struct stat *st)
+{
+	struct fs_fid *file;
+	gid_t gid;
+	uid_t uid;
+	char newname[MAXPATHLEN];
+	int error, fd;
+
+	file = dir->lo_aux;
+
+	/*
+	 * Build full path name from directory + file name.  We'll
+	 * check permissions on the parent directory, then race to
+	 * create the file before anything bad happens like symlinks.
+	 */
+	error = fs_buildname(dir, name, newname, sizeof(newname));
+	if (error)
+		return (error);
+
+	/* In case of success, we will need a new file->ff_name. */
+	name = strdup(newname);
+	if (name == NULL)
+		return (ENOMEM);
+
+	/* Check create permission and compute new file ownership. */
+	error = fs_nde(softc, dir, false, egid, st, &uid, &gid);
+	if (error) {
+		free(name);
+		return (error);
+	}
+
+	/* Adjust new-file permissions for Plan9 protocol. */
+	if (isp9)
+		perm = fs_p9perm(perm, st->st_mode, false);
+
+	/* Create is always exclusive so O_TRUNC is irrelevant. */
+	fd = openat(file->ff_dirfd, newname, flags | O_CREAT | O_EXCL, perm);
+	if (fd < 0) {
+		error = errno;
+		free(name);
+		return (error);
+	}
+
+	/* Fix permissions and owner. */
+	if (fchmod(fd, perm) != 0 ||
+	    fchown(fd, uid, gid) != 0 ||
+	    fstat(fd, st) != 0) {
+		error = errno;
+		(void) close(fd);
+		/* unlink(newname); ? */
+		free(name);
+		return (error);
+	}
+
+	/*
+	 * It *was* a directory; now it's a file, and it's open.
+	 * Anything cached in ff_acl (and a remembered "no ACL"
+	 * flag) belonged to the directory, so discard it and let
+	 * the next fillacl() look at the new file instead.
+	 */
+	dropacl(file);
+	free(file->ff_name);
+	file->ff_name = name;
+	file->ff_fd = fd;
+	return (0);
+}
+
+/*
+ * Internal form of open: stat file and verify permissions (from p9
+ * argument), then open the file-or-directory, leaving the internal
+ * fs_fid fields set up.  If we cannot open the file, return a
+ * suitable error number, and leave everything unchanged.
+ *
+ * To mitigate the race between permissions testing and the actual
+ * open, we stat the file twice (once with fstatat() before open,
+ * then with fstat() after).  We assume O_NOFOLLOW is set in flags,
+ * so if some other race-winner substitutes in a symlink we won't
+ * open it here.  (However, embedded symlinks, if they occur, are
+ * still an issue.  Ideally we would like to have an O_NEVERFOLLOW
+ * that fails on embedded symlinks, and a way to pass this to
+ * lstat() as well.)
+ *
+ * Directories are opened without O_NOFOLLOW, so for them we must
+ * rely on substitution-detection via fstat().  To simplify the
+ * code we just always re-check.
+ *
+ * (For a proper fix in the future, we can keep each parent
+ * directory open during walk etc, and allow only final name
+ * components with O_NOFOLLOW.)
+ *
+ * Returns 0 on success, with st filled in and either ff_dir (for
+ * a directory) or ff_fd (for anything else) set.  Errors are
+ * EROFS for a write-type open on a read-only export, EINVAL for
+ * a bad access mode, the error from the initial stat or the
+ * permission check, and EPERM for everything else.
+ */
+static int
+fs_iopen(void *softc, struct l9p_fid *fid, int flags, enum l9p_omode p9,
+    gid_t egid __unused, struct stat *st)
+{
+	struct fs_softc *sc = softc;
+	struct fs_fid *file;
+	struct stat first;
+	int32_t op;
+	char *name;
+	int error;
+	int fd;
+	DIR *dirp;
+
+	/* Forbid write ops on read-only file system. */
+	if (sc->fs_readonly) {
+		if ((flags & O_TRUNC) != 0)
+			return (EROFS);
+		if ((flags & O_ACCMODE) != O_RDONLY)
+			return (EROFS);
+		if (p9 & L9P_ORCLOSE)
+			return (EROFS);
+	}
+
+	file = fid->lo_aux;
+	assert(file != NULL);
+	name = file->ff_name;
+
+	if (fstatat(file->ff_dirfd, name, &first, AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+	if (S_ISLNK(first.st_mode))
+		return (EPERM);
+
+	/* Can we rely on O_APPEND here?  Best not, can be cleared. */
+	switch (flags & O_ACCMODE) {
+	case O_RDONLY:
+		op = L9P_ACE_READ_DATA;
+		break;
+	case O_WRONLY:
+		op = L9P_ACE_WRITE_DATA;
+		break;
+	case O_RDWR:
+		op = L9P_ACE_READ_DATA | L9P_ACE_WRITE_DATA;
+		break;
+	default:
+		return (EINVAL);
+	}
+	fillacl(file);
+	error = check_access(op, NULL, NULL, file->ff_acl, &first,
+	    file->ff_ai, (gid_t)-1);
+	if (error)
+		return (error);
+
+	if (S_ISDIR(first.st_mode)) {
+		/* Forbid write or truncate on directory. */
+		if ((flags & O_ACCMODE) != O_RDONLY || (flags & O_TRUNC))
+			return (EPERM);
+		fd = openat(file->ff_dirfd, name, O_DIRECTORY);
+		if (fd < 0)
+			return (EPERM);
+		dirp = fdopendir(fd);
+		if (dirp == NULL) {
+			/* fdopendir() did not take over fd; don't leak it. */
+			(void) close(fd);
+			return (EPERM);
+		}
+		fd = dirfd(dirp);
+	} else {
+		dirp = NULL;
+		fd = openat(file->ff_dirfd, name, flags);
+		if (fd < 0)
+			return (EPERM);
+	}
+
+	/*
+	 * We have a valid fd, and maybe non-null dirp.  Re-check
+	 * the file, and fail if st_dev or st_ino changed.
+	 */
+	if (fstat(fd, st) != 0 ||
+	    first.st_dev != st->st_dev ||
+	    first.st_ino != st->st_ino) {
+		/* closedir() also closes the underlying descriptor. */
+		if (dirp != NULL)
+			(void) closedir(dirp);
+		else
+			(void) close(fd);
+		return (EPERM);
+	}
+	if (dirp != NULL)
+		file->ff_dir = dirp;
+	else
+		file->ff_fd = fd;
+	return (0);
+}
+
+/*
+ * Internal form of mkdir (common code for all forms).
+ *
+ * Arguments: the fs_softc <softc>; the directory fid <dir> that
+ * is to hold the new entry; the entry's <name>; <isp9>, which
+ * selects plan9 style rather than Linux style permissions; the
+ * permissions <perm>; an effective group id <egid>; and <st>,
+ * which is filled in to describe the final result on success.
+ *
+ * Returns 0 or an error number.  See also fs_icreate() above.
+ */
+static int
+fs_imkdir(void *softc, struct l9p_fid *dir, char *name,
+    bool isp9, mode_t perm, gid_t egid, struct stat *st)
+{
+	struct fs_fid *parent = dir->lo_aux;
+	char path[MAXPATHLEN];
+	uid_t owner;
+	gid_t group;
+	int dfd, rc;
+
+	rc = fs_buildname(dir, name, path, sizeof(path));
+	if (rc)
+		return (rc);
+
+	/* Permission check; also yields the parent's stat and new owner. */
+	rc = fs_nde(softc, dir, true, egid, st, &owner, &group);
+	if (rc)
+		return (rc);
+
+	if (isp9)
+		perm = fs_p9perm(perm, st->st_mode, true);
+
+	if (mkdirat(parent->ff_dirfd, path, perm) != 0)
+		return (errno);
+
+	/* Re-open the new directory so the fix-ups go through an fd. */
+	dfd = openat(parent->ff_dirfd, path,
+	    O_DIRECTORY | O_RDONLY | O_NOFOLLOW);
+	if (dfd < 0)
+		return (errno);
+
+	/* rc is 0 here; record the first failure, if any. */
+	if (fchown(dfd, owner, group) != 0 ||
+	    fchmod(dfd, perm) != 0 ||
+	    fstat(dfd, st) != 0) {
+		rc = errno;
+		/* rmdir(newname) ? */
+	}
+	(void) close(dfd);
+
+	return (rc);
+}
+
+/*
+ * Internal form of mknod (special device).
+ *
+ * <mode> carries the device type (S_IFBLK, S_IFCHR) as well as
+ * the permission bits; <dev> is the device number.  The other
+ * arguments are as for fs_imkdir().  On success <st> describes
+ * the new node.
+ *
+ * Returns 0 or an error number; EPERM if what we find under the
+ * new name afterwards is not of the type we asked for.
+ */
+static int
+fs_imknod(void *softc, struct l9p_fid *dir, char *name,
+    bool isp9, mode_t mode, dev_t dev, gid_t egid, struct stat *st)
+{
+	struct fs_fid *parent = dir->lo_aux;
+	char path[MAXPATHLEN];
+	mode_t perm;
+	uid_t owner;
+	gid_t group;
+	int dfd, rc;
+
+	rc = fs_buildname(dir, name, path, sizeof(path));
+	if (rc)
+		return (rc);
+
+	rc = fs_nde(softc, dir, false, egid, st, &owner, &group);
+	if (rc)
+		return (rc);
+
+	/* Separate the permission bits, adjusting them for plan9. */
+	perm = mode & 0777;
+	if (isp9) {
+		perm = fs_p9perm(perm, st->st_mode, false);
+		mode = (mode & ~0777) | perm;
+	}
+
+	dfd = parent->ff_dirfd;
+	if (mknodat(dfd, path, mode, dev) != 0)
+		return (errno);
+
+	/* We cannot open the new name; race to use l* syscalls. */
+	if (fchownat(dfd, path, owner, group, AT_SYMLINK_NOFOLLOW) != 0 ||
+	    fchmodat(dfd, path, perm, AT_SYMLINK_NOFOLLOW) != 0 ||
+	    fstatat(dfd, path, st, AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+
+	/* if (error) unlink(newname) ? */
+
+	if ((st->st_mode & S_IFMT) != (mode & S_IFMT))
+		return (EPERM);		/* ??? lost a race anyway */
+
+	return (0);
+}
+
+/*
+ * Internal form of mkfifo.
+ *
+ * Creates the named pipe <name> in the directory fid <dir>.
+ * <isp9> selects plan9 style permission masking of <perm>,
+ * <egid> is the effective group id, and <st> is filled in to
+ * describe the new fifo on success.
+ *
+ * Returns 0 or an error number; EPERM if what we find under the
+ * new name afterwards is not a fifo.
+ */
+static int
+fs_imkfifo(void *softc, struct l9p_fid *dir, char *name,
+    bool isp9, mode_t perm, gid_t egid, struct stat *st)
+{
+	struct fs_fid *ff;
+	gid_t gid;
+	uid_t uid;
+	char newname[MAXPATHLEN];
+	int error;
+
+	ff = dir->lo_aux;
+	error = fs_buildname(dir, name, newname, sizeof(newname));
+	if (error)
+		return (error);
+
+	error = fs_nde(softc, dir, false, egid, st, &uid, &gid);
+	if (error)
+		return (error);
+
+	if (isp9)
+		perm = fs_p9perm(perm, st->st_mode, false);
+
+	/*
+	 * Create the fifo relative to ff_dirfd, the same base the
+	 * fix-up calls below use, rather than relative to whatever
+	 * the current working directory happens to be.
+	 */
+	if (mkfifoat(ff->ff_dirfd, newname, perm) != 0)
+		return (errno);
+
+	/* We cannot open the new name; race to use l* syscalls. */
+	if (fchownat(ff->ff_dirfd, newname, uid, gid, AT_SYMLINK_NOFOLLOW) != 0 ||
+	    fchmodat(ff->ff_dirfd, newname, perm, AT_SYMLINK_NOFOLLOW) != 0 ||
+	    fstatat(ff->ff_dirfd, newname, st, AT_SYMLINK_NOFOLLOW) != 0)
+		error = errno;
+	else if (!S_ISFIFO(st->st_mode))
+		error = EPERM;		/* ??? lost a race anyway */
+
+	/* if (error) unlink(newname) ? */
+
+	return (error);
+}
+
+/*
+ * Internal form of mksocket.
+ *
+ * This is a bit different because of the horrible socket naming
+ * system (bind() with sockaddr_un sun_path).
+ *
+ * Creates the socket inode <name> in the directory fid <dir> by
+ * binding a throw-away AF_UNIX socket to it.  <isp9> selects
+ * plan9 style permission masking of <perm>, <egid> is the
+ * effective group id, and <st> is filled in to describe the new
+ * inode on success.
+ *
+ * Returns 0 or an error number: ENAMETOOLONG if the path does
+ * not fit in sun_path, EPERM if what we find under the new name
+ * afterwards is not a socket.
+ *
+ * NOTE(review): the plain bind() path resolves <newname> against
+ * the process working directory, while the fix-up calls after it
+ * resolve the same string against ff_dirfd -- confirm that these
+ * always refer to the same place.
+ */
+static int
+fs_imksocket(void *softc, struct l9p_fid *dir, char *name,
+    bool isp9, mode_t perm, gid_t egid, struct stat *st)
+{
+	struct fs_fid *ff;
+	struct sockaddr_un sun;
+	char *path;
+	char newname[MAXPATHLEN];
+	gid_t gid;
+	uid_t uid;
+	int error = 0, s, fd;
+
+	ff = dir->lo_aux;
+	error = fs_buildname(dir, name, newname, sizeof(newname));
+	if (error)
+		return (error);
+
+	error = fs_nde(softc, dir, false, egid, st, &uid, &gid);
+	if (error)
+		return (error);
+
+	if (isp9)
+		perm = fs_p9perm(perm, st->st_mode, false);
+
+	s = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (s < 0)
+		return (errno);
+
+	/* By default bind to the full name; fd >= 0 means "use bindat". */
+	path = newname;
+	fd = -1;
+#ifdef HAVE_BINDAT
+	/*
+	 * Try bindat() if needed: when the full name is too long,
+	 * open the containing directory and bind to the bare <name>
+	 * relative to it.  If the open fails we fall through with
+	 * the full name and hit ENAMETOOLONG below.
+	 */
+	if (strlen(path) >= sizeof(sun.sun_path)) {
+		fd = openat(ff->ff_dirfd, ff->ff_name,
+		    O_RDONLY | O_DIRECTORY | O_NOFOLLOW);
+		if (fd >= 0)
+			path = name;
+	}
+#endif
+
+	/*
+	 * Can only create the socket if the path will fit.
+	 * Even if we are using bindat() there are limits
+	 * (the API for AF_UNIX sockets is ... not good).
+	 *
+	 * Note: in theory we can fill sun_path to the end
+	 * (omitting a terminating '\0') but in at least one
+	 * Unix-like system, this was known to behave oddly,
+	 * so we test for ">=" rather than just ">".
+	 */
+	if (strlen(path) >= sizeof(sun.sun_path)) {
+		error = ENAMETOOLONG;
+		goto out;
+	}
+	sun.sun_family = AF_UNIX;
+	sun.sun_len = sizeof(struct sockaddr_un);
+	/* Length was checked above, so this copy is '\0'-terminated. */
+	strncpy(sun.sun_path, path, sizeof(sun.sun_path));
+
+#ifdef HAVE_BINDAT
+	if (fd >= 0) {
+		if (bindat(fd, s, (struct sockaddr *)&sun, sun.sun_len) < 0)
+			error = errno;
+		goto out;	/* done now, for good or ill */
+	}
+#endif
+
+	if (bind(s, (struct sockaddr *)&sun, sun.sun_len) < 0)
+		error = errno;
+out:
+
+	if (error == 0) {
+		/*
+		 * We believe we created the socket-inode.  Fix
+		 * permissions etc.  Note that we cannot use
+		 * fstat() on the socket descriptor: it succeeds,
+		 * but we get bogus data!
+		 */
+		if (fchownat(ff->ff_dirfd, newname, uid, gid, AT_SYMLINK_NOFOLLOW) != 0 ||
+		    fchmodat(ff->ff_dirfd, newname, perm, AT_SYMLINK_NOFOLLOW) != 0 ||
+		    fstatat(ff->ff_dirfd, newname, st, AT_SYMLINK_NOFOLLOW) != 0)
+			error = errno;
+		else if (!S_ISSOCK(st->st_mode))
+			error = EPERM;		/* ??? lost a race anyway */
+
+		/* if (error) unlink(newname) ? */
+	}
+
+	/*
+	 * It's not clear which error should override, although
+	 * ideally we should never see either close() call fail.
+	 * In any case we do want to try to close both fd and s,
+	 * always.  Let's set error only if it is not already set,
+	 * so that all exit paths can use the same code.
+	 */
+	if (fd >= 0 && close(fd) != 0)
+		if (error == 0)
+			error = errno;
+	if (close(s) != 0)
+		if (error == 0)
+			error = errno;
+
+	return (error);
+}
+
+/*
+ * Internal form of symlink.
+ *
+ * Creates <name> in the directory fid <dir> as a symbolic link
+ * to <symtgt>.  Symlinks are presumed to carry no permission
+ * bits, so none are set; they do have owners, however (who may
+ * be charged for quotas).  <st> is filled in to describe the
+ * link on success.
+ *
+ * Returns 0 or an error number; EPERM if what we find under the
+ * new name afterwards is not a symlink.
+ */
+static int
+fs_isymlink(void *softc, struct l9p_fid *dir, char *name,
+    char *symtgt, gid_t egid, struct stat *st)
+{
+	struct fs_fid *parent = dir->lo_aux;
+	char path[MAXPATHLEN];
+	uid_t owner;
+	gid_t group;
+	int dfd, rc;
+
+	rc = fs_buildname(dir, name, path, sizeof(path));
+	if (rc)
+		return (rc);
+
+	rc = fs_nde(softc, dir, false, egid, st, &owner, &group);
+	if (rc)
+		return (rc);
+
+	dfd = parent->ff_dirfd;
+	if (symlinkat(symtgt, dfd, path) != 0)
+		return (errno);
+
+	/* We cannot open the new name; race to use l* syscalls. */
+	if (fchownat(dfd, path, owner, group, AT_SYMLINK_NOFOLLOW) != 0 ||
+	    fstatat(dfd, path, st, AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+
+	/* if (error) unlink(newname) ? */
+
+	if (!S_ISLNK(st->st_mode))
+		return (EPERM);		/* ??? lost a race anyway */
+
+	return (0);
+}
+
+/*
+ * Handle Topen: translate the 9P open mode into open(2) flags,
+ * open the fid through fs_iopen(), and report the resulting qid
+ * and I/O unit in the response.  Returns 0 or an error number.
+ */
+static int
+fs_open(void *softc, struct l9p_request *req)
+{
+	struct stat sb;
+	enum l9p_omode omode;
+	int oflags, rc;
+
+	omode = req->lr_req.topen.mode;
+	rc = fs_oflags_dotu(omode, &oflags);
+	if (rc == 0)
+		rc = fs_iopen(softc, req->lr_fid, oflags, omode,
+		    (gid_t)-1, &sb);
+	if (rc != 0)
+		return (rc);
+
+	req->lr_resp.ropen.iounit = req->lr_conn->lc_max_io_size;
+	generate_qid(&sb, &req->lr_resp.ropen.qid);
+	return (0);
+}
+
+/*
+ * Helper for directory read: lstat <name> relative to the
+ * directory that <file> has open (file->ff_dir), i.e. without
+ * following a symlink in the last component.  Returns what
+ * fstatat() returns.
+ */
+static inline int
+fs_lstatat(struct fs_fid *file, char *name, struct stat *st)
+{
+	int dfd;
+
+	dfd = dirfd(file->ff_dir);
+	return (fstatat(dfd, name, st, AT_SYMLINK_NOFOLLOW));
+}
+
+/*
+ * Handle Tread.
+ *
+ * For a fid with an open directory stream, pack as many stat
+ * entries as fit into the response, one per directory entry,
+ * leaving the stream positioned at the first entry that did not
+ * fit.  Entries that cannot be stat'ed are skipped.
+ *
+ * For any other fid, read up to io.count bytes at io.offset into
+ * the request's data iovec and report the number of bytes read.
+ *
+ * Returns 0 or an errno value.
+ *
+ * NOTE(review): dostat() fills l9stat with allocated strings and
+ * nothing here releases them -- confirm whether l9p_pack_stat()
+ * takes ownership.
+ */
+static int
+fs_read(void *softc, struct l9p_request *req)
+{
+	struct l9p_stat l9stat;
+	struct fs_softc *sc;
+	struct fs_fid *file;
+	bool dotu = req->lr_conn->lc_version >= L9P_2000U;
+	ssize_t ret;
+
+	sc = softc;
+	file = req->lr_fid->lo_aux;
+	assert(file != NULL);
+
+	if (file->ff_dir != NULL) {
+		struct dirent *d;
+		struct stat st;
+		struct l9p_message msg;
+		long o;
+
+		pthread_mutex_lock(&file->ff_mtx);
+
+		/*
+		 * Must use telldir before readdir since seekdir
+		 * takes cookie values.  Unfortunately this wastes
+		 * a lot of time (and memory) building unneeded
+		 * cookies that can only be flushed by closing
+		 * the directory.
+		 *
+		 * NB: FreeBSD libc seekdir has SINGLEUSE defined,
+		 * so in fact, we can discard the cookies by
+		 * calling seekdir on them.  This clears up wasted
+		 * memory at the cost of even more wasted time...
+		 *
+		 * XXX: readdir/telldir/seekdir not thread safe
+		 */
+		l9p_init_msg(&msg, req, L9P_PACK);
+		for (;;) {
+			o = telldir(file->ff_dir);
+			d = readdir(file->ff_dir);
+			if (d == NULL)
+				break;
+			if (fs_lstatat(file, d->d_name, &st))
+				continue;
+			dostat(sc, &l9stat, d->d_name, &st, dotu);
+			if (l9p_pack_stat(&msg, req, &l9stat) != 0) {
+				/* Did not fit: rewind so it is re-read next time. */
+				seekdir(file->ff_dir, o);
+				break;
+			}
+#if defined(__FreeBSD__)
+			/* Discard the cookie, then step past the entry again. */
+			seekdir(file->ff_dir, o);
+			(void) readdir(file->ff_dir);
+#endif
+		}
+
+		pthread_mutex_unlock(&file->ff_mtx);
+	} else {
+		size_t niov = l9p_truncate_iov(req->lr_data_iov,
+		    req->lr_data_niov, req->lr_req.io.count);
+
+#if defined(__FreeBSD__)
+		ret = preadv(file->ff_fd, req->lr_data_iov, (int)niov,
+		    (off_t)req->lr_req.io.offset);
+#else
+		/* XXX: not thread safe, should really use aio_listio. */
+		if (lseek(file->ff_fd, (off_t)req->lr_req.io.offset, SEEK_SET) < 0)
+			return (errno);
+
+		/*
+		 * Keep the result as ssize_t: narrowing it to an
+		 * unsigned type would turn -1 into a large positive
+		 * count and hide the failure from the test below.
+		 */
+		ret = readv(file->ff_fd, req->lr_data_iov, (int)niov);
+#endif
+
+		if (ret < 0)
+			return (errno);
+
+		req->lr_resp.io.count = (uint32_t)ret;
+	}
+
+	return (0);
+}
+
+/*
+ * Handle Tremove: check that the fid's user may unlink the file
+ * from its parent directory, then remove it (as a directory if
+ * that is what it is).
+ *
+ * Returns 0 on success, EROFS on a read-only export, or the
+ * error from the parent lookup, the stat, the permission check
+ * or the unlink itself.
+ */
+static int
+fs_remove(void *softc, struct l9p_fid *fid)
+{
+	struct fs_softc *sc = softc;
+	struct l9p_acl *parent_acl;
+	struct fs_fid *file;
+	struct stat pst, cst;
+	char dirname[MAXPATHLEN];
+	int error;
+
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	/* Find the parent directory's name and stat data. */
+	error = fs_pdir(sc, fid, dirname, sizeof(dirname), &pst);
+	if (error)
+		return (error);
+
+	file = fid->lo_aux;
+	/*
+	 * error is 0 at this point, so a failed stat has to report
+	 * errno; otherwise the remove would be reported as a success.
+	 */
+	if (fstatat(file->ff_dirfd, file->ff_name, &cst, AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+
+	/* The parent's ACL is ours to free; the child's is cached in file. */
+	parent_acl = getacl(file, -1, dirname);
+	fillacl(file);
+
+	error = check_access(L9P_ACOP_UNLINK,
+	    parent_acl, &pst, file->ff_acl, &cst, file->ff_ai, (gid_t)-1);
+	l9p_acl_free(parent_acl);
+	if (error)
+		return (error);
+
+	if (unlinkat(file->ff_dirfd, file->ff_name,
+	    S_ISDIR(cst.st_mode) ? AT_REMOVEDIR : 0) != 0)
+		error = errno;
+
+	return (error);
+}
+
+/*
+ * Handle Tstat: lstat the fid's file relative to its directory
+ * fd and convert the result into the 9P stat reply.  Returns 0
+ * or the errno from the stat.
+ */
+static int
+fs_stat(void *softc, struct l9p_request *req)
+{
+	struct fs_softc *sc = softc;
+	struct fs_fid *ff = req->lr_fid->lo_aux;
+	struct stat sb;
+	bool dotu;
+
+	assert(ff);
+
+	if (fstatat(ff->ff_dirfd, ff->ff_name, &sb,
+	    AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+
+	/* 9P2000.u and later get numeric ids instead of names. */
+	dotu = req->lr_conn->lc_version >= L9P_2000U;
+	dostat(sc, &req->lr_resp.rstat.stat, ff->ff_name, &sb, dotu);
+	return (0);
+}
+
+/*
+ * Handle Twalk: starting from req->lr_fid, step through the name
+ * components in the request, checking search permission on each
+ * directory along the way, and set up req->lr_newfid to refer to
+ * the last name successfully walked.
+ *
+ * Returns 0 if at least the first component (or an empty walk)
+ * succeeded, with the number of qids in rwalk.nwqid.  An ENOTDIR
+ * or permission failure returns an error at any depth; any other
+ * failure returns an error only on the first component.
+ */
+static int
+fs_walk(void *softc, struct l9p_request *req)
+{
+	struct l9p_acl *acl;
+	struct fs_authinfo *ai;
+	struct fs_fid *file = req->lr_fid->lo_aux;
+	struct fs_fid *newfile;
+	struct stat st;
+	size_t clen, namelen, need;
+	char *comp, *succ, *next, *swtmp;
+	bool atroot;
+	bool dotdot;
+	int i, nwname;
+	int error = 0;
+	char namebufs[2][MAXPATHLEN];
+
+	/*
+	 * https://swtch.com/plan9port/man/man9/walk.html:
+	 *
+	 *    It is legal for nwname to be zero, in which case newfid
+	 *    will represent the same file as fid and the walk will
+	 *    usually succeed; this is equivalent to walking to dot.
+	 * [Aside: it's not clear if we should test S_ISDIR here.]
+	 *    ...
+	 *    The name ".." ... represents the parent directory.
+	 *    The name "." ... is not used in the protocol.
+	 *    ... A walk of the name ".." in the root directory
+	 *    of the server is equivalent to a walk with no name
+	 *    elements.
+	 *
+	 * Note that req.twalk.nwname never exceeds L9P_MAX_WELEM,
+	 * so it is safe to convert to plain int.
+	 *
+	 * We are to return an error only if the first walk fails,
+	 * else stop at the end of the names or on the first error.
+	 * The final fid is based on the last name successfully
+	 * walked.
+	 *
+	 * Note that we *do* get Twalk requests with nwname==0 on files.
+	 *
+	 * Set up "successful name" buffer pointer with base fid name,
+	 * initially.  We'll swap each new success into it as we go.
+	 *
+	 * Invariant: atroot, stat data and acl correspond to the
+	 * current (succ) path.
+	 */
+	succ = namebufs[0];
+	next = namebufs[1];
+	namelen = strlcpy(succ, file->ff_name, MAXPATHLEN);
+	if (namelen >= MAXPATHLEN)
+		return (ENAMETOOLONG);
+	if (fstatat(file->ff_dirfd, succ, &st, AT_SYMLINK_NOFOLLOW) < 0)
+		return (errno);
+	ai = file->ff_ai;
+	atroot = strlen(succ) == 0; /* XXX? */
+	fillacl(file);
+	acl = file->ff_acl;
+
+	nwname = (int)req->lr_req.twalk.nwname;
+
+	for (i = 0; i < nwname; i++) {
+		/*
+		 * Must have execute permission to search a directory.
+		 * Then, look up each component in its directory-so-far.
+		 * Check for ".." along the way, handling specially
+		 * as needed.  Forbid "/" in name components.
+		 *
+		 */
+		if (!S_ISDIR(st.st_mode)) {
+			error = ENOTDIR;
+			goto out;
+		}
+		error = check_access(L9P_ACE_EXECUTE,
+		     NULL, NULL, acl, &st, ai, (gid_t)-1);
+		if (error) {
+			L9P_LOG(L9P_DEBUG,
+			    "Twalk: denying dir-walk on \"%s\" for uid %u",
+			    succ, (unsigned)ai->ai_uid);
+			error = EPERM;
+			goto out;
+		}
+		comp = req->lr_req.twalk.wname[i];
+		if (strchr(comp, '/') != NULL) {
+			error = EINVAL;
+			break;
+		}
+
+		clen = strlen(comp);
+		dotdot = false;
+
+		/*
+		 * Build next pathname (into "next").  If "..",
+		 * just strip one name component off the success
+		 * name so far.  Since we know this name fits, the
+		 * stripped down version also fits.  Otherwise,
+		 * the name is the base name plus '/' plus the
+		 * component name plus terminating '\0'; this may
+		 * or may not fit.
+		 */
+		if (comp[0] == '.') {
+			if (clen == 1) {
+				error = EINVAL;
+				break;
+			}
+			if (comp[1] == '.' && clen == 2)
+				dotdot = true;
+		}
+		if (dotdot) {
+			/*
+			 * It's not clear how ".." at root should
+			 * be handled when i > 0.  Obeying the man
+			 * page exactly, we reset i to 0 and stop,
+			 * declaring terminal success.
+			 *
+			 * Otherwise, we just climbed up one level
+			 * so adjust "atroot".
+			 */
+			if (atroot) {
+				i = 0;
+				break;
+			}
+			(void) r_dirname(succ, next, MAXPATHLEN);
+			namelen = strlen(next);
+			atroot = strlen(next) == 0; /* XXX? */
+		} else {
+			need = namelen + 1 + clen + 1;
+			if (need > MAXPATHLEN) {
+				error = ENAMETOOLONG;
+				break;
+			}
+			memcpy(next, succ, namelen);
+			next[namelen++] = '/';
+			memcpy(&next[namelen], comp, clen + 1);
+			namelen += clen;
+			/*
+			 * Since name is never ".", we are necessarily
+			 * descending below the root now.
+			 */
+			atroot = false;
+		}
+
+		if (fstatat(file->ff_dirfd, next, &st, AT_SYMLINK_NOFOLLOW) < 0) {
+			error = ENOENT;
+			break;
+		}
+
+		/*
+		 * Success: generate qid and swap this
+		 * successful name into place.  Update acl.
+		 *
+		 * After the swap the name we just reached is in
+		 * "succ" ("next" now holds the previous name), so
+		 * that is the path whose ACL goes with st.
+		 */
+		generate_qid(&st, &req->lr_resp.rwalk.wqid[i]);
+		swtmp = succ;
+		succ = next;
+		next = swtmp;
+		if (acl != NULL && acl != file->ff_acl)
+			l9p_acl_free(acl);
+		acl = getacl(file, -1, succ);
+	}
+
+	/*
+	 * Fail only if we failed on the first name.
+	 * Otherwise we succeeded on something, and "succ"
+	 * points to the last successful name in namebufs[].
+	 */
+	if (error) {
+		if (i == 0)
+			goto out;
+		error = 0;
+	}
+
+	newfile = open_fid(file->ff_dirfd, succ, ai, false);
+	if (newfile == NULL) {
+		error = ENOMEM;
+		goto out;
+	}
+	if (req->lr_newfid == req->lr_fid) {
+		/*
+		 * Before overwriting fid->lo_aux, free the old value.
+		 * Note that this doesn't free the l9p_fid data,
+		 * just the fs_fid data.  (But it does ditch ff_acl.)
+		 */
+		if (acl == file->ff_acl)
+			acl = NULL;
+		fs_freefid(softc, req->lr_fid);
+		file = NULL;
+	}
+	req->lr_newfid->lo_aux = newfile;
+	/*
+	 * Hand an ACL we looked up during the walk over to the new
+	 * fid, which names the same path.  This includes the case
+	 * where the old fid was just freed: there acl is either
+	 * NULL or one of ours, and would otherwise be leaked.
+	 */
+	if (file == NULL) {
+		if (acl != NULL) {
+			newfile->ff_acl = acl;
+			acl = NULL;
+		}
+	} else if (acl != file->ff_acl) {
+		newfile->ff_acl = acl;
+		acl = NULL;
+	}
+	req->lr_resp.rwalk.nwqid = (uint16_t)i;
+out:
+	/* Free a walk-time ACL that nobody took; never the cached one. */
+	if (file != NULL && acl != file->ff_acl)
+		l9p_acl_free(acl);
+	return (error);
+}
+
+/* Twrite: write the request's data iovecs to the fid's fd at io.offset. */
+static int
+fs_write(void *softc, struct l9p_request *req)
+{
+	struct fs_softc *fsc = softc;
+	struct fs_fid *ff = req->lr_fid->lo_aux;
+	ssize_t nwritten;
+	size_t iovcnt;
+
+	assert(ff != NULL);
+
+	if (fsc->fs_readonly)
+		return (EROFS);
+
+	/* Presumably limits the iovec list to io.count bytes -- see helper. */
+	iovcnt = l9p_truncate_iov(req->lr_data_iov, req->lr_data_niov,
+	    req->lr_req.io.count);
+
+#if defined(__FreeBSD__)
+	nwritten = pwritev(ff->ff_fd, req->lr_data_iov, iovcnt,
+	    req->lr_req.io.offset);
+#else
+	/* XXX: not thread safe, should really use aio_listio. */
+	if (lseek(ff->ff_fd, (off_t)req->lr_req.io.offset, SEEK_SET) < 0)
+		return (errno);
+
+	nwritten = writev(ff->ff_fd, req->lr_data_iov, (int)iovcnt);
+#endif
+	if (nwritten < 0)
+		return (errno);
+
+	req->lr_resp.io.count = (uint32_t)nwritten;
+	return (0);
+}
+
+/* Twstat: apply requested length/owner/mode/name changes to the fid's file. */
+static int
+fs_wstat(void *softc, struct l9p_request *req)
+{
+	struct fs_softc *sc = softc;
+	struct l9p_stat *l9stat = &req->lr_req.twstat.stat;
+	struct l9p_fid *fid;
+	struct fs_fid *file;
+	int fd, error = 0;
+
+	fid = req->lr_fid;
+	file = fid->lo_aux;
+	assert(file != NULL);
+
+	/*
+	 * XXX: stat(9P) sez:
+	 *
+	 * Either all the changes in wstat request happen, or none of them
+	 * does: if the request succeeds, all changes were made; if it fails,
+	 * none were.
+	 *
+	 * Atomicity is clearly missing in current implementation.
+	 */
+
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	/* XXX: atime and mtime changes are not implemented; ignored. */
+
+	if (l9stat->dev != (uint32_t)~0) {
+		error = EPERM;
+		goto out;
+	}
+
+	if (l9stat->length != (uint64_t)~0) {
+		if (file->ff_dir != NULL) {
+			error = EINVAL;
+			goto out;
+		}
+
+		/* ff_name is relative to ff_dirfd, not the cwd. */
+		fd = openat(file->ff_dirfd, file->ff_name, O_WRONLY);
+		if (fd < 0) {
+			error = errno;
+			goto out;
+		}
+		if (ftruncate(fd, (off_t)l9stat->length) != 0)
+			error = errno;
+		(void) close(fd);
+		if (error)
+			goto out;
+	}
+
+	if (req->lr_conn->lc_version >= L9P_2000U) {
+		if (fchownat(file->ff_dirfd, file->ff_name, l9stat->n_uid,
+		    l9stat->n_gid, AT_SYMLINK_NOFOLLOW) != 0) {
+			error = errno;
+			goto out;
+		}
+	}
+
+	if (l9stat->mode != (uint32_t)~0) {
+		if (fchmodat(file->ff_dirfd, file->ff_name,
+		    l9stat->mode & 0777, 0) != 0) {
+			error = errno;
+			goto out;
+		}
+	}
+
+	if (strlen(l9stat->name) > 0) {
+		struct l9p_acl *parent_acl;
+		struct stat st;
+		char *tmp;
+		char newname[MAXPATHLEN];
+
+		/*
+		 * Rename-within-directory: it's not deleting anything,
+		 * but we need write permission on the directory.  This
+		 * should suffice.
+		 */
+		error = fs_pdir(softc, fid, newname, sizeof(newname), &st);
+		if (error)
+			goto out;
+		parent_acl = getacl(file, -1, newname);
+		error = check_access(L9P_ACE_ADD_FILE,
+		    parent_acl, &st, NULL, NULL, file->ff_ai, (gid_t)-1);
+		l9p_acl_free(parent_acl);
+		if (error)
+			goto out;
+		error = fs_dpf(newname, l9stat->name, sizeof(newname));
+		if (error)
+			goto out;
+		tmp = strdup(newname);
+		if (tmp == NULL) {
+			error = ENOMEM;
+			goto out;
+		}
+		if (renameat(file->ff_dirfd, file->ff_name, file->ff_dirfd,
+		    tmp) != 0) {
+			error = errno;
+			free(tmp);
+			goto out;
+		}
+		/* Successful rename, update file->ff_name.  ACL can stay. */
+		free(file->ff_name);
+		file->ff_name = tmp;
+	}
+out:
+	return (error);
+}
+
+/* Tstatfs: report statistics for the file system holding the fid's file. */
+static int
+fs_statfs(void *softc __unused, struct l9p_request *req)
+{
+	struct fs_fid *file;
+	struct stat st;
+	struct statfs f;
+	long name_max;
+	int error, fd;
+
+	file = req->lr_fid->lo_aux;
+	assert(file);
+
+	if (fstatat(file->ff_dirfd, file->ff_name, &st,
+	    AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+
+	/* Not entirely clear what access to require; go for "read data". */
+	fillacl(file);
+	error = check_access(L9P_ACE_READ_DATA, NULL, NULL,
+	    file->ff_acl, &st, file->ff_ai, (gid_t)-1);
+	if (error)
+		return (error);
+
+	fd = openat(file->ff_dirfd, file->ff_name, 0);
+	if (fd < 0)
+		return (errno);
+
+	if (fstatfs(fd, &f) != 0) {
+		/* Save errno before close() and don't leak the descriptor. */
+		error = errno;
+		close(fd);
+		return (error);
+	}
+
+	name_max = fpathconf(fd, _PC_NAME_MAX);
+	error = errno;
+	close(fd);
+
+	if (name_max == -1)
+		return (error);
+
+	dostatfs(&req->lr_resp.rstatfs.statfs, &f, name_max);
+	return (0);
+}
+
+/*
+ * Tlopen: translate the client's open flags, open the fid via fs_iopen(),
+ * and answer with the file's qid and the connection's I/O unit.
+ */
+static int
+fs_lopen(void *softc, struct l9p_request *req)
+{
+	enum l9p_omode omode;
+	struct stat sb;
+	gid_t gid = req->lr_req.tlopen.gid;
+	int oflags, rc;
+
+	rc = fs_oflags_dotl(req->lr_req.tlopen.flags, &oflags, &omode);
+	if (rc == 0)
+		rc = fs_iopen(softc, req->lr_fid, oflags, omode, gid, &sb);
+	if (rc != 0)
+		return (rc);
+
+	generate_qid(&sb, &req->lr_resp.rlopen.qid);
+	req->lr_resp.rlopen.iounit = req->lr_conn->lc_max_io_size;
+	return (0);
+}
+
+/*
+ * Tlcreate: create "name" in the directory fid through fs_icreate().
+ */
+static int
+fs_lcreate(void *softc, struct l9p_request *req)
+{
+	enum l9p_omode omode;
+	struct stat sb;
+	mode_t perm;
+	gid_t gid;
+	int oflags, rc;
+
+	rc = fs_oflags_dotl(req->lr_req.tlcreate.flags, &oflags, &omode);
+	if (rc != 0)
+		return (rc);
+
+	/* Only the 0777 permission bits are kept (set-id bits are dropped). */
+	perm = (mode_t)req->lr_req.tlcreate.mode & 0777;
+	gid = req->lr_req.tlcreate.gid;
+	rc = fs_icreate(softc, req->lr_fid, req->lr_req.tlcreate.name,
+	    oflags, false, perm, gid, &sb);
+	if (rc == 0)
+		generate_qid(&sb, &req->lr_resp.rlcreate.qid);
+	req->lr_resp.rlcreate.iounit = req->lr_conn->lc_max_io_size;
+	return (rc);
+}
+
+/* Tsymlink: create symlink "name" pointing at "symtgt" in the dir fid. */
+static int
+fs_symlink(void *softc, struct l9p_request *req)
+{
+	char *name = req->lr_req.tsymlink.name;
+	gid_t gid = req->lr_req.tsymlink.gid;
+	struct stat sb;
+	int rc;
+
+	rc = fs_isymlink(softc, req->lr_fid, name,
+	    req->lr_req.tsymlink.symtgt, gid, &sb);
+	if (rc != 0)
+		return (rc);
+
+	/* Only a successful create has a qid to report. */
+	generate_qid(&sb, &req->lr_resp.rsymlink.qid);
+	return (0);
+}
+
+/* Tmknod: create a device node, FIFO or socket "name" in the dir fid. */
+static int
+fs_mknod(void *softc, struct l9p_request *req)
+{
+	struct l9p_fid *dir;
+	struct stat st;
+	uint32_t mode;
+	dev_t dev;
+	gid_t gid;
+	char *name;
+	int error;
+
+	dir = req->lr_fid;
+	name = req->lr_req.tmknod.name;
+	mode = req->lr_req.tmknod.mode;
+	gid = req->lr_req.tmknod.gid;
+
+	switch (mode & S_IFMT) {
+	case S_IFBLK:
+	case S_IFCHR:
+		mode = (mode & S_IFMT) | (mode & 0777);	/* ??? */
+		dev = makedev(req->lr_req.tmknod.major,
+		    req->lr_req.tmknod.minor);
+		error = fs_imknod(softc, dir, name, false,
+		    (mode_t)mode, dev, gid, &st);
+		break;
+
+	case S_IFIFO:
+		error = fs_imkfifo(softc, dir, name, false,
+		    (mode_t)(mode & 0777), gid, &st);
+		break;
+
+	case S_IFSOCK:
+		error = fs_imksocket(softc, dir, name, false,
+		    (mode_t)(mode & 0777), gid, &st);
+		break;
+
+	default:
+		error = EINVAL;
+		break;
+	}
+	if (error == 0)
+		generate_qid(&st, &req->lr_resp.rmknod.qid);
+	return (error);
+}
+
+static int
+fs_rename(void *softc, struct l9p_request *req)
+{
+	struct fs_softc *sc = softc;
+	struct fs_authinfo *ai;
+	struct l9p_acl *oparent_acl;
+	struct l9p_fid *fid, *f2;
+	struct fs_fid *file, *f2ff;
+	struct stat cst, opst, npst;
+	int32_t op;
+	bool reparenting;
+	char *tmp;
+	char olddir[MAXPATHLEN], newname[MAXPATHLEN];
+	int error;
+	/* Trename: move lr_fid's file to trename.name inside dir lr_fid2. */
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	/*
+	 * Note: lr_fid represents the file that is to be renamed,
+	 * so we must locate its parent directory and verify that
+	 * both this parent directory and the new directory f2 are
+	 * writable.  But if the new parent directory is the same
+	 * path as the old parent directory, our job is simpler.
+	 */
+	fid = req->lr_fid;
+	file = fid->lo_aux;
+	assert(file != NULL);
+	ai = file->ff_ai;
+
+	error = fs_pdir(sc, fid, olddir, sizeof(olddir), &opst);
+	if (error)
+		return (error);
+
+	f2 = req->lr_fid2;
+	f2ff = f2->lo_aux;
+	assert(f2ff != NULL);
+
+	reparenting = strcmp(olddir, f2ff->ff_name) != 0;
+	/* The test above compares the two path strings only. */
+	fillacl(file);
+	fillacl(f2ff);
+
+	if (fstatat(file->ff_dirfd, file->ff_name, &cst,
+	    AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+
+	/*
+	 * Are we moving from olddir?  If so, we're unlinking
+	 * from it, in terms of ACL access.
+	 */
+	if (reparenting) {
+		oparent_acl = getacl(file, -1, olddir);
+		error = check_access(L9P_ACOP_UNLINK,
+		    oparent_acl, &opst, file->ff_acl, &cst, ai, (gid_t)-1);
+		l9p_acl_free(oparent_acl);
+		if (error)
+			return (error);
+	}
+
+	/*
+	 * Now check that we're allowed to "create" a file or directory in
+	 * f2.  (Should we do this, too, only if reparenting?  Maybe check
+	 * for dir write permission if not reparenting -- but that's just
+	 * add-file/add-subdir, which means doing this always.)
+	 */
+	if (fstatat(f2ff->ff_dirfd, f2ff->ff_name, &npst,
+	    AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+
+	op = S_ISDIR(cst.st_mode) ? L9P_ACE_ADD_SUBDIRECTORY : L9P_ACE_ADD_FILE;
+	error = check_access(op, f2ff->ff_acl, &npst, NULL, NULL,
+	    ai, (gid_t)-1);
+	if (error)
+		return (error);
+
+	/*
+	 * Directories OK, file systems not R/O, etc; build final name.
+	 * f2ff->ff_name cannot exceed MAXPATHLEN, but out of general
+	 * paranoia, let's double check anyway.
+	 */
+	if (strlcpy(newname, f2ff->ff_name, sizeof(newname)) >= sizeof(newname))
+		return (ENAMETOOLONG);
+	error = fs_dpf(newname, req->lr_req.trename.name, sizeof(newname));
+	if (error)
+		return (error);
+	tmp = strdup(newname);
+	if (tmp == NULL)
+		return (ENOMEM);
+	/* NOTE(review): old and new names both resolve via file->ff_dirfd. */
+	if (renameat(file->ff_dirfd, file->ff_name, file->ff_dirfd, tmp) != 0) {
+		error = errno;
+		free(tmp);
+		return (error);
+	}
+
+	/* file has been renamed but old fid is not clunked */
+	free(file->ff_name);
+	file->ff_name = tmp;
+	/* Presumably the cached ACL may be stale after the move -- confirm. */
+	dropacl(file);
+	return (0);
+}
+
+/* Treadlink: return a heap copy of the symlink target for the fid. */
+static int
+fs_readlink(void *softc __unused, struct l9p_request *req)
+{
+	struct fs_fid *ff = req->lr_fid->lo_aux;
+	char target[MAXPATHLEN];
+	ssize_t n;
+
+	assert(ff);
+
+	n = readlinkat(ff->ff_dirfd, ff->ff_name, target, sizeof(target));
+	if (n < 0)
+		return (errno);
+	/* A full buffer may mean truncation; todo: allocate dynamically. */
+	if ((size_t)n >= sizeof(target))
+		return (ENOMEM);
+	req->lr_resp.rreadlink.target = strndup(target, (size_t)n);
+	if (req->lr_resp.rreadlink.target == NULL)
+		return (ENOMEM);
+	return (0);
+}
+
+static int
+fs_getattr(void *softc __unused, struct l9p_request *req)
+{
+	uint64_t mask, valid;
+	struct fs_fid *file;
+	struct stat st;
+	int error = 0;
+
+	file = req->lr_fid->lo_aux;
+	assert(file);
+	/* "valid" accumulates the L9PL_GETATTR_* bits actually answered. */
+	valid = 0;
+	if (fstatat(file->ff_dirfd, file->ff_name, &st, AT_SYMLINK_NOFOLLOW)) {
+		error = errno;
+		goto out;
+	}
+	/* Only requested items are filled in; "valid" reports which ones. */
+	mask = req->lr_req.tgetattr.request_mask;
+	if (mask & L9PL_GETATTR_MODE) {
+		/* It is not clear if we need any translations. */
+		req->lr_resp.rgetattr.mode = st.st_mode;
+		valid |= L9PL_GETATTR_MODE;
+	}
+	if (mask & L9PL_GETATTR_NLINK) {
+		req->lr_resp.rgetattr.nlink = st.st_nlink;
+		valid |= L9PL_GETATTR_NLINK;
+	}
+	if (mask & L9PL_GETATTR_UID) {
+		/* provide st_uid, or file->ff_uid? */
+		req->lr_resp.rgetattr.uid = st.st_uid;
+		valid |= L9PL_GETATTR_UID;
+	}
+	if (mask & L9PL_GETATTR_GID) {
+		/* provide st_gid, or file->ff_gid? */
+		req->lr_resp.rgetattr.gid = st.st_gid;
+		valid |= L9PL_GETATTR_GID;
+	}
+	if (mask & L9PL_GETATTR_RDEV) {
+		/* It is not clear if we need any translations. */
+		req->lr_resp.rgetattr.rdev = (uint64_t)st.st_rdev;
+		valid |= L9PL_GETATTR_RDEV;
+	}
+	if (mask & L9PL_GETATTR_ATIME) {
+		req->lr_resp.rgetattr.atime_sec =
+		    (uint64_t)st.st_atimespec.tv_sec;
+		req->lr_resp.rgetattr.atime_nsec =
+		    (uint64_t)st.st_atimespec.tv_nsec;
+		valid |= L9PL_GETATTR_ATIME;
+	}
+	if (mask & L9PL_GETATTR_MTIME) {
+		req->lr_resp.rgetattr.mtime_sec =
+		    (uint64_t)st.st_mtimespec.tv_sec;
+		req->lr_resp.rgetattr.mtime_nsec =
+		    (uint64_t)st.st_mtimespec.tv_nsec;
+		valid |= L9PL_GETATTR_MTIME;
+	}
+	if (mask & L9PL_GETATTR_CTIME) {
+		req->lr_resp.rgetattr.ctime_sec =
+		    (uint64_t)st.st_ctimespec.tv_sec;
+		req->lr_resp.rgetattr.ctime_nsec =
+		    (uint64_t)st.st_ctimespec.tv_nsec;
+		valid |= L9PL_GETATTR_CTIME;
+	}
+	if (mask & L9PL_GETATTR_BTIME) {
+#if defined(HAVE_BIRTHTIME)
+		req->lr_resp.rgetattr.btime_sec =
+		    (uint64_t)st.st_birthtim.tv_sec;
+		req->lr_resp.rgetattr.btime_nsec =
+		    (uint64_t)st.st_birthtim.tv_nsec;
+#else
+		req->lr_resp.rgetattr.btime_sec = 0;
+		req->lr_resp.rgetattr.btime_nsec = 0;
+#endif
+		valid |= L9PL_GETATTR_BTIME;
+	}
+	if (mask & L9PL_GETATTR_INO)
+		valid |= L9PL_GETATTR_INO;	/* nothing else is stored */
+	if (mask & L9PL_GETATTR_SIZE) {
+		req->lr_resp.rgetattr.size = (uint64_t)st.st_size;
+		valid |= L9PL_GETATTR_SIZE;
+	}
+	if (mask & L9PL_GETATTR_BLOCKS) {
+		req->lr_resp.rgetattr.blksize = (uint64_t)st.st_blksize;
+		req->lr_resp.rgetattr.blocks = (uint64_t)st.st_blocks;
+		valid |= L9PL_GETATTR_BLOCKS;
+	}
+	if (mask & L9PL_GETATTR_GEN) {
+		req->lr_resp.rgetattr.gen = st.st_gen;
+		valid |= L9PL_GETATTR_GEN;
+	}
+	/* don't know what to do with data version yet */
+
+	generate_qid(&st, &req->lr_resp.rgetattr.qid);
+out:
+	req->lr_resp.rgetattr.valid = valid;	/* stays 0 if fstatat() failed */
+	return (error);
+}
+
+/*
+ * Tsetattr: apply the attribute changes selected by tsetattr.valid.
+ * Should combine some of this with wstat code.
+ */
+static int
+fs_setattr(void *softc, struct l9p_request *req)
+{
+	uint64_t mask;
+	struct fs_softc *sc = softc;
+	struct timespec ts[2];
+	struct fs_fid *file;
+	struct stat st;
+	int error = 0;
+	uid_t uid, gid;
+
+	file = req->lr_fid->lo_aux;
+	assert(file);
+
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	/* As with WSTAT we have atomicity issues. */
+	mask = req->lr_req.tsetattr.valid;
+
+	if (fstatat(file->ff_dirfd, file->ff_name, &st, AT_SYMLINK_NOFOLLOW)) {
+		error = errno;
+		goto out;
+	}
+
+	if ((mask & L9PL_SETATTR_SIZE) && S_ISDIR(st.st_mode)) {
+		error = EISDIR;
+		goto out;
+	}
+
+	if (mask & L9PL_SETATTR_MODE) {
+		if (fchmodat(file->ff_dirfd, file->ff_name,
+		    req->lr_req.tsetattr.mode & 0777,
+		    AT_SYMLINK_NOFOLLOW)) {
+			error = errno;
+			goto out;
+		}
+	}
+
+	if (mask & (L9PL_SETATTR_UID | L9PL_SETATTR_GID)) {
+		uid = mask & L9PL_SETATTR_UID ?
+		    req->lr_req.tsetattr.uid : (uid_t)-1;
+
+		gid = mask & L9PL_SETATTR_GID ?
+		    req->lr_req.tsetattr.gid : (gid_t)-1;
+
+		if (fchownat(file->ff_dirfd, file->ff_name, uid, gid,
+		    AT_SYMLINK_NOFOLLOW)) {
+			error = errno;
+			goto out;
+		}
+	}
+
+	if (mask & L9PL_SETATTR_SIZE) {
+		/* Truncate follows symlinks, is this OK? */
+		int fd = openat(file->ff_dirfd, file->ff_name, O_RDWR);
+		if (fd < 0) {
+			error = errno;
+			goto out;
+		}
+		if (ftruncate(fd, (off_t)req->lr_req.tsetattr.size))
+			error = errno;
+		(void) close(fd);
+		if (error)
+			goto out;
+	}
+
+	if (mask & (L9PL_SETATTR_ATIME | L9PL_SETATTR_MTIME)) {
+		ts[0].tv_sec = st.st_atimespec.tv_sec;
+		ts[0].tv_nsec = st.st_atimespec.tv_nsec;
+		ts[1].tv_sec = st.st_mtimespec.tv_sec;
+		ts[1].tv_nsec = st.st_mtimespec.tv_nsec;
+
+		if (mask & L9PL_SETATTR_ATIME) {
+			if (mask & L9PL_SETATTR_ATIME_SET) {
+				ts[0].tv_sec = req->lr_req.tsetattr.atime_sec;
+				ts[0].tv_nsec = req->lr_req.tsetattr.atime_nsec;
+			} else {
+				if (clock_gettime(CLOCK_REALTIME, &ts[0]) != 0) {
+					error = errno;
+					goto out;
+				}
+			}
+		}
+
+		if (mask & L9PL_SETATTR_MTIME) {
+			if (mask & L9PL_SETATTR_MTIME_SET) {
+				ts[1].tv_sec = req->lr_req.tsetattr.mtime_sec;
+				ts[1].tv_nsec = req->lr_req.tsetattr.mtime_nsec;
+			} else {
+				if (clock_gettime(CLOCK_REALTIME, &ts[1]) != 0) {
+					error = errno;
+					goto out;
+				}
+			}
+		}
+
+		if (utimensat(file->ff_dirfd, file->ff_name, ts,
+		    AT_SYMLINK_NOFOLLOW)) {
+			error = errno;
+			goto out;
+		}
+	}
+out:
+	return (error);
+}
+
+static int
+fs_xattrwalk(void *softc __unused, struct l9p_request *req __unused)
+{
+	return (EOPNOTSUPP);	/* this operation is not implemented here */
+}
+
+static int
+fs_xattrcreate(void *softc __unused, struct l9p_request *req __unused)
+{
+	return (EOPNOTSUPP);	/* this operation is not implemented here */
+}
+
+static int
+fs_readdir(void *softc __unused, struct l9p_request *req)
+{
+	struct l9p_message msg;
+	struct l9p_dirent de;
+	struct fs_fid *file;
+	struct dirent *dp;
+	struct stat st;
+	uint32_t count;
+	int error = 0;
+	/* Treaddir: pack entries until the stream ends or one fails to pack. */
+	file = req->lr_fid->lo_aux;
+	assert(file);
+
+	if (file->ff_dir == NULL)
+		return (ENOTDIR);
+	/* ff_mtx is held across the seek and the whole read loop below. */
+	pthread_mutex_lock(&file->ff_mtx);
+
+	/*
+	 * It's not clear whether we can use the same trick for
+	 * discarding offsets here as we do in fs_read.  It
+	 * probably should work, we'll have to see if some
+	 * client(s) use the zero-offset thing to rescan without
+	 * clunking the directory first.
+	 *
+	 * Probably the thing to do is switch to calling
+	 * getdirentries() / getdents() directly, instead of
+	 * going through libc.
+	 */
+	if (req->lr_req.io.offset == 0)
+		rewinddir(file->ff_dir);
+	else
+		seekdir(file->ff_dir, (long)req->lr_req.io.offset);
+
+	l9p_init_msg(&msg, req, L9P_PACK);
+	count = (uint32_t)msg.lm_size; /* in case we get no entries */
+	while ((dp = readdir(file->ff_dir)) != NULL) {
+		/*
+		 * Although "." is forbidden in naming and ".." is
+		 * special cased, testing shows that we must transmit
+		 * them through readdir.  (For ".." at root, we
+		 * should perhaps alter the inode number, but not
+		 * yet.)
+		 */
+
+		/*
+		 * TODO: we do a full lstat here; could use dp->d_*
+		 * to construct the qid more efficiently, as long
+		 * as dp->d_type != DT_UNKNOWN.
+		 */
+		if (fs_lstatat(file, dp->d_name, &st))
+			continue;	/* skip entries we cannot stat */
+		/* de.name borrows dp->d_name; packed before the next readdir(). */
+		de.qid.type = 0;
+		generate_qid(&st, &de.qid);
+		de.offset = (uint64_t)telldir(file->ff_dir);
+		de.type = dp->d_type;
+		de.name = dp->d_name;
+
+		/* Update count only if we completely pack the dirent. */
+		if (l9p_pudirent(&msg, &de) < 0)
+			break;
+		count = (uint32_t)msg.lm_size;
+	}
+
+	pthread_mutex_unlock(&file->ff_mtx);
+	req->lr_resp.io.count = count;
+	return (error);
+}
+
+/* Tfsync: fsync the directory stream's fd if the fid has one, else ff_fd. */
+static int
+fs_fsync(void *softc __unused, struct l9p_request *req)
+{
+	struct fs_fid *ff = req->lr_fid->lo_aux;
+	int fd;
+
+	assert(ff);
+
+	fd = (ff->ff_dir == NULL) ? ff->ff_fd : dirfd(ff->ff_dir);
+	return (fsync(fd) != 0 ? errno : 0);
+}
+
+/*
+ * Tlock: no real locking is performed.  Any request with a recognized
+ * lock type is answered with L9PL_LOCK_SUCCESS; others get EINVAL.
+ */
+static int
+fs_lock(void *softc __unused, struct l9p_request *req)
+{
+
+	if (req->lr_req.tlock.type != L9PL_LOCK_TYPE_RDLOCK &&
+	    req->lr_req.tlock.type != L9PL_LOCK_TYPE_WRLOCK &&
+	    req->lr_req.tlock.type != L9PL_LOCK_TYPE_UNLOCK)
+		return (EINVAL);
+
+	req->lr_resp.rlock.status = L9PL_LOCK_SUCCESS;
+	return (0);
+}
+
+/*
+ * Tgetlock: client wants to see if a request to lock a region would
+ * block.  This is, of course, not atomic anyway, so the op is useless.
+ * QEMU simply says "unlocked!", so we do too.
+ */
+static int
+fs_getlock(void *softc __unused, struct l9p_request *req)
+{
+	switch (req->lr_req.getlock.type) {
+	case L9PL_LOCK_TYPE_RDLOCK:
+	case L9PL_LOCK_TYPE_WRLOCK:
+	case L9PL_LOCK_TYPE_UNLOCK:
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	req->lr_resp.getlock = req->lr_req.getlock;
+	req->lr_resp.getlock.type = L9PL_LOCK_TYPE_UNLOCK;
+	req->lr_resp.getlock.client_id = strdup("");  /* XXX what should go here? */
+	if (req->lr_resp.getlock.client_id == NULL)
+		return (ENOMEM);
+	return (0);
+}
+
+/* Tlink: hard-link file lr_fid as "name" in directory lr_fid2. */
+static int
+fs_link(void *softc, struct l9p_request *req)
+{
+	struct fs_softc *sc = softc;
+	struct l9p_fid *dir;
+	struct fs_fid *file, *dirf;
+	struct stat fst, tdst;
+	char newname[MAXPATHLEN];
+	int error;
+
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	/* N.B.: lr_fid is the file to link, lr_fid2 is the target dir */
+	dir = req->lr_fid2;
+	dirf = dir->lo_aux;
+	assert(dirf != NULL);
+
+	error = fs_buildname(dir, req->lr_req.tlink.name, newname,
+	    sizeof(newname));
+	if (error)
+		return (error);
+
+	file = req->lr_fid->lo_aux;
+	assert(file != NULL);
+	if (fstatat(dirf->ff_dirfd, dirf->ff_name, &tdst, AT_SYMLINK_NOFOLLOW) != 0 ||
+	    fstatat(file->ff_dirfd, file->ff_name, &fst, AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+	if (S_ISDIR(fst.st_mode))
+		return (EISDIR);
+	fillacl(dirf);
+	/* Not a directory (checked above), so this is always "add file". */
+	error = check_access(L9P_ACE_ADD_FILE,
+	    dirf->ff_acl, &tdst, NULL, NULL, file->ff_ai, (gid_t)-1);
+	if (error)
+		return (error);
+
+	if (linkat(file->ff_dirfd, file->ff_name, dirf->ff_dirfd,
+	    newname, 0) != 0)
+		error = errno;
+	else
+		dropacl(file);
+	return (error);
+}
+
+/* Tmkdir: create directory "name" under the dir fid and return its qid. */
+static int
+fs_mkdir(void *softc, struct l9p_request *req)
+{
+	struct stat sb;
+	mode_t mode;
+	gid_t gid;
+	int rc;
+
+	/* The requested mode is passed through without masking. */
+	mode = (mode_t)req->lr_req.tmkdir.mode;
+	gid = req->lr_req.tmkdir.gid;
+	rc = fs_imkdir(softc, req->lr_fid, req->lr_req.tmkdir.name, false,
+	    mode, gid, &sb);
+	if (rc != 0)
+		return (rc);
+
+	generate_qid(&sb, &req->lr_resp.rmkdir.qid);
+	return (0);
+}
+
+static int
+fs_renameat(void *softc, struct l9p_request *req)
+{
+	struct fs_softc *sc = softc;
+	struct l9p_fid *olddir, *newdir;
+	struct l9p_acl *facl;
+	struct fs_fid *off, *nff;
+	struct stat odst, ndst, fst;
+	int32_t op;
+	bool reparenting;
+	char *onp, *nnp;
+	char onb[MAXPATHLEN], nnb[MAXPATHLEN];
+	int error;
+	/* Trenameat: rename oldname in dir lr_fid to newname in dir lr_fid2. */
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	olddir = req->lr_fid;
+	newdir = req->lr_fid2;
+	assert(olddir != NULL && newdir != NULL);
+	off = olddir->lo_aux;
+	nff = newdir->lo_aux;
+	assert(off != NULL && nff != NULL);
+
+	onp = req->lr_req.trenameat.oldname;
+	nnp = req->lr_req.trenameat.newname;
+	error = fs_buildname(olddir, onp, onb, sizeof(onb));
+	if (error)
+		return (error);
+	error = fs_buildname(newdir, nnp, nnb, sizeof(nnb));
+	if (error)
+		return (error);
+	if (fstatat(off->ff_dirfd, onb, &fst, AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+	/* Same l9p_fid, or two fids with equal ff_name: not reparenting. */
+	reparenting = olddir != newdir &&
+	    strcmp(off->ff_name, nff->ff_name) != 0;
+
+	if (fstatat(off->ff_dirfd, off->ff_name, &odst, AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+	if (!S_ISDIR(odst.st_mode))
+		return (ENOTDIR);
+	fillacl(off);
+
+	if (reparenting) {
+		if (fstatat(nff->ff_dirfd, nff->ff_name, &ndst, AT_SYMLINK_NOFOLLOW) != 0)
+			return (errno);
+		if (!S_ISDIR(ndst.st_mode))
+			return (ENOTDIR);
+		facl = getacl(off, -1, onb);
+		fillacl(nff);
+
+		error = check_access(L9P_ACOP_UNLINK,
+		    off->ff_acl, &odst, facl, &fst, off->ff_ai, (gid_t)-1);
+		l9p_acl_free(facl);
+		if (error)
+			return (error);
+		op = S_ISDIR(fst.st_mode) ? L9P_ACE_ADD_SUBDIRECTORY :
+		    L9P_ACE_ADD_FILE;
+		error = check_access(op,
+		    nff->ff_acl, &ndst, NULL, NULL, nff->ff_ai, (gid_t)-1);
+		if (error)
+			return (error);
+	}
+	/* NOTE(review): no access check is made when !reparenting. */
+	if (renameat(off->ff_dirfd, onb, nff->ff_dirfd, nnb))
+		error = errno;
+
+	return (error);
+}
+
+/*
+ * Tunlinkat: remove "name" from the directory fid.  L9PL_AT_REMOVEDIR in
+ * the request flags selects rmdir-style removal (AT_REMOVEDIR), otherwise
+ * a plain unlink is done.
+ */
+static int
+fs_unlinkat(void *softc, struct l9p_request *req)
+{
+	struct fs_softc *fsc = softc;
+	struct fs_fid *dff;
+	struct l9p_acl *victim_acl;
+	struct stat dsb, vsb;
+	char path[MAXPATHLEN];
+	int atflag, rc;
+
+	if (fsc->fs_readonly)
+		return (EROFS);
+
+	dff = req->lr_fid->lo_aux;
+	assert(dff != NULL);
+
+	rc = fs_buildname(req->lr_fid, req->lr_req.tunlinkat.name, path,
+	    sizeof(path));
+	if (rc != 0)
+		return (rc);
+
+	/* Stat the victim, then its directory, for the access check below. */
+	if (fstatat(dff->ff_dirfd, path, &vsb, AT_SYMLINK_NOFOLLOW) != 0 ||
+	    fstatat(dff->ff_dirfd, dff->ff_name, &dsb, AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+
+	fillacl(dff);
+	victim_acl = getacl(dff, -1, path);
+	rc = check_access(L9P_ACOP_UNLINK,
+	    dff->ff_acl, &dsb, victim_acl, &vsb, dff->ff_ai, (gid_t)-1);
+	l9p_acl_free(victim_acl);
+	if (rc != 0)
+		return (rc);
+
+	atflag = (req->lr_req.tunlinkat.flags & L9PL_AT_REMOVEDIR) != 0 ?
+	    AT_REMOVEDIR : 0;
+	if (unlinkat(dff->ff_dirfd, path, atflag) != 0)
+		rc = errno;
+
+	return (rc);
+}
+
+/*
+ * Release a fid's backend state (lo_aux) and drop its authinfo reference.
+ */
+static void
+fs_freefid(void *softc __unused, struct l9p_fid *fid)
+{
+	struct fs_fid *ff = fid->lo_aux;
+	struct fs_authinfo *auth;
+	uint32_t refs;
+
+	if (ff == NULL)
+		return;
+
+	if (ff->ff_fd != -1)
+		close(ff->ff_fd);
+	if (ff->ff_dir)
+		closedir(ff->ff_dir);
+
+	/* Grab the authinfo pointer before the fs_fid itself goes away. */
+	auth = ff->ff_ai;
+	pthread_mutex_destroy(&ff->ff_mtx);
+	free(ff->ff_name);
+	l9p_acl_free(ff->ff_acl);
+	free(ff);
+
+	pthread_mutex_lock(&auth->ai_mtx);
+	refs = --auth->ai_refcnt;
+	pthread_mutex_unlock(&auth->ai_mtx);
+	if (refs != 0) {
+		L9P_LOG(L9P_DEBUG, "authinfo %p now used by %lu",
+		    (void *)auth, (u_long)refs);
+		return;
+	}
+
+	/* We *were* the last ref, no one can have gained a ref. */
+	L9P_LOG(L9P_DEBUG, "dropped last ref to authinfo %p", (void *)auth);
+	pthread_mutex_destroy(&auth->ai_mtx);
+	free(auth);
+}
+
+/* Build the fs backend (op table + softc) for rootfd; 0, or -1 with errno. */
+int
+l9p_backend_fs_init(struct l9p_backend **backendp, int rootfd, bool ro)
+{
+	struct l9p_backend *backend;
+	struct fs_softc *sc;
+	int error;
+#if defined(WITH_CASPER)
+	cap_channel_t *capcas;
+#endif
+
+	if (!fs_attach_mutex_inited) {
+		error = pthread_mutex_init(&fs_attach_mutex, NULL);
+		if (error) {
+			errno = error;
+			return (-1);
+		}
+		fs_attach_mutex_inited = true;
+	}
+	backend = l9p_malloc(sizeof(*backend));
+	backend->attach = fs_attach;
+	backend->clunk = fs_clunk;
+	backend->create = fs_create;
+	backend->open = fs_open;
+	backend->read = fs_read;
+	backend->remove = fs_remove;
+	backend->stat = fs_stat;
+	backend->walk = fs_walk;
+	backend->write = fs_write;
+	backend->wstat = fs_wstat;
+	backend->statfs = fs_statfs;
+	backend->lopen = fs_lopen;
+	backend->lcreate = fs_lcreate;
+	backend->symlink = fs_symlink;
+	backend->mknod = fs_mknod;
+	backend->rename = fs_rename;
+	backend->readlink = fs_readlink;
+	backend->getattr = fs_getattr;
+	backend->setattr = fs_setattr;
+	backend->xattrwalk = fs_xattrwalk;
+	backend->xattrcreate = fs_xattrcreate;
+	backend->readdir = fs_readdir;
+	backend->fsync = fs_fsync;
+	backend->lock = fs_lock;
+	backend->getlock = fs_getlock;
+	backend->link = fs_link;
+	backend->mkdir = fs_mkdir;
+	backend->renameat = fs_renameat;
+	backend->unlinkat = fs_unlinkat;
+	backend->freefid = fs_freefid;
+	sc = l9p_malloc(sizeof(*sc));
+	sc->fs_rootfd = rootfd;
+	sc->fs_readonly = ro;
+	backend->softc = sc;
+#if defined(WITH_CASPER)
+	sc->fs_cappwd = sc->fs_capgrp = NULL;
+	if ((capcas = cap_init()) != NULL &&
+	    (sc->fs_cappwd = cap_service_open(capcas, "system.pwd")) != NULL)
+		sc->fs_capgrp = cap_service_open(capcas, "system.grp");
+	if (sc->fs_capgrp == NULL) {
+		error = errno;	/* cleanup below may clobber errno */
+		if (sc->fs_cappwd != NULL)
+			cap_close(sc->fs_cappwd);
+		if (capcas != NULL)
+			cap_close(capcas);
+		free(sc);	/* don't leak the softc or the op table */
+		free(backend);
+		errno = error;
+		return (-1);
+	}
+	cap_setpassent(sc->fs_cappwd, 1);
+	cap_setgroupent(sc->fs_capgrp, 1);
+	cap_close(capcas);
+#else
+	setpassent(1);
+#endif
+	*backendp = backend;
+	return (0);
+}
Index: lib/lib9p/connection.c
===================================================================
--- /dev/null
+++ lib/lib9p/connection.c
@@ -0,0 +1,215 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/queue.h>
+#include "lib9p.h"
+#include "lib9p_impl.h"
+#include "fid.h"
+#include "hashtable.h"
+#include "log.h"
+#include "threadpool.h"
+#include "backend/backend.h"
+
+/* Allocate a server bound to "backend" and store it in *serverp. */
+int
+l9p_server_init(struct l9p_server **serverp, struct l9p_backend *backend)
+{
+	struct l9p_server *srv = l9p_calloc(1, sizeof (*srv));
+
+	LIST_INIT(&srv->ls_conns);
+	srv->ls_backend = backend;
+	srv->ls_max_version = L9P_2000L;
+
+	*serverp = srv;
+	return (0);
+}
+
+/* Allocate a connection, set up its pool and tables, link it to "server". */
+int
+l9p_connection_init(struct l9p_server *server, struct l9p_connection **conn)
+{
+	struct l9p_connection *lc;
+
+	assert(server != NULL);
+	assert(conn != NULL);
+
+	if ((lc = calloc(1, sizeof (*lc))) == NULL)
+		return (-1);
+	lc->lc_server = server;
+	lc->lc_msize = L9P_DEFAULT_MSIZE;
+	if (l9p_threadpool_init(&lc->lc_tp, L9P_NUMTHREADS) != 0) {
+		free(lc);
+		return (-1);
+	}
+	ht_init(&lc->lc_files, 100);
+	ht_init(&lc->lc_requests, 100);
+	LIST_INSERT_HEAD(&server->ls_conns, lc, lc_link);
+	*conn = lc;
+
+	return (0);
+}
+
+void
+l9p_connection_free(struct l9p_connection *conn)
+{
+	/* Unlink from the list it was put on via lc_link, then free it. */
+	LIST_REMOVE(conn, lc_link);
+	free(conn);
+}
+
+/* Turn one received message into a request and hand it to the thread pool. */
+void
+l9p_connection_recv(struct l9p_connection *conn, const struct iovec *iov,
+    const size_t niov, void *aux)
+{
+	struct l9p_request *req;
+	int error;
+
+	req = l9p_calloc(1, sizeof (struct l9p_request));
+	req->lr_aux = aux;
+	req->lr_conn = conn;
+
+	/* lm_iov is a fixed-size array; don't let the memcpy overrun it. */
+	if (niov > sizeof (req->lr_req_msg.lm_iov) / sizeof (struct iovec)) {
+		L9P_LOG(L9P_WARNING, "too many iovecs in received message");
+		free(req);
+		return;
+	}
+	req->lr_req_msg.lm_mode = L9P_UNPACK;
+	req->lr_req_msg.lm_niov = niov;
+	memcpy(req->lr_req_msg.lm_iov, iov, sizeof (struct iovec) * niov);
+	req->lr_resp_msg.lm_mode = L9P_PACK;
+
+	if (l9p_pufcall(&req->lr_req_msg, &req->lr_req, conn->lc_version) != 0) {
+		L9P_LOG(L9P_WARNING, "cannot unpack received message");
+		goto drop;
+	}
+	if (ht_add(&conn->lc_requests, req->lr_req.hdr.tag, req)) {
+		L9P_LOG(L9P_WARNING, "client reusing outstanding tag %d",
+		    req->lr_req.hdr.tag);
+		goto drop;
+	}
+	error = conn->lc_lt.lt_get_response_buffer(req,
+	    req->lr_resp_msg.lm_iov, &req->lr_resp_msg.lm_niov,
+	    conn->lc_lt.lt_aux);
+	if (error) {
+		L9P_LOG(L9P_WARNING, "cannot obtain buffers for response");
+		ht_remove(&conn->lc_requests, req->lr_req.hdr.tag);
+		goto drop;
+	}
+
+	/*
+	 * NB: l9p_threadpool_run decides whether to queue the work or run
+	 * it immediately and wait (it must do the latter for Tflush).
+	 */
+	l9p_threadpool_run(&conn->lc_tp, req);
+	return;
+drop:
+	l9p_freefcall(&req->lr_req);
+	free(req);
+}
+
+void
+l9p_connection_close(struct l9p_connection *conn)
+{
+	struct ht_iter iter;
+	struct l9p_fid *fid;
+	struct l9p_request *req;
+	/* Shut down the thread pool, then discard requests, then free fids. */
+	L9P_LOG(L9P_DEBUG, "waiting for thread pool to shut down");
+	l9p_threadpool_shutdown(&conn->lc_tp);
+
+	/* Drain pending requests (if any) */
+	L9P_LOG(L9P_DEBUG, "draining pending requests");
+	ht_iter(&conn->lc_requests, &iter);
+	while ((req = ht_next(&iter)) != NULL) {
+#ifdef notyet
+		/* XXX would be good to know if there is anyone listening */
+		if (anyone listening) {
+			/* XXX crude - ops like Tclunk should succeed */
+			req->lr_error = EINTR;
+			l9p_respond(req, false, false);
+		} else
+#endif
+		l9p_respond(req, true, false);	/* use no-answer path */
+		ht_remove_at_iter(&iter);
+	}
+
+	/* Close opened files (if any) */
+	L9P_LOG(L9P_DEBUG, "closing opened files");
+	ht_iter(&conn->lc_files, &iter);
+	while ((fid = ht_next(&iter)) != NULL) {
+		conn->lc_server->ls_backend->freefid(
+		    conn->lc_server->ls_backend->softc, fid);
+		free(fid);
+		ht_remove_at_iter(&iter);
+	}
+	/* The connection structure itself is not freed by this function. */
+	ht_destroy(&conn->lc_requests);
+	ht_destroy(&conn->lc_files);
+}
+
+/*
+ * Allocate a fid numbered "fid" and enter it in the connection's file
+ * table.  The new fid is not marked valid yet, so on success the table
+ * holds an invalid fid (as desired).  Returns NULL if the number is
+ * already in use.
+ */
+struct l9p_fid *
+l9p_connection_alloc_fid(struct l9p_connection *conn, uint32_t fid)
+{
+	struct l9p_fid *newfid;
+
+	newfid = l9p_calloc(1, sizeof (struct l9p_fid));
+	newfid->lo_fid = fid;
+
+	if (ht_add(&conn->lc_files, fid, newfid) == 0)
+		return (newfid);
+
+	/* The insert failed: discard the unused allocation. */
+	free(newfid);
+	return (NULL);
+}
+
+/* Tear down a fid: free backend state, drop it from the table, free it. */
+void
+l9p_connection_remove_fid(struct l9p_connection *conn, struct l9p_fid *fid)
+{
+	struct l9p_backend *backend = conn->lc_server->ls_backend;
+
+	/* fid should be marked invalid by this point */
+	assert(!l9p_fid_isvalid(fid));
+
+	backend->freefid(backend->softc, fid);
+
+	ht_remove(&conn->lc_files, fid->lo_fid);
+	free(fid);
+}
Index: lib/lib9p/example/Makefile
===================================================================
--- /dev/null
+++ lib/lib9p/example/Makefile
@@ -0,0 +1,10 @@
+PROG=	server
+SRCS=	server.c
+MAN=
+
+# Append (not assign) so flags inherited from sys.mk/make.conf are kept.
+CFLAGS+=	-pthread -g -O0
+LDFLAGS+=-L..
+LDADD=	-lsbuf -l9p -lcasper -lcap_pwd -lcap_grp
+
+.include <bsd.prog.mk>
Index: lib/lib9p/example/server.c
===================================================================
--- /dev/null
+++ lib/lib9p/example/server.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <err.h>
+#include <unistd.h>
+#include "../lib9p.h"
+#include "../backend/fs.h"
+#include "../transport/socket.h"
+
+/* Example server: serve the directory <path> using the fs backend. */
+int
+main(int argc, char **argv)
+{
+	struct l9p_backend *fs_backend;
+	struct l9p_server *server;
+	char *host = "0.0.0.0";
+	char *port = "564";
+	char *path;
+	int rootfd, opt;
+
+	while ((opt = getopt(argc, argv, "h:p:")) != -1) {
+		switch (opt) {
+		case 'h':
+			host = optarg;
+			break;
+		case 'p':
+			port = optarg;
+			break;
+		case '?':
+		default:
+			goto usage;
+		}
+	}
+	if (optind >= argc) {
+usage:
+		errx(1, "Usage: server [-h <host>] [-p <port>] <path>");
+	}
+	path = argv[optind];
+	rootfd = open(path, O_RDONLY | O_DIRECTORY);
+
+	if (rootfd < 0)
+		err(1, "cannot open root directory");
+
+	if (l9p_backend_fs_init(&fs_backend, rootfd) != 0)
+		err(1, "cannot init backend");
+
+	if (l9p_server_init(&server, fs_backend) != 0)
+		err(1, "cannot create server");
+
+	server->ls_max_version = L9P_2000L;
+	if (l9p_start_server(server, host, port))
+		err(1, "l9p_start_server() failed");
+
+	/* XXX - we never get here, l9p_start_server does not return */
+	exit(0);
+}
Index: lib/lib9p/fcall.h
===================================================================
--- /dev/null
+++ lib/lib9p/fcall.h
@@ -0,0 +1,624 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * Based on libixp code: ©2007-2010 Kris Maglione <maglione.k at Gmail>
+ */
+
+#ifndef LIB9P_FCALL_H
+#define LIB9P_FCALL_H
+
+#include <stdint.h>
+
+#define L9P_MAX_WELEM   256
+
+/*
+ * Function call/reply (Tfoo/Rfoo) numbers.
+ *
+ * These are protocol code numbers, so the exact values
+ * matter.  However, __FIRST and __LAST_PLUS_ONE are for
+ * debug code, and just need to encompass the entire range.
+ *
+ * Note that we rely (in the debug code) on Rfoo == Tfoo+1.
+ */
+enum l9p_ftype {
+	L9P__FIRST = 6,		/* NB: must be <= all legal values */
+	L9P_TLERROR = 6,	/* illegal; exists for parity with Rlerror */
+	L9P_RLERROR,
+	L9P_TSTATFS = 8,
+	L9P_RSTATFS,
+	L9P_TLOPEN = 12,
+	L9P_RLOPEN,
+	L9P_TLCREATE = 14,
+	L9P_RLCREATE,
+	L9P_TSYMLINK = 16,
+	L9P_RSYMLINK,
+	L9P_TMKNOD = 18,
+	L9P_RMKNOD,
+	L9P_TRENAME = 20,
+	L9P_RRENAME,
+	L9P_TREADLINK = 22,
+	L9P_RREADLINK,
+	L9P_TGETATTR = 24,
+	L9P_RGETATTR,
+	L9P_TSETATTR = 26,
+	L9P_RSETATTR,
+	L9P_TXATTRWALK = 30,
+	L9P_RXATTRWALK,
+	L9P_TXATTRCREATE = 32,
+	L9P_RXATTRCREATE,
+	L9P_TREADDIR = 40,
+	L9P_RREADDIR,
+	L9P_TFSYNC = 50,
+	L9P_RFSYNC,
+	L9P_TLOCK = 52,
+	L9P_RLOCK,
+	L9P_TGETLOCK = 54,
+	L9P_RGETLOCK,
+	L9P_TLINK = 70,
+	L9P_RLINK,
+	L9P_TMKDIR = 72,
+	L9P_RMKDIR,
+	L9P_TRENAMEAT = 74,
+	L9P_RRENAMEAT,
+	L9P_TUNLINKAT = 76,
+	L9P_RUNLINKAT,
+	L9P_TVERSION = 100,
+	L9P_RVERSION,
+	L9P_TAUTH = 102,
+	L9P_RAUTH,
+	L9P_TATTACH = 104,
+	L9P_RATTACH,
+	L9P_TERROR = 106, 	/* illegal */
+	L9P_RERROR,
+	L9P_TFLUSH = 108,
+	L9P_RFLUSH,
+	L9P_TWALK = 110,
+	L9P_RWALK,
+	L9P_TOPEN = 112,
+	L9P_ROPEN,
+	L9P_TCREATE = 114,
+	L9P_RCREATE,
+	L9P_TREAD = 116,
+	L9P_RREAD,
+	L9P_TWRITE = 118,
+	L9P_RWRITE,
+	L9P_TCLUNK = 120,
+	L9P_RCLUNK,
+	L9P_TREMOVE = 122,
+	L9P_RREMOVE,
+	L9P_TSTAT = 124,
+	L9P_RSTAT,
+	L9P_TWSTAT = 126,
+	L9P_RWSTAT,
+	L9P__LAST_PLUS_1,	/* NB: must be last */
+};
+
+/*
+ * When a Tfoo request comes over the wire, we decode it
+ * (pack.c) from wire format into a request laid out in
+ * a "union l9p_fcall" object.  This object is not in wire
+ * format, but rather in something more convenient for us
+ * to operate on.
+ *
+ * We then dispatch the request (request.c, backend/fs.c) and
+ * use another "union l9p_fcall" object to build a reply.
+ * The reply is converted to wire format on the way back out
+ * (pack.c again).
+ *
+ * All sub-objects start with a header containing the request
+ * or reply type code and two-byte tag, and whether or not it
+ * is needed, a four-byte fid.
+ *
+ * What this means here is that the data structures within
+ * the union can be shared across various requests and replies.
+ * For instance, replies to OPEN, CREATE, LCREATE, LOPEN, MKDIR, and
+ * SYMLINK are all fairly similar (providing a qid and sometimes
+ * an iounit) and hence can all use the l9p_f_ropen structure.
+ * Which structures are used for which operations is somewhat
+ * arbitrary; for programming ease, if an operation shares a
+ * data structure, it still has its own name: there are union
+ * members named ropen, rcreate, rlcreate, rlopen, rmkdir, and
+ * rsymlink, even though all use struct l9p_f_ropen.
+ *
+ * The big exception to the above rule is struct l9p_f_io, which
+ * is used as both request and reply for all of READ, WRITE, and
+ * READDIR.  Moreover, the READDIR reply must be pre-packed into
+ * wire format (it is handled like raw data a la READ).
+ *
+ * Some request messages (e.g., TREADLINK) fit in a header, having
+ * just type code, tag, and fid.  These have no separate data
+ * structure, nor union member name.  Similarly, some reply
+ * messages (e.g., RCLUNK, RREMOVE, RRENAME) have just the type
+ * code and tag.
+ */
+
+/*
+ * Type code bits in (the first byte of) a qid.
+ */
+enum l9p_qid_type {
+	L9P_QTDIR = 0x80, /* type bit for directories */
+	L9P_QTAPPEND = 0x40, /* type bit for append only files */
+	L9P_QTEXCL = 0x20, /* type bit for exclusive use files */
+	L9P_QTMOUNT = 0x10, /* type bit for mounted channel */
+	L9P_QTAUTH = 0x08, /* type bit for authentication file */
+	L9P_QTTMP = 0x04, /* type bit for non-backed-up file */
+	L9P_QTSYMLINK = 0x02, /* type bit for symbolic link */
+	L9P_QTFILE = 0x00 /* type bits for plain file */
+};
+
+/*
+ * Extra permission bits in create and file modes (stat).
+ */
+#define L9P_DMDIR 0x80000000
+enum {
+	L9P_DMAPPEND = 0x40000000,
+	L9P_DMEXCL = 0x20000000,
+	L9P_DMMOUNT = 0x10000000,
+	L9P_DMAUTH = 0x08000000,
+	L9P_DMTMP = 0x04000000,
+	L9P_DMSYMLINK = 0x02000000,
+	/* 9P2000.u extensions */
+	L9P_DMDEVICE = 0x00800000,
+	L9P_DMNAMEDPIPE = 0x00200000,
+	L9P_DMSOCKET = 0x00100000,
+	L9P_DMSETUID = 0x00080000,
+	L9P_DMSETGID = 0x00040000,
+};
+
+/*
+ * Open/create mode bits in 9P2000 and 9P2000.u operations
+ * (not Linux lopen and lcreate flags, which are different).
+ * Note that the mode field is only one byte wide.
+ */
+enum l9p_omode {
+	L9P_OREAD = 0,	/* open for read */
+	L9P_OWRITE = 1,	/* write */
+	L9P_ORDWR = 2,	/* read and write */
+	L9P_OEXEC = 3,	/* execute, == read but check execute permission */
+	L9P_OACCMODE = 3, /* mask for the above access-mode bits */
+	L9P_OTRUNC = 16,	/* or'ed in (except for exec), truncate file first */
+	L9P_OCEXEC = 32,	/* or'ed in, close on exec */
+	L9P_ORCLOSE = 64,	/* or'ed in, remove on close */
+	L9P_ODIRECT = 128,	/* or'ed in, direct access */
+};
+
+/*
+ * Flag bits in 9P2000.L operations (Tlopen, Tlcreate).  These are
+ * basically just the Linux O_* flags.  The bottom 2 bits are the
+ * same as for l9p_omode, although open-for-exec is not used:
+ * instead, the client does a Tgetattr and checks the mode for
+ * execute bits, then just opens for reading.
+ *
+ * Each L_O_xxx is just the value O_xxx has on Linux in <fcntl.h>;
+ * not all are necessarily used.  From observation, we do get
+ * L_O_CREAT and L_O_EXCL when creating with exclusive, and always
+ * get L_O_LARGEFILE.  We do get L_O_APPEND when opening for
+ * append.  We also get both L_O_DIRECT and L_O_DIRECTORY set
+ * when opening directories.
+ *
+ * We probably never get L_O_NOCTTY which makes no sense, and
+ * some of the other options may need to be handled on the client.
+ */
+enum l9p_l_o_flags {
+	L9P_L_O_CREAT =		000000100U,
+	L9P_L_O_EXCL =		000000200U,
+	L9P_L_O_NOCTTY =	000000400U,
+	L9P_L_O_TRUNC =		000001000U,
+	L9P_L_O_APPEND =	000002000U,
+	L9P_L_O_NONBLOCK =	000004000U,
+	L9P_L_O_DSYNC =		000010000U,
+	L9P_L_O_FASYNC =	000020000U,
+	L9P_L_O_DIRECT =	000040000U,
+	L9P_L_O_LARGEFILE =	000100000U,
+	L9P_L_O_DIRECTORY =	000200000U,
+	L9P_L_O_NOFOLLOW =	000400000U,
+	L9P_L_O_NOATIME =	001000000U,
+	L9P_L_O_CLOEXEC =	002000000U,
+	L9P_L_O_SYNC =		004000000U,
+	L9P_L_O_PATH =		010000000U,
+	L9P_L_O_TMPFILE =	020000000U,
+};
+
+struct l9p_hdr {
+	uint8_t type;
+	uint16_t tag;
+	uint32_t fid;
+};
+
+struct l9p_qid {
+	enum l9p_qid_type type;
+	uint32_t version;
+	uint64_t path;
+};
+
+struct l9p_stat {
+	uint16_t type;
+	uint32_t dev;
+	struct l9p_qid qid;
+	uint32_t mode;
+	uint32_t atime;
+	uint32_t mtime;
+	uint64_t length;
+	char *name;
+	char *uid;
+	char *gid;
+	char *muid;
+	char *extension;
+	uint32_t n_uid;
+	uint32_t n_gid;
+	uint32_t n_muid;
+};
+
+#define	L9P_FSTYPE	 0x01021997
+
+struct l9p_statfs {
+	uint32_t type;		/* file system type */
+	uint32_t bsize;		/* block size for I/O */
+	uint64_t blocks;	/* file system size (bsize-byte blocks) */
+	uint64_t bfree;		/* free blocks in fs */
+	uint64_t bavail;	/* free blocks avail to non-superuser */
+	uint64_t files;		/* file nodes in file system (# inodes) */
+	uint64_t ffree;		/* free file nodes in fs */
+	uint64_t fsid;		/* file system identifier */
+	uint32_t namelen;	/* maximum length of filenames */
+};
+
+struct l9p_f_version {
+	struct l9p_hdr hdr;
+	uint32_t msize;
+	char *version;
+};
+
+struct l9p_f_tflush {
+	struct l9p_hdr hdr;
+	uint16_t oldtag;
+};
+
+struct l9p_f_error {
+	struct l9p_hdr hdr;
+	char *ename;
+	uint32_t errnum;
+};
+
+struct l9p_f_ropen {
+	struct l9p_hdr hdr;
+	struct l9p_qid qid;
+	uint32_t iounit;
+};
+
+struct l9p_f_rauth {
+	struct l9p_hdr hdr;
+	struct l9p_qid aqid;
+};
+
+struct l9p_f_attach {
+	struct l9p_hdr hdr;
+	uint32_t afid;
+	char *uname;
+	char *aname;
+	uint32_t n_uname;
+};
+#define	L9P_NOFID ((uint32_t)-1)	/* in Tattach, no auth fid */
+#define	L9P_NONUNAME ((uint32_t)-1)	/* in Tattach, no n_uname */
+
+struct l9p_f_tcreate {
+	struct l9p_hdr hdr;
+	uint32_t perm;
+	char *name;
+	uint8_t mode; /* +Topen */
+	char *extension;
+};
+
+struct l9p_f_twalk {
+	struct l9p_hdr hdr;
+	uint32_t newfid;
+	uint16_t nwname;
+	char *wname[L9P_MAX_WELEM];
+};
+
+struct l9p_f_rwalk {
+	struct l9p_hdr hdr;
+	uint16_t nwqid;
+	struct l9p_qid wqid[L9P_MAX_WELEM];
+};
+
+struct l9p_f_io {
+	struct l9p_hdr hdr;
+	uint64_t offset; /* Tread, Twrite, Treaddir */
+	uint32_t count; /* Tread, Twrite, Rread, Treaddir, Rreaddir */
+};
+
+struct l9p_f_rstat {
+	struct l9p_hdr hdr;
+	struct l9p_stat stat;
+};
+
+struct l9p_f_twstat {
+	struct l9p_hdr hdr;
+	struct l9p_stat stat;
+};
+
+struct l9p_f_rstatfs {
+	struct l9p_hdr hdr;
+	struct l9p_statfs statfs;
+};
+
+/* Used for Tlcreate, Tlopen, Tmkdir, Tunlinkat. */
+struct l9p_f_tlcreate {
+	struct l9p_hdr hdr;
+	char *name;		/* Tlcreate, Tmkdir, Tunlinkat */
+	uint32_t flags;		/* Tlcreate, Tlopen, Tmkdir, Tunlinkat */
+	uint32_t mode;		/* Tlcreate, Tmkdir */
+	uint32_t gid;		/* Tlcreate, Tmkdir */
+};
+
+struct l9p_f_tsymlink {
+	struct l9p_hdr hdr;
+	char *name;
+	char *symtgt;
+	uint32_t gid;
+};
+
+struct l9p_f_tmknod {
+	struct l9p_hdr hdr;
+	char *name;
+	uint32_t mode;
+	uint32_t major;
+	uint32_t minor;
+	uint32_t gid;
+};
+
+struct l9p_f_trename {
+	struct l9p_hdr hdr;
+	uint32_t dfid;
+	char *name;
+};
+
+struct l9p_f_rreadlink {
+	struct l9p_hdr hdr;
+	char *target;
+};
+
+struct l9p_f_tgetattr {
+	struct l9p_hdr hdr;
+	uint64_t request_mask;
+};
+
+struct l9p_f_rgetattr {
+	struct l9p_hdr hdr;
+	uint64_t valid;
+	struct l9p_qid qid;
+	uint32_t mode;
+	uint32_t uid;
+	uint32_t gid;
+	uint64_t nlink;
+	uint64_t rdev;
+	uint64_t size;
+	uint64_t blksize;
+	uint64_t blocks;
+	uint64_t atime_sec;
+	uint64_t atime_nsec;
+	uint64_t mtime_sec;
+	uint64_t mtime_nsec;
+	uint64_t ctime_sec;
+	uint64_t ctime_nsec;
+	uint64_t btime_sec;
+	uint64_t btime_nsec;
+	uint64_t gen;
+	uint64_t data_version;
+};
+
+/* Fields in req->request_mask and reply->valid for Tgetattr, Rgetattr. */
+enum l9pl_getattr_flags {
+	L9PL_GETATTR_MODE = 0x00000001,
+	L9PL_GETATTR_NLINK = 0x00000002,
+	L9PL_GETATTR_UID = 0x00000004,
+	L9PL_GETATTR_GID = 0x00000008,
+	L9PL_GETATTR_RDEV = 0x00000010,
+	L9PL_GETATTR_ATIME = 0x00000020,
+	L9PL_GETATTR_MTIME = 0x00000040,
+	L9PL_GETATTR_CTIME = 0x00000080,
+	L9PL_GETATTR_INO = 0x00000100,
+	L9PL_GETATTR_SIZE = 0x00000200,
+	L9PL_GETATTR_BLOCKS = 0x00000400,
+	/* everything up to and including BLOCKS is BASIC */
+	L9PL_GETATTR_BASIC = L9PL_GETATTR_MODE |
+		L9PL_GETATTR_NLINK |
+		L9PL_GETATTR_UID |
+		L9PL_GETATTR_GID |
+		L9PL_GETATTR_RDEV |
+		L9PL_GETATTR_ATIME |
+		L9PL_GETATTR_MTIME |
+		L9PL_GETATTR_CTIME |
+		L9PL_GETATTR_INO |
+		L9PL_GETATTR_SIZE |
+		L9PL_GETATTR_BLOCKS,
+	L9PL_GETATTR_BTIME = 0x00000800,
+	L9PL_GETATTR_GEN = 0x00001000,
+	L9PL_GETATTR_DATA_VERSION = 0x00002000,
+	/* BASIC + birthtime + gen + data-version = ALL */
+	L9PL_GETATTR_ALL = L9PL_GETATTR_BASIC |
+		L9PL_GETATTR_BTIME |
+		L9PL_GETATTR_GEN |
+		L9PL_GETATTR_DATA_VERSION,
+};
+
+struct l9p_f_tsetattr {
+	struct l9p_hdr hdr;
+	uint32_t valid;
+	uint32_t mode;
+	uint32_t uid;
+	uint32_t gid;
+	uint64_t size;
+	uint64_t atime_sec;	/* if valid & L9PL_SETATTR_ATIME_SET */
+	uint64_t atime_nsec;	/* (else use on-server time) */
+	uint64_t mtime_sec;	/* if valid & L9PL_SETATTR_MTIME_SET */
+	uint64_t mtime_nsec;	/* (else use on-server time) */
+};
+
+/* Fields in req->valid for Tsetattr. */
+enum l9pl_setattr_flags {
+	L9PL_SETATTR_MODE = 0x00000001,
+	L9PL_SETATTR_UID = 0x00000002,
+	L9PL_SETATTR_GID = 0x00000004,
+	L9PL_SETATTR_SIZE = 0x00000008,
+	L9PL_SETATTR_ATIME = 0x00000010,
+	L9PL_SETATTR_MTIME = 0x00000020,
+	L9PL_SETATTR_CTIME = 0x00000040,
+	L9PL_SETATTR_ATIME_SET = 0x00000080,
+	L9PL_SETATTR_MTIME_SET = 0x00000100,
+};
+
+struct l9p_f_txattrwalk {
+	struct l9p_hdr hdr;
+	uint32_t newfid;
+	char *name;
+};
+
+struct l9p_f_rxattrwalk {
+	struct l9p_hdr hdr;
+	uint64_t size;
+};
+
+struct l9p_f_txattrcreate {
+	struct l9p_hdr hdr;
+	char *name;
+	uint64_t attr_size;
+	uint32_t flags;
+};
+
+struct l9p_f_tlock {
+	struct l9p_hdr hdr;
+	uint8_t type;		/* from l9pl_lock_type */
+	uint32_t flags;		/* from l9pl_lock_flags */
+	uint64_t start;
+	uint64_t length;
+	uint32_t proc_id;
+	char *client_id;
+};
+
+enum l9pl_lock_type {
+	L9PL_LOCK_TYPE_RDLOCK =	0,
+	L9PL_LOCK_TYPE_WRLOCK =	1,
+	L9PL_LOCK_TYPE_UNLOCK =	2,
+};
+
+enum l9pl_lock_flags {
+	L9PL_LOCK_TYPE_BLOCK = 1,
+	L9PL_LOCK_TYPE_RECLAIM = 2,
+};
+
+struct l9p_f_rlock {
+	struct l9p_hdr hdr;
+	uint8_t status;		/* from l9pl_lock_status */
+};
+
+enum l9pl_lock_status {
+	L9PL_LOCK_SUCCESS = 0,
+	L9PL_LOCK_BLOCKED = 1,
+	L9PL_LOCK_ERROR = 2,
+	L9PL_LOCK_GRACE = 3,
+};
+
+struct l9p_f_getlock {
+	struct l9p_hdr hdr;
+	uint8_t type;		/* from l9pl_lock_type */
+	uint64_t start;
+	uint64_t length;
+	uint32_t proc_id;
+	char *client_id;
+};
+
+struct l9p_f_tlink {
+	struct l9p_hdr hdr;
+	uint32_t dfid;
+	char *name;
+};
+
+struct l9p_f_trenameat {
+	struct l9p_hdr hdr;
+	char *oldname;
+	uint32_t newdirfid;
+	char *newname;
+};
+
+/*
+ * Flags in Tunlinkat (which re-uses f_tlcreate data structure but
+ * with different meaning).
+ */
+enum l9p_l_unlinkat_flags {
+	/* not sure if any other AT_* flags are passed through */
+	L9PL_AT_REMOVEDIR =	0x0200,
+};
+
+union l9p_fcall {
+	struct l9p_hdr hdr;
+	struct l9p_f_version version;
+	struct l9p_f_tflush tflush;
+	struct l9p_f_ropen ropen;
+	struct l9p_f_ropen rcreate;
+	struct l9p_f_ropen rattach;
+	struct l9p_f_error error;
+	struct l9p_f_rauth rauth;
+	struct l9p_f_attach tattach;
+	struct l9p_f_attach tauth;
+	struct l9p_f_tcreate tcreate;
+	struct l9p_f_tcreate topen;
+	struct l9p_f_twalk twalk;
+	struct l9p_f_rwalk rwalk;
+	struct l9p_f_twstat twstat;
+	struct l9p_f_rstat rstat;
+	struct l9p_f_rstatfs rstatfs;
+	struct l9p_f_tlcreate tlopen;
+	struct l9p_f_ropen rlopen;
+	struct l9p_f_tlcreate tlcreate;
+	struct l9p_f_ropen rlcreate;
+	struct l9p_f_tsymlink tsymlink;
+	struct l9p_f_ropen rsymlink;
+	struct l9p_f_tmknod tmknod;
+	struct l9p_f_ropen rmknod;
+	struct l9p_f_trename trename;
+	struct l9p_f_rreadlink rreadlink;
+	struct l9p_f_tgetattr tgetattr;
+	struct l9p_f_rgetattr rgetattr;
+	struct l9p_f_tsetattr tsetattr;
+	struct l9p_f_txattrwalk txattrwalk;
+	struct l9p_f_rxattrwalk rxattrwalk;
+	struct l9p_f_txattrcreate txattrcreate;
+	struct l9p_f_tlock tlock;
+	struct l9p_f_rlock rlock;
+	struct l9p_f_getlock getlock;
+	struct l9p_f_tlink tlink;
+	struct l9p_f_tlcreate tmkdir;
+	struct l9p_f_ropen rmkdir;
+	struct l9p_f_trenameat trenameat;
+	struct l9p_f_tlcreate tunlinkat;
+	struct l9p_f_io io;
+};
+
+#endif  /* LIB9P_FCALL_H */
Index: lib/lib9p/fid.h
===================================================================
--- /dev/null
+++ lib/lib9p/fid.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_FID_H
+#define LIB9P_FID_H
+#include <stdbool.h>
+#include <stdint.h>
+
+/*
+ * Data structure for a fid.  All active fids in one session
+ * are stored in a hash table; the hash table provides the
+ * iterator to process them.  (See also l9p_connection in lib9p.h.)
+ *
+ * The back-end code has additional data per fid, found via
+ * lo_aux.  Currently this is allocated with a separate calloc().
+ *
+ * Most fids represent a file or directory, but a few are special
+ * purpose, including the auth fid from Tauth+Tattach, and the
+ * fids used for extended attributes.  We have our own set of
+ * flags here in lo_flags.
+ *
+ * Note that all new fids start as potentially-valid (reserving
+ * their 32-bit fid value), but not actually-valid.  If another
+ * (threaded) op is invoked on a not-yet-valid fid, the fid cannot
+ * be used.  A fid can also be locked against other threads, in
+ * which case they must wait for it: this happens during create
+ * and open, which on success result in the fid changing from a
+ * directory to a file.  (At least, all this applies in principle
+ * -- we're currently single-threaded per connection so the locks
+ * are nop-ed out and the valid bit is mainly just for debug.)
+ *
+ * Fids that are "open" (the underlying file or directory is open)
+ * are marked as well.
+ *
+ * Locking is managed by the front end (request.c); validation
+ * and type-marking can be done by either side as needed.
+ *
+ * Fid types and validity are manipulated by set* and unset*
+ * functions, and tested by is* ops.  Note that we only
+ * distinguish between "directory" and "not directory" at this
+ * level, i.e., symlinks and devices are just "not a directory
+ * fid".  Also, fids cannot be unset as auth or xattr fids,
+ * nor can an open fid become closed, except by being clunked.
+ * While files should not normally become directories, it IS normal
+ * for directory fids to become file fids due to Twalk operations.
+ *
+ * (These accessor functions are just to leave wiggle room for
+ * different future implementations.)
+ */
+struct l9p_fid {
+	void	*lo_aux;	/* back-end's private per-fid data */
+	uint32_t lo_fid;	/* fid number this entry is stored under */
+	uint32_t lo_flags;	/* L9P_LO_*; volatile atomic_t when threaded? */
+};
+
+enum l9p_lo_flags {
+	L9P_LO_ISAUTH = 0x01,
+	L9P_LO_ISDIR = 0x02,
+	L9P_LO_ISOPEN = 0x04,
+	L9P_LO_ISVALID = 0x08,
+	L9P_LO_ISXATTR = 0x10,
+};
+
+static inline bool
+l9p_fid_isauth(struct l9p_fid *fid)
+{
+	return ((fid->lo_flags & L9P_LO_ISAUTH) != 0);
+}
+
+static inline void
+l9p_fid_setauth(struct l9p_fid *fid)
+{
+	fid->lo_flags |= L9P_LO_ISAUTH;
+}
+
+static inline bool
+l9p_fid_isdir(struct l9p_fid *fid)
+{
+	return ((fid->lo_flags & L9P_LO_ISDIR) != 0);
+}
+
+static inline void
+l9p_fid_setdir(struct l9p_fid *fid)
+{
+	fid->lo_flags |= L9P_LO_ISDIR;
+}
+
+static inline void
+l9p_fid_unsetdir(struct l9p_fid *fid)
+{
+	fid->lo_flags &= ~(uint32_t)L9P_LO_ISDIR;
+}
+
+static inline bool
+l9p_fid_isopen(struct l9p_fid *fid)
+{
+	return ((fid->lo_flags & L9P_LO_ISOPEN) != 0);
+}
+
+static inline void
+l9p_fid_setopen(struct l9p_fid *fid)
+{
+	fid->lo_flags |= L9P_LO_ISOPEN;
+}
+
+static inline bool
+l9p_fid_isvalid(struct l9p_fid *fid)
+{
+	return ((fid->lo_flags & L9P_LO_ISVALID) != 0);
+}
+
+static inline void
+l9p_fid_setvalid(struct l9p_fid *fid)
+{
+	fid->lo_flags |= L9P_LO_ISVALID;
+}
+
+static inline void
+l9p_fid_unsetvalid(struct l9p_fid *fid)
+{
+	fid->lo_flags &= ~(uint32_t)L9P_LO_ISVALID;
+}
+
+static inline bool
+l9p_fid_isxattr(struct l9p_fid *fid)
+{
+	return ((fid->lo_flags & L9P_LO_ISXATTR) != 0);
+}
+
+static inline void
+l9p_fid_setxattr(struct l9p_fid *fid)
+{
+	fid->lo_flags |= L9P_LO_ISXATTR;
+}
+
+#endif  /* LIB9P_FID_H */
Index: lib/lib9p/genacl.h
===================================================================
--- /dev/null
+++ lib/lib9p/genacl.h
@@ -0,0 +1,307 @@
+/*
+ * Copyright 2016 Chris Torek <torek@ixsystems.com>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * General ACL support for 9P2000.L.
+ *
+ * We mostly use Linux's xattr name space and nfs4 ACL bits, as
+ * these are the most general forms available.
+ *
+ * Linux requests attributes named
+ *
+ *     "system.posix_acl_default"
+ *     "system.posix_acl_access"
+ *
+ * to get POSIX style ACLs, and:
+ *
+ *     "system.nfs4_acl"
+ *
+ * to get NFSv4 style ACLs.  The v9fs client does not explicitly
+ * ask for the latter, but if you use the Ubuntu nfs4-acl-tools
+ * package, it should be able to read and write these.
+ *
+ * For the record, the Linux kernel source code also shows:
+ *
+ *  - Lustre uses "trusted.*", with "*" matching "lov", "lma",
+ *    "lmv", "dmv", "link", "fid", "version", "som", "hsm", and
+ *    "lfsck_namespace".
+ *
+ *  - ceph has a name tree of the form "ceph.<type>.<name>" with
+ *     <type,name> pairs like <"dir","entries">, <"dir","files">,
+ *     <"file","layout">, and so on.
+ *
+ *  - ext4 uses the POSIX names, plus some special ext4-specific
+ *    goop that might not get externalized.
+ *
+ *  - NFS uses both the POSIX names and the NFSv4 ACLs.  However,
+ *    what it mainly does is have nfsd generate fake NFSv4 ACLs
+ *    from POSIX ACLs.  If you run an NFS client, the client
+ *    relies on the server actually implementing the ACLs, and
+ *    lets nfs4-acl-tools read and write the system.nfs4_acl xattr
+ *    data.  If you run an NFS server off, e.g., an ext4 file system,
+ *    the server looks for the system.nfs4_acl xattr, serves that
+ *    out if found, and otherwise just generates the fakes.
+ *
+ *  - "security.*" and "selinux.*" are reserved.
+ *
+ *  - "security.capability" is the name for capabilities.
+ *
+ *  - sockets use "system.sockprotoname".
+ */
+
+#if defined(__APPLE__)
+  #define HAVE_POSIX_ACLS
+  #define HAVE_DARWIN_ACLS
+#endif
+
+#if defined(__FreeBSD__)
+  #define HAVE_POSIX_ACLS
+  #define HAVE_FREEBSD_ACLS
+#endif
+
+#include <sys/types.h>
+#include <sys/acl.h>		/* XXX assumes existence of sys/acl.h */
+
+/*
+ * An ACL consists of a number of ACEs that grant some kind of
+ * "allow" or "deny" to some specific entity.
+ *
+ * The number of ACEs is potentially unlimited, although in practice
+ * they tend not to be that long.
+ *
+ * It's the responsibility of the back-end to supply the ACL
+ * for each test.  However, the ACL may be in some sort of
+ * system-specific form.  It's the responsibility of some
+ * (system-specific) code to translate it to *this* form, after
+ * which the backend may use l9p_acl_check_access() to get
+ * access granted or denied (and, eventually, audits and alarms
+ * recorded and raised, although that's yet to be designed).
+ *
+ * The reason for all this faffing-about with formats is so that
+ * we can *report* the ACLs using Linux 9p style xattrs.
+ */
+
+struct l9p_acl;
+struct l9p_fid;
+
+void l9p_acl_free(struct l9p_acl *);
+
+/*
+ * An ACL is made up of ACEs.
+ *
+ * Each ACE has:
+ *
+ *   - a type: allow, deny, audit, alarm
+ *   - a set of flags
+ *   - permissions bits: a "mask"
+ *   - an optional, nominally-variable-length identity
+ *
+ * The last part is especially tricky and currently has limited
+ * support here: it's always a 16 byte field on Darwin, and just
+ * a uint32_t on BSD (should be larger, really).  Linux supports
+ * very large, actually-variable-size values; we'll deal with
+ * this later, maybe.
+ *
+ * We will define the mask first, below, since these are also the bits
+ * passed in for the accmask argument to l9p_acl_check_access().
+ */
+
+/*
+ * ACL entry mask, and accmask argument flags.
+ *
+ * NB: not every bit is implemented, but they are all here because
+ * they are all defined as part of an NFSv4 ACL entry, which is
+ * more or less a superset of a POSIX ACL entry.  This means you
+ * can put a complete NFSv4 ACL in and we can reproduce it.
+ *
+ * Note that the LIST_DIRECTORY, ADD_FILE, and ADD_SUBDIRECTORY bits
+ * apply only to a directory, while the READ_DATA, WRITE_DATA, and
+ * APPEND_DATA bits apply only to a file.  See aca_parent/aca_child
+ * below.
+ */
+#define	L9P_ACE_READ_DATA		0x00001
+#define	L9P_ACE_LIST_DIRECTORY		0x00001 /* same as READ_DATA */
+#define	L9P_ACE_WRITE_DATA		0x00002
+#define	L9P_ACE_ADD_FILE		0x00002 /* same as WRITE_DATA */
+#define	L9P_ACE_APPEND_DATA		0x00004
+#define	L9P_ACE_ADD_SUBDIRECTORY	0x00004 /* same as APPEND_DATA */
+#define	L9P_ACE_READ_NAMED_ATTRS	0x00008
+#define	L9P_ACE_WRITE_NAMED_ATTRS	0x00010
+#define	L9P_ACE_EXECUTE			0x00020
+#define	L9P_ACE_DELETE_CHILD		0x00040
+#define	L9P_ACE_READ_ATTRIBUTES		0x00080
+#define	L9P_ACE_WRITE_ATTRIBUTES	0x00100
+#define	L9P_ACE_WRITE_RETENTION		0x00200 /* not used here */
+#define	L9P_ACE_WRITE_RETENTION_HOLD	0x00400 /* not used here */
+/*					0x00800 unused? */
+#define	L9P_ACE_DELETE			0x01000
+#define	L9P_ACE_READ_ACL		0x02000
+#define	L9P_ACE_WRITE_ACL		0x04000
+#define	L9P_ACE_WRITE_OWNER		0x08000
+#define	L9P_ACE_SYNCHRONIZE		0x10000 /* not used here */
+
+/*
+ * This is not an ACE bit, but is used with the access checking
+ * below.  It represents a request to unlink (delete child /
+ * delete) an entity, and is equivalent to asking for *either*
+ * (not both) permission.
+ */
+#define	L9P_ACOP_UNLINK (L9P_ACE_DELETE_CHILD | L9P_ACE_DELETE)
+
+/*
+ * Access checking takes a lot of arguments, so they are
+ * collected into a "struct" here.
+ *
+ * The aca_parent and aca_pstat fields may/must be NULL if the
+ * operation itself does not involve "directory" permissions.
+ * The aca_child and aca_cstat fields may/must be NULL if the
+ * operation does not involve anything *but* a directory.  This
+ * is how we decide whether you're interested in L9P_ACE_READ_DATA
+ * vs L9P_ACE_LIST_DIRECTORY, for instance.
+ *
+ * Note that it's OK for both parent and child to be directories
+ * (as is the case when we're adding or deleting a subdirectory).
+ */
+struct l9p_acl_check_args {
+	uid_t	aca_uid;		/* the uid that is requesting access */
+	gid_t	aca_gid;		/* the gid that is requesting access */
+	gid_t	*aca_groups;		/* the additional group-set, if any */
+	size_t	aca_ngroups;		/* number of groups in group-set */
+	struct l9p_acl *aca_parent;	/* ACLs associated with parent/dir */
+	struct stat *aca_pstat;		/* stat data for parent/dir */
+	struct l9p_acl *aca_child;	/* ACLs associated with file */
+	struct stat *aca_cstat;		/* stat data for file */
+	int	aca_aclmode;		/* mode checking bits, see below */
+	bool	aca_superuser;		/* always allow uid==0 in STAT_MODE */
+};
+
+/*
+ * Access checking mode bits in aca_aclmode.  If you enable
+ * ACLs, they are used first, optionally with ZFS style ACLs.
+ * The one exception is aca_superuser: when it is set, uid 0 is
+ * granted access before any ACL is consulted.
+ *
+ * NFS style ACLs run before POSIX style ACLs (though POSIX
+ * ACLs aren't done yet anyway).
+ *
+ * N.B.: you probably want L9P_ACM_ZFS_ACL, especially when operating
+ * with a ZFS file system on FreeBSD.
+ */
+#define	L9P_ACM_NFS_ACL		0x0001	/* enable NFS ACL checking */
+#define	L9P_ACM_ZFS_ACL		0x0002	/* use ZFS ACL unlink semantics */
+#define	L9P_ACM_POSIX_ACL	0x0004	/* enable POSIX ACL checking (notyet) */
+#define	L9P_ACM_STAT_MODE	0x0008	/* enable st_mode bits */
+
+/*
+ * Requests to access some file or directory must provide:
+ *
+ *  - An operation.  This should usually be just one bit from the
+ *    L9P_ACE_* bit-sets above, or our special L9P_ACOP_UNLINK.
+ *    For a few file-open operations it may be multiple bits,
+ *    e.g., both read and write data.
+ *  - The identity of the accessor: uid + gid + gid-set.
+ *  - The type of access desired: this may be multiple bits.
+ *  - The parent directory, if applicable.
+ *  - The child file/dir being accessed, if applicable.
+ *  - stat data for parent and/or child, if applicable.
+ *
+ * The ACLs and/or stat data of the parent and/or child get used
+ * here, so the caller must provide them.  We should have a way to
+ * cache these on fids, but not yet.  The parent and child
+ * arguments are a bit tricky; see the code in genacl.c.
+ */
+int l9p_acl_check_access(int32_t op, struct l9p_acl_check_args *args);
+
+/*
+ * When falling back to POSIX ACL or Unix-style permissions
+ * testing, it's nice to collapse the above detailed permissions
+ * into simple read/write/execute bits (value 0..7).  We provide
+ * a small utility function that does this.
+ */
+int l9p_ace_mask_to_rwx(int32_t);
+
+/*
+ * The rest of the data in an ACE.
+ */
+
+/* type in ace_type */
+#define	L9P_ACET_ACCESS_ALLOWED		0
+#define	L9P_ACET_ACCESS_DENIED		1
+#define	L9P_ACET_SYSTEM_AUDIT		2
+#define	L9P_ACET_SYSTEM_ALARM		3
+
+/* flags in ace_flags */
+#define	L9P_ACEF_FILE_INHERIT_ACE		0x001
+#define	L9P_ACEF_DIRECTORY_INHERIT_ACE		0x002
+#define	L9P_ACEF_NO_PROPAGATE_INHERIT_ACE	0x004
+#define	L9P_ACEF_INHERIT_ONLY_ACE		0x008
+#define	L9P_ACEF_SUCCESSFUL_ACCESS_ACE_FLAG	0x010
+#define	L9P_ACEF_FAILED_ACCESS_ACE_FLAG		0x020
+#define	L9P_ACEF_IDENTIFIER_GROUP		0x040
+#define	L9P_ACEF_OWNER				0x080
+#define	L9P_ACEF_GROUP				0x100
+#define	L9P_ACEF_EVERYONE			0x200
+
+#if defined(__APPLE__)
+#  define L9P_ACE_IDSIZE 16 /* but, how do we map Darwin uuid? */
+#else
+#  define L9P_ACE_IDSIZE 4
+#endif
+
+struct l9p_ace {
+	uint16_t ace_type;		/* ACL entry type */
+	uint16_t ace_flags;		/* ACL entry flags */
+	uint32_t ace_mask;		/* ACL entry mask */
+	uint32_t ace_idsize;		/* length of ace_idbytes */
+	unsigned char ace_idbytes[L9P_ACE_IDSIZE];
+};
+
+#define	L9P_ACLTYPE_NFSv4	1	/* currently the only valid type */
+struct l9p_acl {
+	uint32_t acl_acetype;		/* reserved for future expansion */
+	uint32_t acl_nace;		/* number of occupied ACEs */
+	uint32_t acl_aceasize;		/* actual size of ACE array */
+	struct l9p_ace acl_aces[];	/* variable length ACE array */
+};
+
+/*
+ * These are the system-specific converters.
+ *
+ * Right now the backend needs to just find BSD NFSv4 ACLs
+ * and convert them before each operation that needs to be
+ * tested.
+ */
+#if defined(HAVE_DARWIN_ACLS)
+struct l9p_acl *l9p_darwin_nfsv4acl_to_acl(acl_t acl);
+#endif
+
+#if defined(HAVE_FREEBSD_ACLS)
+struct l9p_acl *l9p_freebsd_nfsv4acl_to_acl(acl_t acl);
+#endif
+
+#if defined(HAVE_POSIX_ACLS) && 0 /* not yet */
+struct l9p_acl *l9p_posix_acl_to_acl(acl_t acl);
+#endif
Index: lib/lib9p/genacl.c
===================================================================
--- /dev/null
+++ lib/lib9p/genacl.c
@@ -0,0 +1,720 @@
+/*
+ * Copyright 2016 Chris Torek <torek@ixsystems.com>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/acl.h>
+#include <sys/stat.h>
+
+#include "lib9p.h"
+#include "lib9p_impl.h"
+#include "genacl.h"
+#include "fid.h"
+#include "log.h"
+
+typedef int econvertfn(acl_entry_t, struct l9p_ace *);
+
+#ifndef __APPLE__
+static struct l9p_acl *l9p_new_acl(uint32_t acetype, uint32_t aceasize);
+static struct l9p_acl *l9p_growacl(struct l9p_acl *acl, uint32_t aceasize);
+static int l9p_count_aces(acl_t sysacl);
+static struct l9p_acl *l9p_sysacl_to_acl(int, acl_t, econvertfn *);
+#endif
+static bool l9p_ingroup(gid_t tid, gid_t gid, gid_t *gids, size_t ngids);
+static int l9p_check_aces(int32_t mask, struct l9p_acl *acl, struct stat *st,
+    uid_t uid, gid_t gid, gid_t *gids, size_t ngids);
+
+void
+l9p_acl_free(struct l9p_acl *acl)
+{
+
+	free(acl);
+}
+
+/*
+ * Is tid (the test ID) the primary gid or one of the ngids IDs in gids?
+ */
+static bool
+l9p_ingroup(gid_t tid, gid_t gid, gid_t *gids, size_t ngids)
+{
+	size_t remaining;
+
+	if (gid == tid)
+		return (true);
+	for (remaining = ngids; remaining > 0; remaining--)
+		if (gids[remaining - 1] == tid)
+			return (true);
+	return (false);
+}
+
+/* #define ACE_DEBUG */
+
+/*
+ * Check the requested mask bits against one ACL on behalf of
+ * uid/gid/gids; st supplies the owner and group of the object.
+ *
+ * NFSv4 tests are done on a "first match" basis.  We walk the
+ * ACEs in order until we run out of them, hit a DENY covering
+ * any bit we still want, or have cleared every wanted bit.
+ * Each matching ALLOW clears the bits it grants.  Audit, alarm
+ * and inherit-only ACEs are skipped, as are ID-based ACEs whose
+ * ID is not the size of a uid_t.
+ *
+ * The result is "definitely allowed" (all bits cleared),
+ * "definitely denied" (a deny hit some or all of the bits), or
+ * "unspecified", returned as +1 (allow), -1 (denied), or 0 (no
+ * strong answer) respectively.
+ *
+ * As a convenience to the caller, a mask of 0 returns 0.
+ */
+static int
+l9p_check_aces(int32_t mask, struct l9p_acl *acl, struct stat *st,
+    uid_t uid, gid_t gid, gid_t *gids, size_t ngids)
+{
+	uint32_t i;
+	struct l9p_ace *ace;
+#ifdef ACE_DEBUG
+	const char *acetype, *allowdeny;
+	bool show_tid;
+#endif
+	bool match;
+	uid_t tid;
+
+	if (mask == 0)
+		return (0);
+
+	for (i = 0; mask != 0 && i < acl->acl_nace; i++) {
+		ace = &acl->acl_aces[i];
+		/* Only ALLOW and DENY count; ignore audit and alarm. */
+		if (ace->ace_type != L9P_ACET_ACCESS_ALLOWED &&
+		    ace->ace_type != L9P_ACET_ACCESS_DENIED)
+			continue;
+		/* Inherit-only ACEs govern new children, never this */
+		/* object itself, so they must not grant or deny here. */
+		if (ace->ace_flags & L9P_ACEF_INHERIT_ONLY_ACE)
+			continue;
+#ifdef ACE_DEBUG
+		show_tid = false;
+#endif
+		if (ace->ace_flags & L9P_ACEF_OWNER) {
+#ifdef ACE_DEBUG
+			acetype = "OWNER@";
+#endif
+			match = st->st_uid == uid;
+		} else if (ace->ace_flags & L9P_ACEF_GROUP) {
+#ifdef ACE_DEBUG
+			acetype = "GROUP@";
+#endif
+			match = l9p_ingroup(st->st_gid, gid, gids, ngids);
+		} else if (ace->ace_flags & L9P_ACEF_EVERYONE) {
+#ifdef ACE_DEBUG
+			acetype = "EVERYONE@";
+#endif
+			match = true;
+		} else {
+			if (ace->ace_idsize != sizeof(tid))
+				continue;
+#ifdef ACE_DEBUG
+			show_tid = true;
+#endif
+			memcpy(&tid, &ace->ace_idbytes, sizeof(tid));
+			if (ace->ace_flags & L9P_ACEF_IDENTIFIER_GROUP) {
+#ifdef ACE_DEBUG
+				acetype = "group";
+#endif
+				match = l9p_ingroup(tid, gid, gids, ngids);
+			} else {
+#ifdef ACE_DEBUG
+				acetype = "user";
+#endif
+				match = tid == uid;
+			}
+		}
+		/*
+		 * If this ACE applies to us, check remaining bits.
+		 * If any of those bits also apply, check the type:
+		 * DENY means "stop now", ALLOW means allow these bits
+		 * and keep checking.
+		 */
+#ifdef ACE_DEBUG
+		allowdeny = ace->ace_type == L9P_ACET_ACCESS_DENIED ?
+		    "deny" : "allow";
+#endif
+		if (match && (ace->ace_mask & (uint32_t)mask) != 0) {
+#ifdef ACE_DEBUG
+			if (show_tid)
+				L9P_LOG(L9P_DEBUG,
+				    "ACE: %s %s %d: mask 0x%x ace_mask 0x%x",
+				    allowdeny, acetype, (int)tid,
+				    (u_int)mask, (u_int)ace->ace_mask);
+			else
+				L9P_LOG(L9P_DEBUG,
+				    "ACE: %s %s: mask 0x%x ace_mask 0x%x",
+				    allowdeny, acetype,
+				    (u_int)mask, (u_int)ace->ace_mask);
+#endif
+			if (ace->ace_type == L9P_ACET_ACCESS_DENIED)
+				return (-1);
+			mask &= ~ace->ace_mask;
+#ifdef ACE_DEBUG
+			L9P_LOG(L9P_DEBUG, "clear 0x%x: now mask=0x%x",
+			    (u_int)ace->ace_mask, (u_int)mask);
+#endif
+		} else {
+#ifdef ACE_DEBUG
+			if (show_tid)
+				L9P_LOG(L9P_DEBUG,
+				    "ACE: SKIP %s %s %d: "
+				    "match %d mask 0x%x ace_mask 0x%x",
+				    allowdeny, acetype, (int)tid,
+				    (int)match, (u_int)mask,
+				    (u_int)ace->ace_mask);
+			else
+				L9P_LOG(L9P_DEBUG,
+				    "ACE: SKIP %s %s: "
+				    "match %d mask 0x%x ace_mask 0x%x",
+				    allowdeny, acetype,
+				    (int)match, (u_int)mask,
+				    (u_int)ace->ace_mask);
+#endif
+		}
+	}
+
+	/* Return 1 if access definitely granted. */
+#ifdef ACE_DEBUG
+	L9P_LOG(L9P_DEBUG, "ACE: end of ACEs, mask now 0x%x: %s",
+	    mask, mask ? "no-definitive-answer" : "ALLOW");
+#endif
+	return (mask == 0 ? 1 : 0);
+}
+
+/*
+ * Test against ACLs.
+ *
+ * The return value is normally 0 (access allowed) or EPERM
+ * (access denied), so it could just be a boolean....
+ *
+ * For "make new dir in dir" and "remove dir in dir", you must
+ * set the mask to test the directory permissions (not ADD_FILE but
+ * ADD_SUBDIRECTORY, and DELETE_CHILD).  For "make new file in dir"
+ * you must set the opmask to test file ADD_FILE.
+ *
+ * The L9P_ACE_DELETE flag means "can delete this thing"; it's not
+ * clear whether it should override the parent directory's ACL if
+ * any.  In our case it does not, but a caller may try
+ * L9P_ACE_DELETE_CHILD (separately, on its own) and then a
+ * (second, separate) L9P_ACE_DELETE, to make the permissions work
+ * as "or" instead of "and".
+ *
+ * Pass a NULL parent/pstat if they are not applicable, e.g.,
+ * for doing operations on an existing file, such as reading or
+ * writing data or attributes.  Pass in a null child/cstat if
+ * that's not applicable, such as creating a new file/dir.
+ *
+ * NB: it's probably wise to allow the owner of any file to update
+ * the ACLs of that file, but we leave that test to the caller.
+ */
+int l9p_acl_check_access(int32_t opmask, struct l9p_acl_check_args *args)
+{
+	struct l9p_acl *parent, *child;
+	struct stat *pstat, *cstat;
+	int32_t pop, cop;
+	size_t ngids;
+	uid_t uid;
+	gid_t gid, *gids;
+	int panswer, canswer;
+
+	assert(opmask != 0);
+	parent = args->aca_parent;
+	pstat = args->aca_pstat;
+	child = args->aca_child;
+	cstat = args->aca_cstat;
+	uid = args->aca_uid;
+	gid = args->aca_gid;
+	gids = args->aca_groups;
+	ngids = args->aca_ngroups;
+
+#ifdef ACE_DEBUG
+	L9P_LOG(L9P_DEBUG,
+	    "l9p_acl_check_access: opmask=0x%x uid=%ld gid=%ld ngids=%zd",
+	    (u_int)opmask, (long)uid, (long)gid, ngids);
+#endif
+	/*
+	 * If caller said "superuser semantics", check that first.
+	 * Note that we apply them regardless of ACLs.
+	 */
+	if (uid == 0 && args->aca_superuser)
+		return (0);
+
+	/*
+	 * If told to ignore ACLs and use only stat-based permissions,
+	 * discard any non-NULL ACL pointers.
+	 *
+	 * This will need some fancying up when we support POSIX ACLs.
+	 */
+	if ((args->aca_aclmode & L9P_ACM_NFS_ACL) == 0)
+		parent = child = NULL;
+
+	assert(parent == NULL || parent->acl_acetype == L9P_ACLTYPE_NFSv4);
+	assert(parent == NULL || pstat != NULL);
+	assert(child == NULL || child->acl_acetype == L9P_ACLTYPE_NFSv4);
+	assert(child == NULL || cstat != NULL);
+	assert(pstat != NULL || cstat != NULL);
+
+	/*
+	 * If the operation is UNLINK we should have either both ACLs
+	 * or no ACLs, but we won't require that here.
+	 *
+	 * If a parent ACL is supplied, it's a directory by definition.
+	 * Make sure we're allowed to do this there, whatever this is.
+	 * If a child ACL is supplied, check it too.  Note that the
+	 * DELETE permission only applies in the child though, not
+	 * in the parent, and the DELETE_CHILD only applies in the
+	 * parent.
+	 */
+	pop = cop = opmask;
+	if (parent != NULL || pstat != NULL) {
+		/*
+		 * Remove child-only bits from parent op and
+		 * parent-only bits from child op.
+		 *
+		 * L9P_ACE_DELETE is child-only.
+		 *
+		 * L9P_ACE_DELETE_CHILD is parent-only, and three data
+		 * access bits overlap with three directory access bits.
+		 * We should have child==NULL && cstat==NULL, so the
+		 * three data bits should be redundant, but it's
+		 * both trivial and safest to remove them anyway.
+		 */
+		pop &= ~L9P_ACE_DELETE;
+		cop &= ~(L9P_ACE_DELETE_CHILD | L9P_ACE_LIST_DIRECTORY |
+		    L9P_ACE_ADD_FILE | L9P_ACE_ADD_SUBDIRECTORY);
+	} else {
+		/*
+		 * Remove child-only bits from parent op.  We need
+		 * not bother since we just found we have no parent
+		 * and no pstat, and hence won't actually *use* pop.
+		 *
+		 * pop &= ~(L9P_ACE_READ_DATA | L9P_ACE_WRITE_DATA |
+		 *     L9P_ACE_APPEND_DATA);
+		 */
+	}
+	panswer = 0;
+	canswer = 0;
+	if (parent != NULL)
+		panswer = l9p_check_aces(pop, parent, pstat,
+		    uid, gid, gids, ngids);
+	if (child != NULL)
+		canswer = l9p_check_aces(cop, child, cstat,
+		    uid, gid, gids, ngids);
+
+	if (panswer || canswer) {
+		/*
+		 * Got a definitive answer from parent and/or
+		 * child ACLs.  We're not quite done yet though.
+		 */
+		if (opmask == L9P_ACOP_UNLINK) {
+			/*
+			 * For UNLINK, we can get an allow from child
+			 * and deny from parent, or vice versa.  It's
+			 * not 100% clear how to handle the two-answer
+			 * case.  ZFS says that if either says "allow",
+			 * we allow, and if both definitely say "deny",
+			 * we deny.  This makes sense, so we do that
+			 * here for all cases, even "strict".
+			 */
+			if (panswer > 0 || canswer > 0)
+				return (0);
+			if (panswer < 0 && canswer < 0)
+				return (EPERM);
+			/* non-definitive answer from one! move on */
+		} else {
+			/*
+			 * Have at least one definitive answer, and
+			 * should have only one; obey whichever
+			 * one it is.
+			 */
+			if (panswer)
+				return (panswer < 0 ? EPERM : 0);
+			return (canswer < 0 ? EPERM : 0);
+		}
+	}
+
+	/*
+	 * No definitive answer from ACLs alone.  Check for ZFS style
+	 * permissions checking and an "UNLINK" operation under ACLs.
+	 * If so, find write-and-execute permission on parent.
+	 * Note that WRITE overlaps with ADD_FILE -- that's ZFS's
+	 * way of saying "allow write to dir" -- but EXECUTE is
+	 * separate from LIST_DIRECTORY, so that's at least a little
+	 * bit cleaner.
+	 *
+	 * Note also that only a definitive yes (both bits are
+	 * explicitly allowed) results in granting unlink, and
+	 * a definitive no (at least one bit explicitly denied)
+	 * results in EPERM.  Only "no answer" moves on.
+	 */
+	if ((args->aca_aclmode & L9P_ACM_ZFS_ACL) &&
+	    opmask == L9P_ACOP_UNLINK && parent != NULL) {
+		panswer = l9p_check_aces(L9P_ACE_ADD_FILE | L9P_ACE_EXECUTE,
+		    parent, pstat, uid, gid, gids, ngids);
+		if (panswer)
+			return (panswer < 0 ? EPERM : 0);
+	}
+
+	/*
+	 * No definitive answer from ACLs.
+	 *
+	 * Try POSIX style rwx permissions if allowed.  This should
+	 * be rare, occurring mainly when caller supplied no ACLs
+	 * or set the mode to suppress them.
+	 *
+	 * The stat to check is the parent's if we don't have a child
+	 * (i.e., this is a dir op), or if the DELETE_CHILD bit is set
+	 * (i.e., this is an unlink or similar).  Otherwise it's the
+	 * child's.
+	 */
+	if (args->aca_aclmode & L9P_ACM_STAT_MODE) {
+		struct stat *st;
+		int rwx, bits;
+
+		rwx = l9p_ace_mask_to_rwx(opmask);
+		if ((st = cstat) == NULL || (opmask & L9P_ACE_DELETE_CHILD))
+			st = pstat;
+		if (uid == st->st_uid)
+			bits = (st->st_mode >> 6) & 7;
+		else if (l9p_ingroup(st->st_gid, gid, gids, ngids))
+			bits = (st->st_mode >> 3) & 7;
+		else
+			bits = st->st_mode & 7;
+		/*
+		 * If all the desired bits are set, we're OK.
+		 */
+		if ((rwx & bits) == rwx)
+			return (0);
+	}
+
+	/* all methods have failed, return EPERM */
+	return (EPERM);
+}
+
+/*
+ * Reduce an NFSv4-style operation mask to Unix rwx bits (0..7).
+ *
+ * Directory operations map only loosely: listing needs read and
+ * adding or removing entries needs write, which is good enough
+ * for the stat-mode fallback.  Bits outside the three sets below
+ * contribute nothing to the result.
+ */
+int
+l9p_ace_mask_to_rwx(int32_t opmask)
+{
+	const int32_t rbits = L9P_ACE_READ_DATA |
+	    L9P_ACE_READ_NAMED_ATTRS | L9P_ACE_READ_ATTRIBUTES |
+	    L9P_ACE_READ_ACL;
+	const int32_t wbits = L9P_ACE_WRITE_DATA |
+	    L9P_ACE_APPEND_DATA | L9P_ACE_ADD_FILE |
+	    L9P_ACE_ADD_SUBDIRECTORY | L9P_ACE_DELETE |
+	    L9P_ACE_DELETE_CHILD | L9P_ACE_WRITE_NAMED_ATTRS |
+	    L9P_ACE_WRITE_ATTRIBUTES | L9P_ACE_WRITE_ACL;
+	const int32_t xbits = L9P_ACE_EXECUTE;
+	int r, w, x;
+
+	r = (opmask & rbits) != 0 ? 4 : 0;
+	w = (opmask & wbits) != 0 ? 2 : 0;
+	x = (opmask & xbits) != 0 ? 1 : 0;
+
+	return (r | w | x);
+}
+
+#ifndef __APPLE__
+/*
+ * Allocate an ACL of the given type with room for aceasize ACEs,
+ * none of them occupied yet.  Returns NULL if malloc() fails.
+ */
+static struct l9p_acl *
+l9p_new_acl(uint32_t acetype, uint32_t aceasize)
+{
+	struct l9p_acl *acl;
+	size_t nbytes;
+
+	nbytes = sizeof(*acl) + aceasize * sizeof(acl->acl_aces[0]);
+	acl = malloc(nbytes);
+	if (acl == NULL)
+		return (NULL);
+	acl->acl_acetype = acetype;
+	acl->acl_nace = 0;
+	acl->acl_aceasize = aceasize;
+	return (acl);
+}
+
+/*
+ * Expand ACL to accommodate at least aceasize entries.  Never
+ * shrinks: a request that already fits is a fast no-op.  On growth,
+ * acl_aceasize is updated to the new capacity; callers should grow
+ * in big chunks, or repeated growth will be O(n**2).  If realloc()
+ * fails, the old ACL is freed and NULL is returned.
+ */
+static struct l9p_acl *
+l9p_growacl(struct l9p_acl *acl, uint32_t aceasize)
+{
+	struct l9p_acl *tmp;
+
+	if (acl->acl_aceasize >= aceasize)
+		return (acl);
+	tmp = realloc(acl,
+	    sizeof(struct l9p_acl) + aceasize * sizeof(struct l9p_ace));
+	if (tmp == NULL) {
+		free(acl);
+		return (NULL);
+	}
+	tmp->acl_aceasize = aceasize;
+	return (tmp);
+}
+
+/*
+ * Count the entries in a system ACL.
+ *
+ * POSIX offers no direct way to ask, so we simply walk the whole
+ * list once.  The count is only an initial size estimate: if the
+ * ACL grows before conversion, growacl() copes with that.
+ */
+static int
+l9p_count_aces(acl_t sysacl)
+{
+	acl_entry_t unused;
+	uint32_t count = 0;
+	int which = ACL_FIRST_ENTRY;
+
+	while (acl_get_entry(sysacl, which, &unused) == 1) {
+		which = ACL_NEXT_ENTRY;
+		count++;
+	}
+	return ((int)count);
+}
+
+/*
+ * Build an l9p_acl of the given type from a system acl_t, running
+ * convert on every entry; entries that fail to convert are dropped.
+ */
+static struct l9p_acl *
+l9p_sysacl_to_acl(int acetype, acl_t sysacl, econvertfn *convert)
+{
+	struct l9p_acl *result;
+	acl_entry_t sysace;
+	uint32_t used, guess;
+	int which;
+
+	guess = (uint32_t)l9p_count_aces(sysacl);
+	result = l9p_new_acl((uint32_t)acetype, guess);
+	if (result == NULL)
+		return (NULL);
+	used = 0;
+	which = ACL_FIRST_ENTRY;
+	while (acl_get_entry(sysacl, which, &sysace) == 1) {
+		which = ACL_NEXT_ENTRY;
+		/* Reallocates only when the array is too small. */
+		result = l9p_growacl(result, used + 1);
+		if (result == NULL)
+			return (NULL);
+		if ((*convert)(sysace, &result->acl_aces[used]) == 0)
+			used++;
+	}
+	result->acl_nace = used;
+	return (result);
+}
+#endif
+
+#if defined(HAVE_POSIX_ACLS) && 0 /* not yet */
+struct l9p_acl *
+l9p_posix_acl_to_acl(acl_t sysacl)
+{
+}
+#endif
+
+#if defined(HAVE_FREEBSD_ACLS)
+static int
+l9p_frombsdnfs4(acl_entry_t sysace, struct l9p_ace *ace)
+{
+	acl_tag_t tag;			/* e.g., USER_OBJ, GROUP, etc */
+	acl_entry_type_t entry_type;	/* e.g., allow/deny */
+	acl_permset_t absdperm;
+	acl_flagset_t absdflag;
+	acl_perm_t bsdperm;		/* e.g., READ_DATA */
+	acl_flag_t bsdflag;		/* e.g., FILE_INHERIT_ACE */
+	uint32_t flags, mask;
+	int error;
+	uid_t uid, *aid;
+
+	error = acl_get_tag_type(sysace, &tag);
+	if (error == 0)
+		error = acl_get_entry_type_np(sysace, &entry_type);
+	if (error == 0)
+		error = acl_get_flagset_np(sysace, &absdflag);
+	if (error == 0)
+		error = acl_get_permset(sysace, &absdperm);
+	if (error)
+		return (error);
+
+	flags = 0;
+	uid = 0;
+	aid = NULL;
+
+	/* move user/group/everyone + id-is-group-id into flags */
+	switch (tag) {
+	case ACL_USER_OBJ:
+		flags |= L9P_ACEF_OWNER;
+		break;
+	case ACL_GROUP_OBJ:
+		flags |= L9P_ACEF_GROUP;
+		break;
+	case ACL_EVERYONE:
+		flags |= L9P_ACEF_EVERYONE;
+		break;
+	case ACL_GROUP:
+		flags |= L9P_ACEF_IDENTIFIER_GROUP;
+		/* FALLTHROUGH */
+	case ACL_USER:
+		aid = acl_get_qualifier(sysace); /* ugh, this malloc()s */
+		if (aid == NULL)
+			return (ENOMEM);
+		uid = *(uid_t *)aid;
+		free(aid);
+		aid = &uid;
+		break;
+	default:
+		return (EINVAL);	/* can't happen */
+	}
+
+	switch (entry_type) {
+
+	case ACL_ENTRY_TYPE_ALLOW:
+		ace->ace_type = L9P_ACET_ACCESS_ALLOWED;
+		break;
+
+	case ACL_ENTRY_TYPE_DENY:
+		ace->ace_type = L9P_ACET_ACCESS_DENIED;
+		break;
+
+	case ACL_ENTRY_TYPE_AUDIT:
+		ace->ace_type = L9P_ACET_SYSTEM_AUDIT;
+		break;
+
+	case ACL_ENTRY_TYPE_ALARM:
+		ace->ace_type = L9P_ACET_SYSTEM_ALARM;
+		break;
+
+	default:
+		return (EINVAL);	/* can't happen */
+	}
+
+	/* transform remaining BSD flags to internal NFS-y form */
+	bsdflag = *absdflag;
+	if (bsdflag & ACL_ENTRY_FILE_INHERIT)
+		flags |= L9P_ACEF_FILE_INHERIT_ACE;
+	if (bsdflag & ACL_ENTRY_DIRECTORY_INHERIT)
+		flags |= L9P_ACEF_DIRECTORY_INHERIT_ACE;
+	if (bsdflag & ACL_ENTRY_NO_PROPAGATE_INHERIT)
+		flags |= L9P_ACEF_NO_PROPAGATE_INHERIT_ACE;
+	if (bsdflag & ACL_ENTRY_INHERIT_ONLY)
+		flags |= L9P_ACEF_INHERIT_ONLY_ACE;
+	if (bsdflag & ACL_ENTRY_SUCCESSFUL_ACCESS)
+		flags |= L9P_ACEF_SUCCESSFUL_ACCESS_ACE_FLAG;
+	if (bsdflag & ACL_ENTRY_FAILED_ACCESS)
+		flags |= L9P_ACEF_FAILED_ACCESS_ACE_FLAG;
+	ace->ace_flags = flags;
+
+	/*
+	 * Transform BSD permissions to ace_mask.  Note that directory
+	 * vs file bits are the same in both sets, so we don't need
+	 * to worry about that, at least.
+	 *
+	 * There seem to be no BSD equivalents for WRITE_RETENTION
+	 * and WRITE_RETENTION_HOLD.
+	 */
+	mask = 0;
+	bsdperm = *absdperm;
+	if (bsdperm & ACL_READ_DATA)
+		mask |= L9P_ACE_READ_DATA;
+	if (bsdperm & ACL_WRITE_DATA)
+		mask |= L9P_ACE_WRITE_DATA;
+	if (bsdperm & ACL_APPEND_DATA)
+		mask |= L9P_ACE_APPEND_DATA;
+	if (bsdperm & ACL_READ_NAMED_ATTRS)
+		mask |= L9P_ACE_READ_NAMED_ATTRS;
+	if (bsdperm & ACL_WRITE_NAMED_ATTRS)
+		mask |= L9P_ACE_WRITE_NAMED_ATTRS;
+	if (bsdperm & ACL_EXECUTE)
+		mask |= L9P_ACE_EXECUTE;
+	if (bsdperm & ACL_DELETE_CHILD)
+		mask |= L9P_ACE_DELETE_CHILD;
+	if (bsdperm & ACL_READ_ATTRIBUTES)
+		mask |= L9P_ACE_READ_ATTRIBUTES;
+	if (bsdperm & ACL_WRITE_ATTRIBUTES)
+		mask |= L9P_ACE_WRITE_ATTRIBUTES;
+	/* L9P_ACE_WRITE_RETENTION */
+	/* L9P_ACE_WRITE_RETENTION_HOLD */
+	/* 0x00800 */
+	if (bsdperm & ACL_DELETE)
+		mask |= L9P_ACE_DELETE;
+	if (bsdperm & ACL_READ_ACL)
+		mask |= L9P_ACE_READ_ACL;
+	if (bsdperm & ACL_WRITE_ACL)
+		mask |= L9P_ACE_WRITE_ACL;
+	if (bsdperm & ACL_WRITE_OWNER)
+		mask |= L9P_ACE_WRITE_OWNER;
+	if (bsdperm & ACL_SYNCHRONIZE)
+		mask |= L9P_ACE_SYNCHRONIZE;
+	ace->ace_mask = mask;
+
+	/* fill in variable-size user or group ID bytes */
+	if (aid == NULL)
+		ace->ace_idsize = 0;
+	else {
+		ace->ace_idsize = sizeof(uid);
+		memcpy(&ace->ace_idbytes[0], aid, sizeof(uid));
+	}
+
+	return (0);
+}
+
+struct l9p_acl *
+l9p_freebsd_nfsv4acl_to_acl(acl_t sysacl)
+{
+
+	return (l9p_sysacl_to_acl(L9P_ACLTYPE_NFSv4, sysacl, l9p_frombsdnfs4));
+}
+#endif
+
+#if defined(HAVE_DARWIN_ACLS) && 0 /* not yet */
+struct l9p_acl *
+l9p_darwin_nfsv4acl_to_acl(acl_t sysacl)
+{
+}
+#endif
Index: lib/lib9p/hashtable.h
===================================================================
--- /dev/null
+++ lib/lib9p/hashtable.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_HASHTABLE_H
+#define LIB9P_HASHTABLE_H
+
+#include <pthread.h>
+#include <sys/queue.h>
+
+struct ht {
+	struct ht_entry * 	ht_entries;
+	ssize_t 		ht_nentries;
+	pthread_rwlock_t	ht_rwlock;
+};
+
+struct ht_entry {
+	TAILQ_HEAD(, ht_item) hte_items;
+};
+
+struct ht_item {
+	uint32_t		hti_hash;
+	void *			hti_data;
+	TAILQ_ENTRY(ht_item)	hti_link;
+};
+
+struct ht_iter {
+	struct ht *		htit_parent;
+	struct ht_item *	htit_curr;
+	struct ht_item *	htit_next;
+	ssize_t			htit_slot;
+};
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wthread-safety-analysis"
+#endif
+
+/*
+ * Obtain read-lock on hash table.
+ */
+static inline int
+ht_rdlock(struct ht *h)
+{
+
+	return (pthread_rwlock_rdlock(&h->ht_rwlock));
+}
+
+/*
+ * Obtain write-lock on hash table.
+ */
+static inline int
+ht_wrlock(struct ht *h)
+{
+
+	return (pthread_rwlock_wrlock(&h->ht_rwlock));
+}
+
+/*
+ * Release lock on hash table.
+ */
+static inline int
+ht_unlock(struct ht *h)
+{
+
+	return (pthread_rwlock_unlock(&h->ht_rwlock));
+}
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+void ht_init(struct ht *h, ssize_t size);
+void ht_destroy(struct ht *h);
+void *ht_find(struct ht *h, uint32_t hash);
+void *ht_find_locked(struct ht *h, uint32_t hash);
+int ht_add(struct ht *h, uint32_t hash, void *value);
+int ht_remove(struct ht *h, uint32_t hash);
+int ht_remove_locked(struct ht *h, uint32_t hash);
+int ht_remove_at_iter(struct ht_iter *iter);
+void ht_iter(struct ht *h, struct ht_iter *iter);
+void *ht_next(struct ht_iter *iter);
+
+#endif  /* LIB9P_HASHTABLE_H */
Index: lib/lib9p/hashtable.c
===================================================================
--- /dev/null
+++ lib/lib9p/hashtable.c
@@ -0,0 +1,267 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <pthread.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+#include "lib9p_impl.h"
+#include "hashtable.h"
+
+static struct ht_item *ht_iter_advance(struct ht_iter *, struct ht_item *);
+
+void
+ht_init(struct ht *h, ssize_t size)
+{
+	ssize_t slot;
+
+	/* Start from all-zero, then attach the bucket array and the lock. */
+	memset(h, 0, sizeof(*h));
+	h->ht_entries = l9p_calloc((size_t)size, sizeof(*h->ht_entries));
+	h->ht_nentries = size;
+	pthread_rwlock_init(&h->ht_rwlock, NULL);
+	for (slot = 0; slot < size; slot++)
+		TAILQ_INIT(&h->ht_entries[slot].hte_items);
+}
+
+void
+ht_destroy(struct ht *h)
+{
+	struct ht_entry *bucket;
+	struct ht_item *victim;
+	ssize_t slot;
+
+	for (slot = 0; slot < h->ht_nentries; slot++) {
+		bucket = &h->ht_entries[slot];
+		while ((victim = TAILQ_FIRST(&bucket->hte_items)) != NULL) {
+			TAILQ_REMOVE(&bucket->hte_items, victim, hti_link);
+			free(victim);	/* hti_data is left alone */
+		}
+	}
+	pthread_rwlock_destroy(&h->ht_rwlock);
+	free(h->ht_entries);
+	h->ht_entries = NULL;
+}
+
+void *
+ht_find(struct ht *h, uint32_t hash)
+{
+	void *result;
+
+	ht_rdlock(h);		/* a lookup only needs the read lock */
+	result = ht_find_locked(h, hash);	/* NULL if hash not found */
+	ht_unlock(h);
+	return (result);
+}
+
+void *
+ht_find_locked(struct ht *h, uint32_t hash)
+{
+	struct ht_entry *bucket;
+	struct ht_item *it;
+
+	/*
+	 * Takes no lock itself; ht_find() calls this under the read lock.
+	 */
+	bucket = &h->ht_entries[hash % h->ht_nentries];
+	it = TAILQ_FIRST(&bucket->hte_items);
+	while (it != NULL && it->hti_hash != hash)
+		it = TAILQ_NEXT(it, hti_link);
+	return (it == NULL ? NULL : it->hti_data);
+}
+
+int
+ht_add(struct ht *h, uint32_t hash, void *value)
+{
+	struct ht_entry *bucket;
+	struct ht_item *it, *fresh;
+
+	/* Duplicate check and insertion share one write-locked section. */
+	ht_wrlock(h);
+	bucket = &h->ht_entries[hash % h->ht_nentries];
+	TAILQ_FOREACH(it, &bucket->hte_items, hti_link) {
+		if (it->hti_hash != hash)
+			continue;
+		errno = EEXIST;		/* hash already present */
+		ht_unlock(h);
+		return (-1);
+	}
+
+	/* Not found: append a new item carrying the caller's pointer. */
+	fresh = l9p_calloc(1, sizeof(*fresh));
+	fresh->hti_hash = hash;
+	fresh->hti_data = value;
+	TAILQ_INSERT_TAIL(&bucket->hte_items, fresh, hti_link);
+	ht_unlock(h);
+	return (0);
+}
+
+int
+ht_remove(struct ht *h, uint32_t hash)
+{
+	int result;
+
+	ht_wrlock(h);		/* removal mutates a bucket list */
+	result = ht_remove_locked(h, hash);	/* 0, or -1 with errno set */
+	ht_unlock(h);
+	return (result);
+}
+
+int
+ht_remove_locked(struct ht *h, uint32_t hash)
+{
+	struct ht_entry *bucket;
+	struct ht_item *it;
+
+	/* Walk the hash's bucket until a matching item or the list end. */
+	bucket = &h->ht_entries[hash % h->ht_nentries];
+	it = TAILQ_FIRST(&bucket->hte_items);
+	while (it != NULL && it->hti_hash != hash)
+		it = TAILQ_NEXT(it, hti_link);
+	if (it == NULL) {
+		errno = ENOENT;
+		return (-1);
+	}
+	/* Unlink and free the item; its hti_data is left to the caller. */
+	TAILQ_REMOVE(&bucket->hte_items, it, hti_link);
+	free(it);
+	return (0);
+}
+
+/*
+ * Inner workings for advancing the iterator.
+ *
+ * If we have a current item, that tells us how to find the
+ * next item.  If not, we get the first item from the next
+ * slot (well, the next slot with an item); in any case, we
+ * record the new slot and return the next item.
+ *
+ * For bootstrapping, iter->htit_slot can be -1 to start
+ * searching at slot 0.
+ *
+ * Caller must hold a lock on the table.
+ */
+static struct ht_item *
+ht_iter_advance(struct ht_iter *iter, struct ht_item *cur)
+{
+	struct ht *tbl = iter->htit_parent;
+	struct ht_item *found;
+	ssize_t s;
+
+	/* Stay within the current slot's chain while it has more items. */
+	found = (cur != NULL) ? TAILQ_NEXT(cur, hti_link) : NULL;
+	if (found != NULL)
+		return (found);
+
+	/*
+	 * Chain exhausted (or no current item): probe the following
+	 * slots in order.  htit_slot ends up at the slot that supplied
+	 * the item, or at least ht_nentries if none is left.
+	 */
+	for (s = iter->htit_slot + 1; s < tbl->ht_nentries; s++) {
+		found = TAILQ_FIRST(&tbl->ht_entries[s].hte_items);
+		if (found != NULL)
+			break;
+	}
+	iter->htit_slot = s;
+	return (found);
+}
+
+/*
+ * Remove the current item - there must be one, or this is an
+ * error.  This (necessarily) pre-locates the next item, so callers
+ * must not use it on an actively-changing table.
+ */
+int
+ht_remove_at_iter(struct ht_iter *iter)
+{
+	struct ht_item *item;
+	struct ht *h;
+	ssize_t slot;
+
+	assert(iter != NULL);
+
+	if ((item = iter->htit_curr) == NULL) {
+		errno = EINVAL;		/* iterator is not on an item */
+		return (-1);
+	}
+
+	/* remove the item from the table, saving the NEXT one */
+	h = iter->htit_parent;
+	ht_wrlock(h);
+	slot = iter->htit_slot;	/* item's slot; advancing may move htit_slot */
+	iter->htit_next = ht_iter_advance(iter, item);
+	TAILQ_REMOVE(&h->ht_entries[slot].hte_items, item, hti_link);
+	ht_unlock(h);
+
+	/* mark us as no longer on an item, then free it */
+	iter->htit_curr = NULL;
+	free(item);		/* the item only; hti_data is not freed */
+
+	return (0);
+}
+
+/*
+ * Initialize iterator.  Subsequent ht_next calls will find the
+ * first item, then the next, and so on.  Callers should in general
+ * not use this on actively-changing tables, though we do our best
+ * to make it semi-sensible.
+ */
+void
+ht_iter(struct ht *h, struct ht_iter *iter)
+{
+
+	*iter = (struct ht_iter){	/* unnamed fields start out NULL */
+		.htit_parent = h,
+		.htit_slot = -1,	/* first advance pre-increments to 0 */
+	};
+}
+
+/*
+ * Return the next item, which is the first item if we have not
+ * yet been called on this iterator, or the next item if we have.
+ */
+void *
+ht_next(struct ht_iter *iter)
+{
+	struct ht_item *it = iter->htit_next;
+
+	if (it != NULL) {
+		/* Consume the successor saved by ht_remove_at_iter(). */
+		iter->htit_next = NULL;
+	} else {
+		/* Nothing saved: step from the current item under rdlock. */
+		ht_rdlock(iter->htit_parent);
+		it = ht_iter_advance(iter, iter->htit_curr);
+		ht_unlock(iter->htit_parent);
+	}
+	iter->htit_curr = it;
+	return (it != NULL ? it->hti_data : NULL);
+}
Index: lib/lib9p/lib9p.h
===================================================================
--- /dev/null
+++ lib/lib9p/lib9p.h
@@ -0,0 +1,249 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+
+#ifndef LIB9P_LIB9P_H
+#define LIB9P_LIB9P_H
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <sys/uio.h>
+#include <pthread.h>
+
+#if defined(__FreeBSD__)
+#include <sys/sbuf.h>
+#else
+#include "sbuf/sbuf.h"
+#endif
+
+#include "fcall.h"
+#include "threadpool.h"
+#include "hashtable.h"
+
+#define L9P_DEFAULT_MSIZE   8192
+#define L9P_MAX_IOV         128
+#define	L9P_NUMTHREADS      8
+
+struct l9p_request;
+struct l9p_backend;
+struct l9p_fid;
+
+/*
+ * Functions to implement underlying transport for lib9p.
+ *
+ * The transport is responsible for:
+ *
+ *   - allocating a response buffer (filling in the iovec and niov)
+ *     (gets req, pointer to base of iov array of size L9P_MAX_IOV,
+ *      pointer to niov, lt_aux)
+ *
+ *   - sending a response, when a request has a reply ready
+ *     (gets req, pointer to iov, niov, actual response length, lt_aux)
+ *
+ *   - dropping the response buffer, when a request has been
+ *     flushed or otherwise dropped without a response
+ *     (gets req, pointer to iov, niov, lt_aux)
+ *
+ * The transport is of course also responsible for feeding in
+ * request-buffers, but that happens by the transport calling
+ * l9p_connection_recv().
+ */
+struct l9p_transport {
+	void *lt_aux;		/* handed back as the last hook argument */
+	int (*lt_get_response_buffer)(struct l9p_request *, struct iovec *,
+	    size_t *, void *);		/* req, iov base, &niov, lt_aux */
+	int (*lt_send_response)(struct l9p_request *, const struct iovec *,
+	    size_t, size_t, void *);	/* req, iov, niov, resp len, lt_aux */
+	void (*lt_drop_response)(struct l9p_request *, const struct iovec *,
+	    size_t, void *);		/* req, iov, niov, lt_aux */
+};
+
+enum l9p_pack_mode {
+	L9P_PACK,	/* encoding an outgoing message */
+	L9P_UNPACK	/* decoding an incoming message */
+};
+
+enum l9p_integer_type {	/* each value is the field width in bytes */
+	L9P_BYTE = 1,
+	L9P_WORD = 2,
+	L9P_DWORD = 4,
+	L9P_QWORD = 8
+};
+
+enum l9p_version {
+	L9P_INVALID_VERSION = 0,
+	L9P_2000 = 1,		/* presumably base 9P2000 - confirm */
+	L9P_2000U = 2,		/* presumably 9P2000.u - confirm */
+	L9P_2000L = 3		/* presumably 9P2000.L - confirm */
+};
+
+/*
+ * This structure is used for unpacking (decoding) incoming
+ * requests and packing (encoding) outgoing results.  It has its
+ * own copy of the iov array, with its own counters for working
+ * through that array, but it borrows the actual DATA from the
+ * original iov array associated with the original request (see
+ * below).
+ */
+struct l9p_message {
+	enum l9p_pack_mode lm_mode;	/* packing or unpacking */
+	struct iovec lm_iov[L9P_MAX_IOV];	/* own copy of the iov array */
+	size_t lm_niov;		/* presumably entries used in lm_iov */
+	size_t lm_cursor_iov;	/* presumably current lm_iov index */
+	size_t lm_cursor_offset;	/* presumably offset within it */
+	size_t lm_size;		/* NOTE(review): meaning not shown here */
+};
+
+/*
+ * Data structure for a request/response pair (Tfoo/Rfoo).
+ *
+ * Note that the response is not formatted out into raw data
+ * (overwriting the request raw data) until we are really
+ * responding, with the exception of read operations Tread
+ * and Treaddir, which overlay their result-data into the
+ * iov array in the process of reading.
+ *
+ * We have room for two incoming fids, in case we are
+ * using 9P2000.L protocol.  Note that nothing that uses two
+ * fids also has an output fid (newfid), so we could have a
+ * union of lr_fid2 and lr_newfid, but keeping them separate
+ * is probably a bit less error-prone.  (If we want to shave
+ * memory requirements there are more places to look.)
+ *
+ * (The fid, fid2, and newfid fields should be removed via
+ * reorganization, as they are only used for smuggling data
+ * between request.c and the backend and should just be
+ * parameters to backend ops.)
+ */
+struct l9p_request {
+	struct l9p_message lr_req_msg;	/* for unpacking the request */
+	struct l9p_message lr_resp_msg;	/* for packing the response */
+	union l9p_fcall lr_req;		/* the request, decoded/unpacked */
+	union l9p_fcall lr_resp;	/* the response, not yet packed */
+
+	struct l9p_fid *lr_fid;		/* first incoming fid */
+	struct l9p_fid *lr_fid2;	/* second incoming fid (9P2000.L) */
+	struct l9p_fid *lr_newfid;	/* output fid */
+
+	struct l9p_connection *lr_conn;	/* containing connection */
+	void *lr_aux;			/* reserved for transport layer */
+
+	struct iovec lr_data_iov[L9P_MAX_IOV];	/* iovecs for req + resp */
+	size_t lr_data_niov;			/* actual size of data_iov */
+
+	int lr_error;			/* result from l9p_dispatch_request */
+
+	/* protected by threadpool mutex */
+	enum l9p_workstate lr_workstate;	/* threadpool: work state */
+	enum l9p_flushstate lr_flushstate;	/* flush state if flushee */
+	struct l9p_worker *lr_worker;		/* threadpool: worker */
+	STAILQ_ENTRY(l9p_request) lr_worklink;	/* reserved to threadpool */
+
+	/* protected by tag hash table lock */
+	struct l9p_request_queue lr_flushq;	/* q of flushers */
+	STAILQ_ENTRY(l9p_request) lr_flushlink;	/* link w/in flush queue */
+};
+
+/* N.B.: these dirents are variable length and for .L only */
+struct l9p_dirent {
+	struct l9p_qid qid;
+	uint64_t offset;	/* NOTE(review): presumably a readdir cookie */
+	uint8_t type;
+	char *name;		/* NOTE(review): ownership not shown here */
+};
+
+/*
+ * The 9pfs protocol has the notion of a "session", which is
+ * traffic between any two "Tversion" requests.  All fids
+ * (lc_files, below) are specific to one particular session.
+ *
+ * We need a data structure per connection (client/server
+ * pair). This data structure lasts longer than these 9pfs
+ * sessions, but contains the request/response pairs and fids.
+ * Logically, the per-session data should be separate, but
+ * most of the time that would just require an extra
+ * indirection.  Instead, a new session simply clunks all
+ * fids, and otherwise keeps using this same connection.
+ */
+struct l9p_connection {
+	struct l9p_server *lc_server;
+	struct l9p_transport lc_lt;	/* transport hooks and lt_aux */
+	struct l9p_threadpool lc_tp;
+	enum l9p_version lc_version;
+	uint32_t lc_msize;
+	uint32_t lc_max_io_size;
+	struct ht lc_files;		/* fids of the current session */
+	struct ht lc_requests;		/* presumably requests by tag */
+	LIST_ENTRY(l9p_connection) lc_link;	/* presumably on ls_conns */
+};
+
+struct l9p_server {
+	struct l9p_backend *ls_backend;
+	enum l9p_version ls_max_version;
+	LIST_HEAD(, l9p_connection) ls_conns;	/* list of connections */
+};
+
+int l9p_pufcall(struct l9p_message *msg, union l9p_fcall *fcall,
+    enum l9p_version version);
+ssize_t l9p_pustat(struct l9p_message *msg, struct l9p_stat *s,
+    enum l9p_version version);
+uint16_t l9p_sizeof_stat(struct l9p_stat *stat, enum l9p_version version);
+int l9p_pack_stat(struct l9p_message *msg, struct l9p_request *req,
+    struct l9p_stat *s);
+ssize_t l9p_pudirent(struct l9p_message *msg, struct l9p_dirent *de);
+
+int l9p_server_init(struct l9p_server **serverp, struct l9p_backend *backend);
+
+int l9p_connection_init(struct l9p_server *server,
+    struct l9p_connection **connp);
+void l9p_connection_free(struct l9p_connection *conn);
+void l9p_connection_recv(struct l9p_connection *conn, const struct iovec *iov,
+    size_t niov, void *aux);
+void l9p_connection_close(struct l9p_connection *conn);
+struct l9p_fid *l9p_connection_alloc_fid(struct l9p_connection *conn,
+    uint32_t fid);
+void l9p_connection_remove_fid(struct l9p_connection *conn,
+    struct l9p_fid *fid);
+
+int l9p_dispatch_request(struct l9p_request *req);
+void l9p_respond(struct l9p_request *req, bool drop, bool rmtag);
+
+void l9p_init_msg(struct l9p_message *msg, struct l9p_request *req,
+    enum l9p_pack_mode mode);
+void l9p_seek_iov(struct iovec *iov1, size_t niov1, struct iovec *iov2,
+    size_t *niov2, size_t seek);
+size_t l9p_truncate_iov(struct iovec *iov, size_t niov, size_t length);
+void l9p_describe_fcall(union l9p_fcall *fcall, enum l9p_version version,
+    struct sbuf *sb);
+void l9p_freefcall(union l9p_fcall *fcall);
+void l9p_freestat(struct l9p_stat *stat);
+
+gid_t *l9p_getgrlist(const char *, gid_t, int *);
+
+#endif  /* LIB9P_LIB9P_H */
Index: lib/lib9p/lib9p_impl.h
===================================================================
--- /dev/null
+++ lib/lib9p/lib9p_impl.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_LIB9P_IMPL_H
+#define LIB9P_LIB9P_IMPL_H
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef _KERNEL
+static inline void *
+l9p_malloc(size_t size)
+{
+	void *r = malloc(size);
+
+	/* malloc() that never returns NULL: failure is reported and fatal. */
+	if (r == NULL) {
+		fprintf(stderr, "cannot allocate %zu bytes: out of memory\n",
+		    size);	/* size_t is unsigned, hence %zu */
+		abort();
+	}
+	return (r);
+}
+
+static inline void *
+l9p_calloc(size_t n, size_t size)
+{
+	void *r = calloc(n, size);
+
+	/* calloc() that never returns NULL: failure is reported and fatal. */
+	if (r == NULL) {
+		fprintf(stderr, "cannot allocate %zu bytes: out of memory\n",
+		    n * size);	/* size_t is unsigned, hence %zu */
+		abort();
+	}
+	return (r);
+}
+
+static inline void *
+l9p_realloc(void *ptr, size_t newsize)
+{
+	void *r = realloc(ptr, newsize);
+
+	/* realloc() whose NULL result is always treated as fatal. */
+	if (r == NULL) {
+		fprintf(stderr, "cannot allocate %zu bytes: out of memory\n",
+		    newsize);	/* size_t is unsigned, hence %zu */
+		abort();
+	}
+	return (r);
+}
+#endif /* _KERNEL */
+
+#endif /* LIB9P_LIB9P_IMPL_H */
Index: lib/lib9p/linux_errno.h
===================================================================
--- /dev/null
+++ lib/lib9p/linux_errno.h
@@ -0,0 +1,247 @@
+/*
+ * Copyright 2016 Chris Torek <torek@ixsystems.com>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_LINUX_ERRNO_H
+#define LIB9P_LINUX_ERRNO_H
+
+/*
+ * Linux error numbers that are outside of the original base range
+ * (which ends with ERANGE).
+ *
+ * This is pretty much the same as Linux's errno.h except that the
+ * names are prefixed with "LINUX_", and we add _STR with the
+ * string name.
+ *
+ * The string expansions were obtained with a little program to
+ * print every strerror().
+ *
+ * Note that BSD EDEADLK is 11 and BSD EAGAIN is 35, vs
+ * Linux / Plan9 EAGAIN at 11.  So one value in the ERANGE
+ * range still needs translation too.
+ */
+
+#define	LINUX_EAGAIN		11
+#define	LINUX_EAGAIN_STR	"Resource temporarily unavailable"
+
+#define	LINUX_EDEADLK		35
+#define	LINUX_EDEADLK_STR	"Resource deadlock avoided"
+#define	LINUX_ENAMETOOLONG	36
+#define	LINUX_ENAMETOOLONG_STR	"File name too long"
+#define	LINUX_ENOLCK		37
+#define	LINUX_ENOLCK_STR	"No locks available"
+#define	LINUX_ENOSYS		38
+#define	LINUX_ENOSYS_STR	"Function not implemented"
+#define	LINUX_ENOTEMPTY		39
+#define	LINUX_ENOTEMPTY_STR	"Directory not empty"
+#define	LINUX_ELOOP		40
+#define	LINUX_ELOOP_STR		"Too many levels of symbolic links"
+/*				41 unused */
+#define	LINUX_ENOMSG		42
+#define	LINUX_ENOMSG_STR	"No message of desired type"
+#define	LINUX_EIDRM		43
+#define	LINUX_EIDRM_STR		"Identifier removed"
+#define	LINUX_ECHRNG		44
+#define	LINUX_ECHRNG_STR	"Channel number out of range"
+#define	LINUX_EL2NSYNC		45
+#define	LINUX_EL2NSYNC_STR	"Level 2 not synchronized"
+#define	LINUX_EL3HLT		46
+#define	LINUX_EL3HLT_STR	"Level 3 halted"
+#define	LINUX_EL3RST		47
+#define	LINUX_EL3RST_STR	"Level 3 reset"
+#define	LINUX_ELNRNG		48
+#define	LINUX_ELNRNG_STR	"Link number out of range"
+#define	LINUX_EUNATCH		49
+#define	LINUX_EUNATCH_STR	"Protocol driver not attached"
+#define	LINUX_ENOCSI		50
+#define	LINUX_ENOCSI_STR	"No CSI structure available"
+#define	LINUX_EL2HLT		51
+#define	LINUX_EL2HLT_STR	"Level 2 halted"
+#define	LINUX_EBADE		52
+#define	LINUX_EBADE_STR		"Invalid exchange"
+#define	LINUX_EBADR		53
+#define	LINUX_EBADR_STR		"Invalid request descriptor"
+#define	LINUX_EXFULL		54
+#define	LINUX_EXFULL_STR	"Exchange full"
+#define	LINUX_ENOANO		55
+#define	LINUX_ENOANO_STR	"No anode"
+#define	LINUX_EBADRQC		56
+#define	LINUX_EBADRQC_STR	"Invalid request code"
+#define	LINUX_EBADSLT		57
+#define	LINUX_EBADSLT_STR	"Invalid slot"
+/*				58 unused */
+#define	LINUX_EBFONT		59
+#define	LINUX_EBFONT_STR	"Bad font file format"
+#define	LINUX_ENOSTR		60
+#define	LINUX_ENOSTR_STR	"Device not a stream"
+#define	LINUX_ENODATA		61
+#define	LINUX_ENODATA_STR	"No data available"
+#define	LINUX_ETIME		62
+#define	LINUX_ETIME_STR		"Timer expired"
+#define	LINUX_ENOSR		63
+#define	LINUX_ENOSR_STR		"Out of streams resources"
+#define	LINUX_ENONET		64
+#define	LINUX_ENONET_STR	"Machine is not on the network"
+#define	LINUX_ENOPKG		65
+#define	LINUX_ENOPKG_STR	"Package not installed"
+#define	LINUX_EREMOTE		66
+#define	LINUX_EREMOTE_STR	"Object is remote"
+#define	LINUX_ENOLINK		67
+#define	LINUX_ENOLINK_STR	"Link has been severed"
+#define	LINUX_EADV		68
+#define	LINUX_EADV_STR		"Advertise error"
+#define	LINUX_ESRMNT		69
+#define	LINUX_ESRMNT_STR	"Srmount error"
+#define	LINUX_ECOMM		70
+#define	LINUX_ECOMM_STR		"Communication error on send"
+#define	LINUX_EPROTO		71
+#define	LINUX_EPROTO_STR	"Protocol error"
+#define	LINUX_EMULTIHOP		72
+#define	LINUX_EMULTIHOP_STR	"Multihop attempted"
+#define	LINUX_EDOTDOT		73
+#define	LINUX_EDOTDOT_STR	"RFS specific error"
+#define	LINUX_EBADMSG		74
+#define	LINUX_EBADMSG_STR	"Bad message"
+#define	LINUX_EOVERFLOW		75
+#define	LINUX_EOVERFLOW_STR	"Value too large for defined data type"
+#define	LINUX_ENOTUNIQ		76
+#define	LINUX_ENOTUNIQ_STR	"Name not unique on network"
+#define	LINUX_EBADFD		77
+#define	LINUX_EBADFD_STR	"File descriptor in bad state"
+#define	LINUX_EREMCHG		78
+#define	LINUX_EREMCHG_STR	"Remote address changed"
+#define	LINUX_ELIBACC		79
+#define	LINUX_ELIBACC_STR	"Can not access a needed shared library"
+#define	LINUX_ELIBBAD		80
+#define	LINUX_ELIBBAD_STR	"Accessing a corrupted shared library"
+#define	LINUX_ELIBSCN		81
+#define	LINUX_ELIBSCN_STR	".lib section in a.out corrupted"
+#define	LINUX_ELIBMAX		82
+#define	LINUX_ELIBMAX_STR	"Attempting to link in too many shared libraries"
+#define	LINUX_ELIBEXEC		83
+#define	LINUX_ELIBEXEC_STR	"Cannot exec a shared library directly"
+#define	LINUX_EILSEQ		84
+#define	LINUX_EILSEQ_STR	"Invalid or incomplete multibyte or wide character"
+#define	LINUX_ERESTART		85
+#define	LINUX_ERESTART_STR	"Interrupted system call should be restarted"
+#define	LINUX_ESTRPIPE		86
+#define	LINUX_ESTRPIPE_STR	"Streams pipe error"
+#define	LINUX_EUSERS		87
+#define	LINUX_EUSERS_STR	"Too many users"
+#define	LINUX_ENOTSOCK		88
+#define	LINUX_ENOTSOCK_STR	"Socket operation on non-socket"
+#define	LINUX_EDESTADDRREQ	89
+#define	LINUX_EDESTADDRREQ_STR	"Destination address required"
+#define	LINUX_EMSGSIZE		90
+#define	LINUX_EMSGSIZE_STR	"Message too long"
+#define	LINUX_EPROTOTYPE	91
+#define	LINUX_EPROTOTYPE_STR	"Protocol wrong type for socket"
+#define	LINUX_ENOPROTOOPT	92
+#define	LINUX_ENOPROTOOPT_STR	"Protocol not available"
+#define	LINUX_EPROTONOSUPPORT	93
+#define	LINUX_EPROTONOSUPPORT_STR "Protocol not supported"
+#define	LINUX_ESOCKTNOSUPPORT	94
+#define	LINUX_ESOCKTNOSUPPORT_STR "Socket type not supported"
+#define	LINUX_EOPNOTSUPP	95
+#define	LINUX_EOPNOTSUPP_STR	"Operation not supported"
+#define	LINUX_EPFNOSUPPORT	96
+#define	LINUX_EPFNOSUPPORT_STR	"Protocol family not supported"
+#define	LINUX_EAFNOSUPPORT	97
+#define	LINUX_EAFNOSUPPORT_STR	"Address family not supported by protocol"
+#define	LINUX_EADDRINUSE	98
+#define	LINUX_EADDRINUSE_STR	"Address already in use"
+#define	LINUX_EADDRNOTAVAIL	99
+#define	LINUX_EADDRNOTAVAIL_STR	"Cannot assign requested address"
+#define	LINUX_ENETDOWN		100
+#define	LINUX_ENETDOWN_STR	"Network is down"
+#define	LINUX_ENETUNREACH	101
+#define	LINUX_ENETUNREACH_STR	"Network is unreachable"
+#define	LINUX_ENETRESET		102
+#define	LINUX_ENETRESET_STR	"Network dropped connection on reset"
+#define	LINUX_ECONNABORTED	103
+#define	LINUX_ECONNABORTED_STR	"Software caused connection abort"
+#define	LINUX_ECONNRESET	104
+#define	LINUX_ECONNRESET_STR	"Connection reset by peer"
+#define	LINUX_ENOBUFS		105
+#define	LINUX_ENOBUFS_STR	"No buffer space available"
+#define	LINUX_EISCONN		106
+#define	LINUX_EISCONN_STR	"Transport endpoint is already connected"
+#define	LINUX_ENOTCONN		107
+#define	LINUX_ENOTCONN_STR	"Transport endpoint is not connected"
+#define	LINUX_ESHUTDOWN		108
+#define	LINUX_ESHUTDOWN_STR	"Cannot send after transport endpoint shutdown"
+#define	LINUX_ETOOMANYREFS	109
+#define	LINUX_ETOOMANYREFS_STR	"Too many references: cannot splice"
+#define	LINUX_ETIMEDOUT		110
+#define	LINUX_ETIMEDOUT_STR	"Connection timed out"
+#define	LINUX_ECONNREFUSED	111
+#define	LINUX_ECONNREFUSED_STR	"Connection refused"
+#define	LINUX_EHOSTDOWN		112
+#define	LINUX_EHOSTDOWN_STR	"Host is down"
+#define	LINUX_EHOSTUNREACH	113
+#define	LINUX_EHOSTUNREACH_STR	"No route to host"
+#define	LINUX_EALREADY		114
+#define	LINUX_EALREADY_STR	"Operation already in progress"
+#define	LINUX_EINPROGRESS	115
+#define	LINUX_EINPROGRESS_STR	"Operation now in progress"
+#define	LINUX_ESTALE		116
+#define	LINUX_ESTALE_STR	"Stale file handle"
+#define	LINUX_EUCLEAN		117
+#define	LINUX_EUCLEAN_STR	"Structure needs cleaning"
+#define	LINUX_ENOTNAM		118
+#define	LINUX_ENOTNAM_STR	"Not a XENIX named type file"
+#define	LINUX_ENAVAIL		119
+#define	LINUX_ENAVAIL_STR	"No XENIX semaphores available"
+#define	LINUX_EISNAM		120
+#define	LINUX_EISNAM_STR	"Is a named type file"
+#define	LINUX_EREMOTEIO		121
+#define	LINUX_EREMOTEIO_STR	"Remote I/O error"
+#define	LINUX_EDQUOT		122
+#define	LINUX_EDQUOT_STR	"Quota exceeded"
+#define	LINUX_ENOMEDIUM		123
+#define	LINUX_ENOMEDIUM_STR	"No medium found"
+#define	LINUX_EMEDIUMTYPE	124
+#define	LINUX_EMEDIUMTYPE_STR	"Wrong medium type"
+#define	LINUX_ECANCELED		125
+#define	LINUX_ECANCELED_STR	"Operation canceled"
+#define	LINUX_ENOKEY		126
+#define	LINUX_ENOKEY_STR	"Required key not available"
+#define	LINUX_EKEYEXPIRED	127
+#define	LINUX_EKEYEXPIRED_STR	"Key has expired"
+#define	LINUX_EKEYREVOKED	128
+#define	LINUX_EKEYREVOKED_STR	"Key has been revoked"
+#define	LINUX_EKEYREJECTED	129
+#define	LINUX_EKEYREJECTED_STR	"Key was rejected by service"
+#define	LINUX_EOWNERDEAD	130
+#define	LINUX_EOWNERDEAD_STR	"Owner died"
+#define	LINUX_ENOTRECOVERABLE	131
+#define	LINUX_ENOTRECOVERABLE_STR "State not recoverable"
+#define	LINUX_ERFKILL		132
+#define	LINUX_ERFKILL_STR	"Operation not possible due to RF-kill"
+#define	LINUX_EHWPOISON		133
+#define	LINUX_EHWPOISON_STR	"Memory page has hardware error"
+
+#endif	/* LIB9P_LINUX_ERRNO_H */
Index: lib/lib9p/log.h
===================================================================
--- /dev/null
+++ lib/lib9p/log.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_LOG_H
+#define	LIB9P_LOG_H
+
+enum l9p_log_level {
+	L9P_DEBUG,	/* logged as "DEBUG" */
+	L9P_INFO,	/* logged as "INFO" */
+	L9P_WARNING,	/* logged as "WARN" */
+	L9P_ERROR	/* logged as "ERROR" */
+};
+
+void l9p_logf(enum l9p_log_level level, const char *func, const char *fmt, ...);
+
+#if defined(L9P_DEBUG)	/* tests a macro, not the enumerator of that name */
+#define	L9P_LOG(level, fmt, ...) l9p_logf(level, __func__, fmt, ##__VA_ARGS__)
+#else			/* logging compiled out */
+#define L9P_LOG(level, fmt, ...)	/* expands to nothing */
+#endif	/* NOTE(review): -DL9P_DEBUG=<x> also rewrites the enum name */
+
+#endif	/* LIB9P_LOG_H */
Index: lib/lib9p/log.c
===================================================================
--- /dev/null
+++ lib/lib9p/log.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include "log.h"
+
+static const char *l9p_log_level_names[] = {	/* indexed by l9p_log_level */
+	"DEBUG",
+	"INFO",
+	"WARN",
+	"ERROR"
+};
+
+void
+l9p_logf(enum l9p_log_level level, const char *func, const char *fmt, ...)
+{
+	static FILE *stream = NULL;	/* sink, chosen on first use */
+	const char *dest;
+	va_list ap;
+
+	/*
+	 * $LIB9P_LOGGING names the sink: "stderr" or a file to append to.
+	 * Unset, or a file that cannot be opened, means: log nothing.
+	 */
+	if (stream == NULL) {
+		if ((dest = getenv("LIB9P_LOGGING")) == NULL)
+			return;
+		if (strcmp(dest, "stderr") == 0)
+			stream = stderr;
+		else if ((stream = fopen(dest, "a")) == NULL)
+			return;
+	}
+
+	va_start(ap, fmt);
+	fprintf(stream, "[%s]\t %s: ", l9p_log_level_names[level], func);
+	vfprintf(stream, fmt, ap);
+	fprintf(stream, "\n");
+	fflush(stream);
+	va_end(ap);
+}
Index: lib/lib9p/pack.c
===================================================================
--- /dev/null
+++ lib/lib9p/pack.c
@@ -0,0 +1,993 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * Based on libixp code: ©2007-2010 Kris Maglione <maglione.k at Gmail>
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#ifdef __APPLE__
+# include "apple_endian.h"
+#else
+# include <sys/endian.h>
+#endif
+#include <sys/uio.h>
+#include "lib9p.h"
+#include "lib9p_impl.h"
+#include "log.h"
+
+/* Number of elements in a true array (not a pointer). */
+#define N(ary)          (sizeof(ary) / sizeof(*(ary)))
+/*
+ * Wire size of a string: 2-byte length prefix plus the string bytes;
+ * a NULL string is encoded as an empty one.
+ */
+#define STRING_SIZE(s)  (L9P_WORD + ((s) != NULL ? (uint16_t)strlen(s) : 0))
+/* Wire size of a qid: type byte + 32-bit version + 64-bit path. */
+#define QID_SIZE        (L9P_BYTE + L9P_DWORD + L9P_QWORD)
+
+static ssize_t l9p_iov_io(struct l9p_message *, void *, size_t);
+static inline ssize_t l9p_pu8(struct l9p_message *, uint8_t *);
+static inline ssize_t l9p_pu16(struct l9p_message *, uint16_t *);
+static inline ssize_t l9p_pu32(struct l9p_message *, uint32_t *);
+static inline ssize_t l9p_pu64(struct l9p_message *, uint64_t *);
+static ssize_t l9p_pustring(struct l9p_message *, char **s);
+static ssize_t l9p_pustrings(struct l9p_message *, uint16_t *, char **, size_t);
+static ssize_t l9p_puqid(struct l9p_message *, struct l9p_qid *);
+static ssize_t l9p_puqids(struct l9p_message *, uint16_t *, struct l9p_qid *q);
+
+/*
+ * Copy len bytes between the caller's buffer and the message's
+ * iovec array at the current cursor: buffer -> iovecs when
+ * msg->lm_mode is L9P_PACK, iovecs -> buffer when it is L9P_UNPACK.
+ * The cursor (iovec index plus offset within it) advances as bytes
+ * are moved, and lm_size grows by len on success.
+ *
+ * Returns len (as ssize_t) on success, 0 if len is 0, or -1 if the
+ * iovecs are exhausted before len bytes have been moved.
+ *
+ * After a -1 return the cursor sits past the last iovec, so any
+ * later call with len > 0 on the same (un-reset) msg fails too;
+ * most callers can therefore check only the *last* call.
+ */
+static ssize_t
+l9p_iov_io(struct l9p_message *msg, void *buffer, size_t len)
+{
+	size_t moved;
+
+	assert(msg != NULL);
+
+	if (len == 0)
+		return (0);
+
+	if (msg->lm_cursor_iov >= msg->lm_niov)
+		return (-1);
+
+	assert(buffer != NULL);
+
+	for (moved = 0; moved < len;) {
+		size_t cur = msg->lm_cursor_iov;
+		size_t avail = msg->lm_iov[cur].iov_len -
+		    msg->lm_cursor_offset;
+		size_t chunk = MIN(avail, len - moved);
+		char *wire = (char *)msg->lm_iov[cur].iov_base +
+		    msg->lm_cursor_offset;
+		char *host = (char *)buffer + moved;
+
+		if (msg->lm_mode == L9P_PACK)
+			memcpy(wire, host, chunk);
+		else if (msg->lm_mode == L9P_UNPACK)
+			memcpy(host, wire, chunk);
+
+		moved += chunk;
+
+		if (chunk < avail) {
+			/* Still room in this iovec; just move the offset. */
+			msg->lm_cursor_offset += chunk;
+			continue;
+		}
+
+		/* This iovec is used up: step to the start of the next. */
+		msg->lm_cursor_iov++;
+		msg->lm_cursor_offset = 0;
+		if (moved < len && msg->lm_cursor_iov >= msg->lm_niov)
+			return (-1);
+	}
+
+	msg->lm_size += moved;
+	return ((ssize_t)moved);
+}
+
+/*
+ * Pack or unpack a single byte; no byte-order handling is needed.
+ *
+ * Returns 1 on success (one byte moved) or -1 on error.
+ */
+static inline ssize_t
+l9p_pu8(struct l9p_message *msg, uint8_t *val)
+{
+
+	return (l9p_iov_io(msg, val, sizeof (*val)));
+}
+
+/*
+ * Pack or unpack a 16-bit value (little-endian on the wire).
+ *
+ * Returns 2 on success or -1 on error.
+ */
+static inline ssize_t
+l9p_pu16(struct l9p_message *msg, uint16_t *val)
+{
+#if _BYTE_ORDER == _LITTLE_ENDIAN
+	/* Host order already matches the wire: move the bytes as-is. */
+	return (l9p_iov_io(msg, val, sizeof (*val)));
+#else
+	/*
+	 * Only hosts that are not little-endian need the swapping
+	 * below; it is compiled out elsewhere rather than left for
+	 * the compiler to optimize away.
+	 */
+	uint16_t wire;
+	ssize_t n;
+
+	if (msg->lm_mode != L9P_PACK) {
+		/* Read the wire bytes in place, then fix the byte order. */
+		n = l9p_iov_io(msg, val, sizeof (*val));
+		*val = le16toh(*val);
+		return (n);
+	}
+
+	/* Pack from a swapped temporary so *val is left untouched. */
+	wire = htole16(*val);
+	return (l9p_iov_io(msg, &wire, sizeof (wire)));
+#endif
+}
+
+/*
+ * Pack or unpack a 32-bit value (little-endian on the wire).
+ *
+ * Returns 4 on success or -1 on error.
+ */
+static inline ssize_t
+l9p_pu32(struct l9p_message *msg, uint32_t *val)
+{
+#if _BYTE_ORDER == _LITTLE_ENDIAN
+	/* Host order already matches the wire: move the bytes as-is. */
+	return (l9p_iov_io(msg, val, sizeof (*val)));
+#else
+	uint32_t wire;
+	ssize_t n;
+
+	if (msg->lm_mode != L9P_PACK) {
+		/* Read the wire bytes in place, then fix the byte order. */
+		n = l9p_iov_io(msg, val, sizeof (*val));
+		*val = le32toh(*val);
+		return (n);
+	}
+
+	/* Pack from a swapped temporary so *val is left untouched. */
+	wire = htole32(*val);
+	return (l9p_iov_io(msg, &wire, sizeof (wire)));
+#endif
+}
+
+/*
+ * Pack or unpack 64-bit value.
+ *
+ * The wire format is little-endian.  On hosts that are not
+ * little-endian the value is byte-swapped: when packing, a swapped
+ * copy is written (leaving *val untouched); when unpacking, the
+ * bytes are read into *val and then converted to host order.
+ *
+ * Returns 8 or -1.
+ */
+static inline ssize_t
+l9p_pu64(struct l9p_message *msg, uint64_t *val)
+{
+#if _BYTE_ORDER != _LITTLE_ENDIAN
+	uint64_t copy;
+	ssize_t ret;
+
+	if (msg->lm_mode == L9P_PACK) {
+		copy = htole64(*val);
+		return (l9p_iov_io(msg, &copy, sizeof (uint64_t)));
+	}
+	/* Transfer all 8 bytes (not 4) before converting. */
+	ret = l9p_iov_io(msg, val, sizeof (uint64_t));
+	*val = le64toh(*val);
+	return (ret);
+#else
+	return (l9p_iov_io(msg, val, sizeof (uint64_t)));
+#endif
+}
+
+/*
+ * Pack or unpack a string: a 2-byte length followed by that many
+ * string bytes (no terminator on the wire).  On success the return
+ * value is the string length plus 2.
+ *
+ * When packing, a NULL *s is sent as an empty string.  When
+ * unpacking, *s is set to a freshly allocated, NUL-terminated copy.
+ *
+ * Returns -1 on error: no space in the message, allocation failure,
+ * or an illegal string (one with an embedded NUL byte).  On the
+ * unpack error paths after allocation, *s is left pointing at the
+ * allocated buffer.
+ *
+ * Note that this (and hence l9p_pustrings) can fail even when every
+ * l9p_iov_io call succeeds.
+ */
+static ssize_t
+l9p_pustring(struct l9p_message *msg, char **s)
+{
+	const int packing = (msg->lm_mode == L9P_PACK);
+	const int unpacking = (msg->lm_mode == L9P_UNPACK);
+	uint16_t len;
+
+	if (packing) {
+		if (*s != NULL)
+			len = (uint16_t)strlen(*s);
+		else
+			len = 0;
+	}
+
+	if (l9p_pu16(msg, &len) < 0)
+		return (-1);
+
+	if (unpacking) {
+		/* One extra zeroed byte provides the NUL terminator. */
+		*s = l9p_calloc(1, len + 1);
+		if (*s == NULL)
+			return (-1);
+	}
+
+	if (l9p_iov_io(msg, *s, len) < 0)
+		return (-1);
+
+	/*
+	 * Embedded NUL bytes are illegal.  Accepting them would merely
+	 * yield a shorter string, but rejecting them is the safer choice.
+	 */
+	if (unpacking && memchr(*s, '\0', len) != NULL)
+		return (-1);
+
+	return (2 + (ssize_t)len);
+}
+
+/*
+ * Pack or unpack a counted list of strings: a 2-byte count followed
+ * by that many strings, but never more than max of them.
+ *
+ * Packing: if *num exceeds max, only max strings are sent and the
+ * wire count is reduced accordingly; *num itself is not modified.
+ * Unpacking: *num is read from the wire and, if it exceeds max, is
+ * reduced to max; only that many strings are then read.
+ * (NOTE ASYMMETRY HERE!)
+ *
+ * Returns the number of bytes transferred, including the 2-byte
+ * count, or -1 on error.
+ */
+static ssize_t
+l9p_pustrings(struct l9p_message *msg, uint16_t *num, char **strings,
+    size_t max)
+{
+	char **sp;
+	size_t count;
+	ssize_t total, n;
+	uint16_t wire_count;
+
+	if (msg->lm_mode != L9P_PACK) {
+		total = l9p_pu16(msg, num);
+		count = *num;
+		if (count > max) {
+			count = max;
+			*num = (uint16_t)max;
+		}
+	} else {
+		count = (*num > max) ? max : *num;
+		wire_count = (uint16_t)count;
+		total = l9p_pu16(msg, &wire_count);
+	}
+	if (total < 0)
+		return (-1);
+
+	for (sp = strings; count > 0; count--, sp++) {
+		n = l9p_pustring(msg, sp);
+		if (n < 1)
+			return (-1);
+		total += n;
+	}
+
+	return (total);
+}
+
+/*
+ * Pack or unpack a qid: type byte, 32-bit version, 64-bit path.
+ *
+ * Returns QID_SIZE (13) on success or -1 on error.
+ */
+static ssize_t
+l9p_puqid(struct l9p_message *msg, struct l9p_qid *qid)
+{
+	const int packing = (msg->lm_mode == L9P_PACK);
+	ssize_t r;
+	uint8_t type;
+
+	/* The type travels through a uint8_t temporary in both directions. */
+	if (packing)
+		type = qid->type;
+	r = l9p_pu8(msg, &type);
+	if (!packing)
+		qid->type = type;
+	if (r <= 0)
+		return (r);
+
+	r = l9p_pu32(msg, &qid->version);
+	if (r <= 0)
+		return (r);
+
+	r = l9p_pu64(msg, &qid->path);
+	if (r <= 0)
+		return (r);
+
+	return (QID_SIZE);
+}
+
+/*
+ * Pack or unpack a counted list of qids: a 2-byte count (*num)
+ * followed by that many qids.
+ *
+ * Returns 2 + 13 * *num (after possibly setting *num when
+ * unpacking), or -1 on error.
+ *
+ * NOTE(review): unlike l9p_pustrings() there is no upper limit here;
+ * when unpacking, *num comes from the wire and is used unchecked to
+ * index qids[] -- confirm callers guarantee the array is big enough.
+ */
+static ssize_t
+l9p_puqids(struct l9p_message *msg, uint16_t *num, struct l9p_qid *qids)
+{
+	struct l9p_qid *q;
+	size_t remaining;
+	ssize_t total, n;
+
+	total = l9p_pu16(msg, num);
+	if (total <= 0)
+		return (total);
+
+	for (q = qids, remaining = *num; remaining > 0; remaining--, q++) {
+		n = l9p_puqid(msg, q);
+		if (n < 0)
+			return (-1);
+		total += n;
+	}
+	return (total);
+}
+
+/*
+ * Add the result of one pack/unpack step to a running byte total.
+ *
+ * Returns 0 if the step succeeded (n >= 0), -1 if it failed; the
+ * total is left untouched on failure.
+ */
+static inline int
+l9p_pustat_add(ssize_t *total, ssize_t n)
+{
+
+	if (n < 0)
+		return (-1);
+	*total += n;
+	return (0);
+}
+
+/*
+ * Pack or unpack a l9p_stat.
+ *
+ * These have variable size, and the size further depends on
+ * the protocol version: the extension string and the numeric
+ * uid/gid/muid fields are present only for version >= L9P_2000U.
+ *
+ * Processing stops at the first field that fails.  When unpacking,
+ * strings unpacked before the failure remain allocated in *stat;
+ * releasing them (e.g. with l9p_freestat) is left to the caller.
+ *
+ * Returns the number of bytes packed/unpacked, or -1 on error
+ * (a field failed, or fewer bytes were transferred than the
+ * stat's own size field claims).
+ */
+ssize_t
+l9p_pustat(struct l9p_message *msg, struct l9p_stat *stat,
+    enum l9p_version version)
+{
+	ssize_t r = 0;
+	uint16_t size = 0;
+
+	/* The on-wire size field excludes the size of the size field. */
+	if (msg->lm_mode == L9P_PACK)
+		size = l9p_sizeof_stat(stat, version) - 2;
+
+	/*
+	 * Check every step explicitly: summing raw return values would
+	 * let a -1 hide inside the total.  The || chain evaluates the
+	 * fields strictly in wire order and stops at the first failure.
+	 */
+	if (l9p_pustat_add(&r, l9p_pu16(msg, &size)) < 0 ||
+	    l9p_pustat_add(&r, l9p_pu16(msg, &stat->type)) < 0 ||
+	    l9p_pustat_add(&r, l9p_pu32(msg, &stat->dev)) < 0 ||
+	    l9p_pustat_add(&r, l9p_puqid(msg, &stat->qid)) < 0 ||
+	    l9p_pustat_add(&r, l9p_pu32(msg, &stat->mode)) < 0 ||
+	    l9p_pustat_add(&r, l9p_pu32(msg, &stat->atime)) < 0 ||
+	    l9p_pustat_add(&r, l9p_pu32(msg, &stat->mtime)) < 0 ||
+	    l9p_pustat_add(&r, l9p_pu64(msg, &stat->length)) < 0 ||
+	    l9p_pustat_add(&r, l9p_pustring(msg, &stat->name)) < 0 ||
+	    l9p_pustat_add(&r, l9p_pustring(msg, &stat->uid)) < 0 ||
+	    l9p_pustat_add(&r, l9p_pustring(msg, &stat->gid)) < 0 ||
+	    l9p_pustat_add(&r, l9p_pustring(msg, &stat->muid)) < 0)
+		return (-1);
+
+	if (version >= L9P_2000U) {
+		if (l9p_pustat_add(&r,
+		    l9p_pustring(msg, &stat->extension)) < 0 ||
+		    l9p_pustat_add(&r, l9p_pu32(msg, &stat->n_uid)) < 0 ||
+		    l9p_pustat_add(&r, l9p_pu32(msg, &stat->n_gid)) < 0 ||
+		    l9p_pustat_add(&r, l9p_pu32(msg, &stat->n_muid)) < 0)
+			return (-1);
+	}
+
+	/* The stat must be at least as long as its size field says. */
+	if (r < (ssize_t)size + 2)
+		return (-1);
+
+	return (r);
+}
+
+/*
+ * Pack or unpack a variable-length dirent: qid, 64-bit offset,
+ * type byte, then the name string.
+ *
+ * If unpacking, the name field is allocated here and the caller
+ * must free it.
+ *
+ * Returns the wire-format length, or -1 if we ran out of room
+ * (or the name string was rejected).
+ */
+ssize_t
+l9p_pudirent(struct l9p_message *msg, struct l9p_dirent *de)
+{
+	ssize_t qlen, olen, tlen, nlen, fixed;
+
+	/* All four fields are always attempted, in wire order. */
+	qlen = l9p_puqid(msg, &de->qid);
+	olen = l9p_pu64(msg, &de->offset);
+	tlen = l9p_pu8(msg, &de->type);
+	nlen = l9p_pustring(msg, &de->name);
+
+	/* Any failed fixed-size field leaves the sum short. */
+	fixed = qlen + olen + tlen;
+	if (fixed < QID_SIZE + 8 + 1 || nlen < 0)
+		return (-1);
+	return (fixed + nlen);
+}
+
+/*
+ * Pack or unpack a request or response (fcall).
+ *
+ * Every message starts with a 32-bit length, an 8-bit type and a
+ * 16-bit tag; the remaining fields depend on the type (and, for a
+ * few messages, on the protocol version).  When packing, the length
+ * is not known up front: a placeholder is written first and, once
+ * the body has been packed, the cursor is rewound and the real
+ * length is installed at the front of the message.
+ *
+ * Within each case only the result of the last transfer is checked:
+ * once l9p_iov_io() runs out of space every later transfer fails as
+ * well.  String transfers are checked individually because they can
+ * fail for other reasons.
+ *
+ * Returns 0 on success, -1 on error.  (It's up to the caller
+ * to call l9p_freefcall on our failure.)
+ */
+int
+l9p_pufcall(struct l9p_message *msg, union l9p_fcall *fcall,
+    enum l9p_version version)
+{
+	uint32_t length = 0;
+	ssize_t r;
+
+	/*
+	 * Get overall length, type, and tag, which should appear
+	 * in all messages.  If not even that works, abort immediately.
+	 */
+	l9p_pu32(msg, &length);
+	l9p_pu8(msg, &fcall->hdr.type);
+	r = l9p_pu16(msg, &fcall->hdr.tag);
+	if (r < 0)
+		return (-1);
+
+	/*
+	 * Decode remainder of message.	 When unpacking, this may
+	 * allocate memory, even if we fail during the decode.
+	 * Note that the initial fcall is zeroed out, though, so
+	 * we can just freefcall() to release whatever might have
+	 * gotten allocated, if the unpack fails due to a short
+	 * packet.
+	 */
+	switch (fcall->hdr.type) {
+	case L9P_TVERSION:
+	case L9P_RVERSION:
+		l9p_pu32(msg, &fcall->version.msize);
+		r = l9p_pustring(msg, &fcall->version.version);
+		break;
+
+	case L9P_TAUTH:
+		l9p_pu32(msg, &fcall->tauth.afid);
+		r = l9p_pustring(msg, &fcall->tauth.uname);
+		if (r < 0)
+			break;
+		r = l9p_pustring(msg, &fcall->tauth.aname);
+		if (r < 0)
+			break;
+		if (version >= L9P_2000U)
+			r = l9p_pu32(msg, &fcall->tauth.n_uname);
+		break;
+
+	case L9P_RAUTH:
+		r = l9p_puqid(msg, &fcall->rauth.aqid);
+		break;
+
+	case L9P_TATTACH:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		l9p_pu32(msg, &fcall->tattach.afid);
+		r = l9p_pustring(msg, &fcall->tattach.uname);
+		if (r < 0)
+			break;
+		r = l9p_pustring(msg, &fcall->tattach.aname);
+		if (r < 0)
+			break;
+		if (version >= L9P_2000U)
+			r = l9p_pu32(msg, &fcall->tattach.n_uname);
+		break;
+
+	case L9P_RATTACH:
+		r = l9p_puqid(msg, &fcall->rattach.qid);
+		break;
+
+	case L9P_RERROR:
+		r = l9p_pustring(msg, &fcall->error.ename);
+		if (r < 0)
+			break;
+		if (version >= L9P_2000U)
+			r = l9p_pu32(msg, &fcall->error.errnum);
+		break;
+
+	case L9P_RLERROR:
+		r = l9p_pu32(msg, &fcall->error.errnum);
+		break;
+
+	case L9P_TFLUSH:
+		r = l9p_pu16(msg, &fcall->tflush.oldtag);
+		break;
+
+	case L9P_RFLUSH:
+		break;
+
+	case L9P_TWALK:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		l9p_pu32(msg, &fcall->twalk.newfid);
+		r = l9p_pustrings(msg, &fcall->twalk.nwname,
+		    fcall->twalk.wname, N(fcall->twalk.wname));
+		break;
+
+	case L9P_RWALK:
+		r = l9p_puqids(msg, &fcall->rwalk.nwqid, fcall->rwalk.wqid);
+		break;
+
+	case L9P_TOPEN:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pu8(msg, &fcall->topen.mode);
+		break;
+
+	case L9P_ROPEN:
+		l9p_puqid(msg, &fcall->ropen.qid);
+		r = l9p_pu32(msg, &fcall->ropen.iounit);
+		break;
+
+	case L9P_TCREATE:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pustring(msg, &fcall->tcreate.name);
+		if (r < 0)
+			break;
+		l9p_pu32(msg, &fcall->tcreate.perm);
+		r = l9p_pu8(msg, &fcall->tcreate.mode);
+		if (version >= L9P_2000U)
+			r = l9p_pustring(msg, &fcall->tcreate.extension);
+		break;
+
+	case L9P_RCREATE:
+		l9p_puqid(msg, &fcall->rcreate.qid);
+		r = l9p_pu32(msg, &fcall->rcreate.iounit);
+		break;
+
+	case L9P_TREAD:
+	case L9P_TREADDIR:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		l9p_pu64(msg, &fcall->io.offset);
+		r = l9p_pu32(msg, &fcall->io.count);
+		break;
+
+	case L9P_RREAD:
+	case L9P_RREADDIR:
+		r = l9p_pu32(msg, &fcall->io.count);
+		break;
+
+	case L9P_TWRITE:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		l9p_pu64(msg, &fcall->io.offset);
+		r = l9p_pu32(msg, &fcall->io.count);
+		break;
+
+	case L9P_RWRITE:
+		r = l9p_pu32(msg, &fcall->io.count);
+		break;
+
+	case L9P_TCLUNK:
+	case L9P_TSTAT:
+	case L9P_TREMOVE:
+	case L9P_TSTATFS:
+		r = l9p_pu32(msg, &fcall->hdr.fid);
+		break;
+
+	case L9P_RCLUNK:
+	case L9P_RREMOVE:
+		break;
+
+	case L9P_RSTAT:
+	{
+		/*
+		 * The stat is preceded by an outer 16-bit byte count.
+		 * When unpacking, the value computed here is simply
+		 * overwritten by what is read from the wire.
+		 */
+		uint16_t size = l9p_sizeof_stat(&fcall->rstat.stat,
+		    version);
+		l9p_pu16(msg, &size);
+		r = l9p_pustat(msg, &fcall->rstat.stat, version);
+	}
+		break;
+
+	case L9P_TWSTAT:
+	{
+		/*
+		 * Same outer byte count as for Rstat.  It must be
+		 * computed here too, otherwise packing a Twstat would
+		 * emit an uninitialized value.
+		 */
+		uint16_t size = l9p_sizeof_stat(&fcall->twstat.stat,
+		    version);
+		l9p_pu32(msg, &fcall->hdr.fid);
+		l9p_pu16(msg, &size);
+		r = l9p_pustat(msg, &fcall->twstat.stat, version);
+	}
+		break;
+
+	case L9P_RWSTAT:
+		break;
+
+	case L9P_RSTATFS:
+		l9p_pu32(msg, &fcall->rstatfs.statfs.type);
+		l9p_pu32(msg, &fcall->rstatfs.statfs.bsize);
+		l9p_pu64(msg, &fcall->rstatfs.statfs.blocks);
+		l9p_pu64(msg, &fcall->rstatfs.statfs.bfree);
+		l9p_pu64(msg, &fcall->rstatfs.statfs.bavail);
+		l9p_pu64(msg, &fcall->rstatfs.statfs.files);
+		l9p_pu64(msg, &fcall->rstatfs.statfs.ffree);
+		l9p_pu64(msg, &fcall->rstatfs.statfs.fsid);
+		r = l9p_pu32(msg, &fcall->rstatfs.statfs.namelen);
+		break;
+
+	case L9P_TLOPEN:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pu32(msg, &fcall->tlopen.flags);
+		break;
+
+	case L9P_RLOPEN:
+		l9p_puqid(msg, &fcall->rlopen.qid);
+		r = l9p_pu32(msg, &fcall->rlopen.iounit);
+		break;
+
+	case L9P_TLCREATE:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pustring(msg, &fcall->tlcreate.name);
+		if (r < 0)
+			break;
+		l9p_pu32(msg, &fcall->tlcreate.flags);
+		l9p_pu32(msg, &fcall->tlcreate.mode);
+		r = l9p_pu32(msg, &fcall->tlcreate.gid);
+		break;
+
+	case L9P_RLCREATE:
+		l9p_puqid(msg, &fcall->rlcreate.qid);
+		r = l9p_pu32(msg, &fcall->rlcreate.iounit);
+		break;
+
+	case L9P_TSYMLINK:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pustring(msg, &fcall->tsymlink.name);
+		if (r < 0)
+			break;
+		r = l9p_pustring(msg, &fcall->tsymlink.symtgt);
+		if (r < 0)
+			break;
+		/*
+		 * NOTE(review): this goes through the tlcreate view of
+		 * the union rather than tsymlink; it looks like a
+		 * copy-and-paste slip -- confirm that tsymlink has its
+		 * own gid member and that the offsets agree.
+		 */
+		r = l9p_pu32(msg, &fcall->tlcreate.gid);
+		break;
+
+	case L9P_RSYMLINK:
+		r = l9p_puqid(msg, &fcall->rsymlink.qid);
+		break;
+
+	case L9P_TMKNOD:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pustring(msg, &fcall->tmknod.name);
+		if (r < 0)
+			break;
+		l9p_pu32(msg, &fcall->tmknod.mode);
+		l9p_pu32(msg, &fcall->tmknod.major);
+		l9p_pu32(msg, &fcall->tmknod.minor);
+		r = l9p_pu32(msg, &fcall->tmknod.gid);
+		break;
+
+	case L9P_RMKNOD:
+		r = l9p_puqid(msg, &fcall->rmknod.qid);
+		break;
+
+	case L9P_TRENAME:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		l9p_pu32(msg, &fcall->trename.dfid);
+		r = l9p_pustring(msg, &fcall->trename.name);
+		break;
+
+	case L9P_RRENAME:
+		break;
+
+	case L9P_TREADLINK:
+		r = l9p_pu32(msg, &fcall->hdr.fid);
+		break;
+
+	case L9P_RREADLINK:
+		r = l9p_pustring(msg, &fcall->rreadlink.target);
+		break;
+
+	case L9P_TGETATTR:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pu64(msg, &fcall->tgetattr.request_mask);
+		break;
+
+	case L9P_RGETATTR:
+		l9p_pu64(msg, &fcall->rgetattr.valid);
+		l9p_puqid(msg, &fcall->rgetattr.qid);
+		l9p_pu32(msg, &fcall->rgetattr.mode);
+		l9p_pu32(msg, &fcall->rgetattr.uid);
+		l9p_pu32(msg, &fcall->rgetattr.gid);
+		l9p_pu64(msg, &fcall->rgetattr.nlink);
+		l9p_pu64(msg, &fcall->rgetattr.rdev);
+		l9p_pu64(msg, &fcall->rgetattr.size);
+		l9p_pu64(msg, &fcall->rgetattr.blksize);
+		l9p_pu64(msg, &fcall->rgetattr.blocks);
+		l9p_pu64(msg, &fcall->rgetattr.atime_sec);
+		l9p_pu64(msg, &fcall->rgetattr.atime_nsec);
+		l9p_pu64(msg, &fcall->rgetattr.mtime_sec);
+		l9p_pu64(msg, &fcall->rgetattr.mtime_nsec);
+		l9p_pu64(msg, &fcall->rgetattr.ctime_sec);
+		l9p_pu64(msg, &fcall->rgetattr.ctime_nsec);
+		l9p_pu64(msg, &fcall->rgetattr.btime_sec);
+		l9p_pu64(msg, &fcall->rgetattr.btime_nsec);
+		l9p_pu64(msg, &fcall->rgetattr.gen);
+		r = l9p_pu64(msg, &fcall->rgetattr.data_version);
+		break;
+
+	case L9P_TSETATTR:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		l9p_pu32(msg, &fcall->tsetattr.valid);
+		l9p_pu32(msg, &fcall->tsetattr.mode);
+		l9p_pu32(msg, &fcall->tsetattr.uid);
+		l9p_pu32(msg, &fcall->tsetattr.gid);
+		l9p_pu64(msg, &fcall->tsetattr.size);
+		l9p_pu64(msg, &fcall->tsetattr.atime_sec);
+		l9p_pu64(msg, &fcall->tsetattr.atime_nsec);
+		l9p_pu64(msg, &fcall->tsetattr.mtime_sec);
+		r = l9p_pu64(msg, &fcall->tsetattr.mtime_nsec);
+		break;
+
+	case L9P_RSETATTR:
+		break;
+
+	case L9P_TXATTRWALK:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		l9p_pu32(msg, &fcall->txattrwalk.newfid);
+		r = l9p_pustring(msg, &fcall->txattrwalk.name);
+		break;
+
+	case L9P_RXATTRWALK:
+		r = l9p_pu64(msg, &fcall->rxattrwalk.size);
+		break;
+
+	case L9P_TXATTRCREATE:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pustring(msg, &fcall->txattrcreate.name);
+		if (r < 0)
+			break;
+		l9p_pu64(msg, &fcall->txattrcreate.attr_size);
+		r = l9p_pu32(msg, &fcall->txattrcreate.flags);
+		break;
+
+	case L9P_RXATTRCREATE:
+		break;
+
+	case L9P_TFSYNC:
+		r = l9p_pu32(msg, &fcall->hdr.fid);
+		break;
+
+	case L9P_RFSYNC:
+		break;
+
+	case L9P_TLOCK:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		l9p_pu8(msg, &fcall->tlock.type);
+		l9p_pu32(msg, &fcall->tlock.flags);
+		l9p_pu64(msg, &fcall->tlock.start);
+		l9p_pu64(msg, &fcall->tlock.length);
+		l9p_pu32(msg, &fcall->tlock.proc_id);
+		r = l9p_pustring(msg, &fcall->tlock.client_id);
+		break;
+
+	case L9P_RLOCK:
+		r = l9p_pu8(msg, &fcall->rlock.status);
+		break;
+
+	case L9P_TGETLOCK:
+		/* Tgetlock is a fid followed by the Rgetlock fields. */
+		l9p_pu32(msg, &fcall->hdr.fid);
+		/* FALLTHROUGH */
+
+	case L9P_RGETLOCK:
+		l9p_pu8(msg, &fcall->getlock.type);
+		l9p_pu64(msg, &fcall->getlock.start);
+		l9p_pu64(msg, &fcall->getlock.length);
+		l9p_pu32(msg, &fcall->getlock.proc_id);
+		r = l9p_pustring(msg, &fcall->getlock.client_id);
+		break;
+
+	case L9P_TLINK:
+		/* Here the directory fid precedes the file's fid. */
+		l9p_pu32(msg, &fcall->tlink.dfid);
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pustring(msg, &fcall->tlink.name);
+		break;
+
+	case L9P_RLINK:
+		break;
+
+	case L9P_TMKDIR:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pustring(msg, &fcall->tmkdir.name);
+		if (r < 0)
+			break;
+		l9p_pu32(msg, &fcall->tmkdir.mode);
+		r = l9p_pu32(msg, &fcall->tmkdir.gid);
+		break;
+
+	case L9P_RMKDIR:
+		r = l9p_puqid(msg, &fcall->rmkdir.qid);
+		break;
+
+	case L9P_TRENAMEAT:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pustring(msg, &fcall->trenameat.oldname);
+		if (r < 0)
+			break;
+		l9p_pu32(msg, &fcall->trenameat.newdirfid);
+		r = l9p_pustring(msg, &fcall->trenameat.newname);
+		break;
+
+	case L9P_RRENAMEAT:
+		break;
+
+	case L9P_TUNLINKAT:
+		l9p_pu32(msg, &fcall->hdr.fid);
+		r = l9p_pustring(msg, &fcall->tunlinkat.name);
+		if (r < 0)
+			break;
+		r = l9p_pu32(msg, &fcall->tunlinkat.flags);
+		break;
+
+	case L9P_RUNLINKAT:
+		break;
+
+	default:
+		/*
+		 * NOTE(review): an unknown type is only logged; r still
+		 * holds the (successful) header result, so the function
+		 * goes on to return 0 -- confirm this is intended.
+		 */
+		L9P_LOG(L9P_ERROR, "%s(): missing case for type %d",
+		    __func__, fcall->hdr.type);
+		break;
+	}
+
+	/* Check for over- or under-run, or pustring error. */
+	if (r < 0)
+		return (-1);
+
+	if (msg->lm_mode == L9P_PACK) {
+		/* Rewind to the beginning and install size at front. */
+		uint32_t len = (uint32_t)msg->lm_size;
+		msg->lm_cursor_offset = 0;
+		msg->lm_cursor_iov = 0;
+
+		/*
+		 * Subtract 4 bytes from current size, because we're
+		 * overwriting size (rewinding message to the beginning)
+		 * and writing again, which will increase it 4 more.
+		 */
+		msg->lm_size -= sizeof(uint32_t);
+
+		/*
+		 * For read replies the advertised length also covers
+		 * io.count bytes that were not packed by this function
+		 * (presumably the data payload the caller supplies
+		 * separately -- confirm).
+		 */
+		if (fcall->hdr.type == L9P_RREAD ||
+		    fcall->hdr.type == L9P_RREADDIR)
+			len += fcall->io.count;
+
+		l9p_pu32(msg, &len);
+	}
+
+	return (0);
+}
+
+/*
+ * Free any strings or other data malloc'ed in the process of
+ * packing or unpacking an fcall.
+ *
+ * Which members are released is decided by fcall->hdr.type; message
+ * types not listed below are left untouched.  Members that were
+ * never allocated are expected to be NULL (free(NULL) is a no-op),
+ * which holds when the fcall was zeroed before unpacking.
+ */
+void
+l9p_freefcall(union l9p_fcall *fcall)
+{
+	uint16_t i;
+
+	switch (fcall->hdr.type) {
+
+	case L9P_TVERSION:
+	case L9P_RVERSION:
+		free(fcall->version.version);
+		return;
+
+	case L9P_TAUTH:
+		/* Unpacking a Tauth allocates both strings as well. */
+		free(fcall->tauth.uname);
+		free(fcall->tauth.aname);
+		return;
+
+	case L9P_TATTACH:
+		free(fcall->tattach.aname);
+		free(fcall->tattach.uname);
+		return;
+
+	case L9P_TWALK:
+		for (i = 0; i < fcall->twalk.nwname; i++)
+			free(fcall->twalk.wname[i]);
+		return;
+
+	case L9P_TCREATE:
+	case L9P_TOPEN:
+		/*
+		 * NOTE(review): Topen is handled through the tcreate
+		 * view of the union -- confirm the layouts make this
+		 * safe for a Topen message.
+		 */
+		free(fcall->tcreate.name);
+		free(fcall->tcreate.extension);
+		return;
+
+	case L9P_RSTAT:
+		l9p_freestat(&fcall->rstat.stat);
+		return;
+
+	case L9P_TWSTAT:
+		l9p_freestat(&fcall->twstat.stat);
+		return;
+
+	case L9P_TLCREATE:
+		free(fcall->tlcreate.name);
+		return;
+
+	case L9P_TSYMLINK:
+		free(fcall->tsymlink.name);
+		free(fcall->tsymlink.symtgt);
+		return;
+
+	case L9P_TMKNOD:
+		free(fcall->tmknod.name);
+		return;
+
+	case L9P_TRENAME:
+		free(fcall->trename.name);
+		return;
+
+	case L9P_RREADLINK:
+		free(fcall->rreadlink.target);
+		return;
+
+	case L9P_TXATTRWALK:
+		free(fcall->txattrwalk.name);
+		return;
+
+	case L9P_TXATTRCREATE:
+		free(fcall->txattrcreate.name);
+		return;
+
+	case L9P_TLOCK:
+		free(fcall->tlock.client_id);
+		return;
+
+	case L9P_TGETLOCK:
+	case L9P_RGETLOCK:
+		free(fcall->getlock.client_id);
+		return;
+
+	case L9P_TLINK:
+		free(fcall->tlink.name);
+		return;
+
+	case L9P_TMKDIR:
+		free(fcall->tmkdir.name);
+		return;
+
+	case L9P_TRENAMEAT:
+		free(fcall->trenameat.oldname);
+		free(fcall->trenameat.newname);
+		return;
+
+	case L9P_TUNLINKAT:
+		free(fcall->tunlinkat.name);
+		return;
+	}
+}
+
+/*
+ * Release the five strings owned by a l9p_stat (name, extension,
+ * uid, gid, muid).  The pointers themselves are not cleared.
+ */
+void
+l9p_freestat(struct l9p_stat *stat)
+{
+	char *owned[] = {
+		stat->name,
+		stat->extension,
+		stat->uid,
+		stat->gid,
+		stat->muid
+	};
+	size_t i;
+
+	for (i = 0; i < sizeof(owned) / sizeof(owned[0]); i++)
+		free(owned[i]);
+}
+
+/*
+ * Compute the wire size of a l9p_stat, including its own leading
+ * 2-byte size field.  For version >= L9P_2000U the extension string
+ * and three extra 32-bit fields are counted as well.  NULL strings
+ * count as empty.
+ */
+uint16_t
+l9p_sizeof_stat(struct l9p_stat *stat, enum l9p_version version)
+{
+	/* size + type, dev + mode + atime + mtime, qid, length */
+	uint16_t fixed = 2 * L9P_WORD + 4 * L9P_DWORD + QID_SIZE +
+	    L9P_QWORD;
+	uint16_t strings = STRING_SIZE(stat->name) +
+	    STRING_SIZE(stat->uid) +
+	    STRING_SIZE(stat->gid) +
+	    STRING_SIZE(stat->muid);
+	uint16_t size = fixed + strings;
+
+	if (version >= L9P_2000U)
+		size += STRING_SIZE(stat->extension) + 3 * L9P_DWORD;
+
+	return (size);
+}
Index: lib/lib9p/pytest/.gitignore
===================================================================
--- /dev/null
+++ lib/lib9p/pytest/.gitignore
@@ -0,0 +1,3 @@
+*.pyc
+__pycache__
+testconf.ini
Index: lib/lib9p/pytest/Makefile
===================================================================
--- /dev/null
+++ lib/lib9p/pytest/Makefile
@@ -0,0 +1,9 @@
+# Python interpreter used to run the tests; override on the command
+# line, e.g. "make PYTHON=python3 selftest".
+PYTHON?=python
+
+# None of these targets produce a file of the same name.
+.PHONY: selftest clean cleandir
+
+# Run each listed module as a script.  Stop at the first one that
+# fails so that the failure is reflected in make's exit status
+# (without the "|| exit 1" only the last script's status counts).
+selftest:
+	for f in lerrno p9err pfod protocol sequencer; do \
+	    ${PYTHON} $$f.py || exit 1; \
+	done
+
+# Remove byte-code caches and log files.
+clean cleandir:
+	rm -rf *.pyc __pycache__ *.log
Index: lib/lib9p/pytest/README
===================================================================
--- /dev/null
+++ lib/lib9p/pytest/README
@@ -0,0 +1,32 @@
+Here are some very skeletal instructions for using
+the client test code.
+
+on server (assumes BSD style LD_LIBRARY_PATH):
+
+mkdir /tmp/foo
+cd lib9p
+env LD_LIBRARY_PATH=. LIB9P_LOGGING=stderr example/server -h localhost -p 12345 /tmp/foo
+
+(this can be run as a non-root user for now, but some things
+only work when run as root)
+
+on client (same machine as server, but can always be run as
+non-root user):
+
+cd lib9p/pytest
+ONE TIME ONLY: copy testconf.ini.sample to testconf.ini, adjust to taste
+./client.py
+
+TODO: rework ./client.py so it can locate the .ini file better
+
+########
+
+IF USING diod (http://github.com/chaos/diod) AS THE SERVER ON
+A LINUX MACHINE:
+
+ - The instructions for running the server are (or were):
+     sudo ./diod -f -d 1 -n -e /tmp/9
+ - You must mkdir the exported 9pfs file system (e.g., mkdir /tmp/9).
+ - While uname is not really used, aname (the attach name) IS used
+   and must match the exported file system, e.g., testconf.ini
+   must have "aname = /tmp/9".
Index: lib/lib9p/pytest/client.py
===================================================================
--- /dev/null
+++ lib/lib9p/pytest/client.py
@@ -0,0 +1,643 @@
+#! /usr/bin/env python
+
+"""
+Run various tests, as a client.
+"""
+
+from __future__ import print_function
+
+import argparse
+try:
+    import ConfigParser as configparser
+except ImportError:
+    import configparser
+import functools
+import logging
+import os
+import socket
+import struct
+import sys
+import time
+import traceback
+
+import p9conn
+import protocol
+
+LocalError = p9conn.LocalError
+RemoteError = p9conn.RemoteError
+TEError = p9conn.TEError
+
+class TestState(object):
+    def __init__(self):
+        self.config = None
+        self.logger = None
+        self.successes = 0
+        self.skips = 0
+        self.failures = 0
+        self.exceptions = 0
+        self.clnt_tab = {}      # cid -> [client, session info or None]
+        self.mkclient = None    # zero-argument factory called by ccc()
+        self.stop = False
+        self.gid = 0
+
+    def ccc(self, cid=None):
+        """
+        Connect or reconnect as client (ccc = check and connect client).
+
+        If caller provides a cid (client ID) we check that specific
+        client.  Otherwise the default ID ('base') is used.
+        In any case we return the [client, session] pair: the now-connected
+        client plus its session info (None until ccs() attaches).
+        """
+        if cid is None:
+            cid = 'base'
+        pair = self.clnt_tab.get(cid)
+        if pair is None:
+            clnt = self.mkclient()
+            pair = [clnt, None]
+            self.clnt_tab[cid] = pair
+        else:
+            clnt = pair[0]
+        if not clnt.is_connected():
+            clnt.connect()
+        return pair
+
+    def dcc(self, cid=None):
+        """
+        Disconnect client (disconnect checked client).  If no specific
+        client ID is provided, this disconnects ALL checked clients!
+        """
+        if cid is None:
+            for cid in list(self.clnt_tab.keys()):
+                self.dcc(cid)
+        pair = self.clnt_tab.get(cid)
+        if pair is not None:
+            clnt = pair[0]
+            if clnt.is_connected():
+                clnt.shutdown()
+            del self.clnt_tab[cid]
+
+    def ccs(self, cid=None):
+        """
+        Like ccc, but establish a session as well, by setting up
+        the uname/n_uname.
+
+        Return the client instance (only).
+        """
+        pair = self.ccc(cid)
+        clnt = pair[0]
+        if pair[1] is None:
+            # No session yet - establish one.  Note, this may fail.
+            section = None if cid is None else ('client-' + cid)
+            aname = getconf(self.config, section, 'aname', '')
+            uname = getconf(self.config, section, 'uname', '')
+            if clnt.proto > protocol.plain:
+                n_uname = getint(self.config, section, 'n_uname', 1001)
+            else:
+                n_uname = None
+            clnt.attach(afid=None, aname=aname, uname=uname, n_uname=n_uname)
+            pair[1] = (aname, uname, n_uname)
+        return clnt
+
+def getconf(conf, section, name, default=None, rtype=str):
+    """
+    Get configuration item for given section, or for "client" if
+    there is no entry for that particular section (or if section
+    is None).
+
+    This lets us get specific values for specific tests or
+    groups ([foo] name=value), falling back to general values
+    ([client] name=value).
+
+    The type of the returned value <rtype> can be str, int, bool,
+    or float.  The default is str (and see getint, getbool,
+    getfloat below).
+
+    A default value may be supplied; if it is, that's the default
+    return value (this default should have the right type).  If
+    no default is supplied, a missing value is an error.
+    """
+    try:
+        # note: conf.get(None, 'foo') raises NoSectionError
+        where = section
+        result = conf.get(where, name)
+    except (configparser.NoSectionError, configparser.NoOptionError):
+        try:
+            where = 'client'
+            result = conf.get(where, name)
+        except configparser.NoSectionError:
+            sys.exit('no [{0}] section in configuration!'.format(where))
+        except configparser.NoOptionError:
+            if default is not None:
+                return default
+            if section is not None:
+                where = '[{0}] or [{1}]'.format(section, where)
+            else:
+                where = '[{0}]'.format(where)
+            raise LocalError('need {0}=value in {1}'.format(name, where))
+    where = '[{0}]'.format(where)
+    if rtype is str:
+        return result
+    if rtype is int:
+        return int(result)
+    if rtype is float:
+        return float(result)
+    if rtype is bool:
+        if result.lower() in ('1', 't', 'true', 'y', 'yes'):
+            return True
+        if result.lower() in ('0', 'f', 'false', 'n', 'no'):
+            return False
+        raise ValueError('{0} {1}={2}: invalid boolean'.format(where, name,
+                                                              result))
+    raise ValueError('{0} {1}={2}: internal error: bad result type '
+                     '{3!r}'.format(where, name, result, rtype))
+
+def getint(conf, section, name, default=None):
+    "fetch a config item through getconf, converted to int"
+    return getconf(conf, section, name, default, rtype=int)
+
+def getfloat(conf, section, name, default=None):
+    "fetch a config item through getconf, converted to float"
+    return getconf(conf, section, name, default, rtype=float)
+
+def getbool(conf, section, name, default=None):
+    "fetch a config item through getconf, converted to bool"
+    return getconf(conf, section, name, default, rtype=bool)
+
+def pluralize(n, singular, plural):
+    "pick singular when n is exactly 1, otherwise plural"
+    return singular if n == 1 else plural
+
+class TCDone(Exception):
+    "raised by succ/fail/skip to skip the rest of the testcase's with-block"
+    pass
+
+class TestCase(object):
+    """
+    Start a test case.  Most callers must then do a ccs() to connect.
+
+    A failed test will generally disconnect from the server; a
+    new ccs() will reconnect, if the server is still alive.
+    """
+    def __init__(self, name, tstate):
+        self.name = name
+        self.status = None      # 'SUCC', 'FAIL', 'SKIP', or set in __exit__
+        self.detail = None
+        self.tstate = tstate
+        self._shutdown = None   # connection to shut down in __exit__, if any
+        self._autoclunk = None  # list of fids to clunk in __exit__
+        self._acconn = None     # client used for those clunks (set by ccs)
+
+    def auto_disconnect(self, conn):
+        self._shutdown = conn   # __exit__ will call conn.shutdown()
+
+    def succ(self, detail=None):
+        "set success status"
+        self.status = 'SUCC'
+        self.detail = detail
+        raise TCDone()
+
+    def fail(self, detail):
+        "set failure status"
+        self.status = 'FAIL'
+        self.detail = detail
+        raise TCDone()
+
+    def skip(self, detail=None):
+        "set skip status"
+        self.status = 'SKIP'
+        self.detail = detail
+        raise TCDone()
+
+    def autoclunk(self, fid):
+        "mark fid to be closed/clunked on test exit"
+        if self._acconn is None:
+            raise ValueError('autoclunk: no _acconn')
+        self._autoclunk.append(fid)
+
+    def trace(self, msg, *args, **kwargs):
+        "add tracing info to log-file output"
+        level = kwargs.pop('level', logging.INFO)
+        self.tstate.logger.log(level, '      ' + msg, *args, **kwargs)
+
+    def ccs(self):
+        "call tstate ccs, turn socket.error connect failure into test fail"
+        try:
+            self.detail = 'connecting'
+            ret = self.tstate.ccs()
+            self.detail = None
+            self._acconn = ret
+            return ret
+        except socket.error as err:
+            self.fail(str(err))
+
+    def __enter__(self):
+        self.tstate.logger.log(logging.DEBUG, 'ENTER: %s', self.name)
+        self._autoclunk = []
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        "tally and log the result, clunk fids; swallow expected exceptions"
+        tstate = self.tstate
+        eat_exc = False
+        tb_detail = None
+        if exc_type is TCDone:
+            # we exited with succ, fail, or skip
+            eat_exc = True
+            exc_type = None
+        if exc_type is not None:
+            self.status = ('EXCP' if self.status is None
+                           else self.status + ' EXC')
+            # issubclass, not ==: also catch socket.timeout, gaierror, etc.
+            if issubclass(exc_type, TEError):
+                # timeout/eof - best guess is that we crashed the server!
+                eat_exc = True
+                tb_detail = ['timeout or EOF']
+            elif issubclass(exc_type, (socket.error, RemoteError, LocalError)):
+                eat_exc = True
+                tb_detail = traceback.format_exception(exc_type, exc_val,
+                                                       exc_tb)
+            level = logging.ERROR
+            tstate.failures += 1
+            tstate.exceptions += 1
+        else:
+            if self.status is None:
+                self.status = 'SUCC'
+            if self.status == 'SUCC':
+                level = logging.INFO
+                tstate.successes += 1
+            elif self.status == 'SKIP':
+                level = logging.INFO
+                tstate.skips += 1
+            else:
+                level = logging.ERROR
+                tstate.failures += 1
+        tstate.logger.log(level, '%s: %s', self.status, self.name)
+        if self.detail:
+            tstate.logger.log(level, '      detail: %s', self.detail)
+        if tb_detail:
+            for line in tb_detail:
+                tstate.logger.log(level, '      %s', line.rstrip())
+        for fid in self._autoclunk:
+            self._acconn.clunk(fid, ignore_error=True)
+        if self._shutdown:
+            self._shutdown.shutdown()
+        return eat_exc
+
+def main():
+    "load config, run all test cases, print a summary; return exit status"
+    parser = argparse.ArgumentParser(description='run tests against a server')
+
+    parser.add_argument('-c', '--config', action='append',
+        help='specify additional file(s) to read (beyond testconf.ini)')
+
+    args = parser.parse_args()
+    # SafeConfigParser is gone as of Python 3.12; use it only if it exists.
+    config = getattr(configparser, 'SafeConfigParser',
+                     configparser.ConfigParser)()
+    config.optionxform = str    # use case sensitive keys
+
+    try:
+        with open('testconf.ini', 'r') as stream:
+            (getattr(config, 'read_file', None) or config.readfp)(stream)
+    except (OSError, IOError) as err:
+        sys.exit(str(err))
+    if args.config:
+        ok = config.read(args.config)
+        failed = set(args.config) - set(ok)     # files read() did not parse
+        if failed:
+            nfailed = len(failed)
+            word = 'files' if nfailed > 1 else 'file'
+            failed = ', '.join(sorted(failed))
+            print('failed to read {0} {1}: {2}'.format(nfailed, word, failed))
+            sys.exit(1)
+
+    logging.basicConfig(level=config.get('client', 'loglevel').upper())
+    logger = logging.getLogger(__name__)
+    tstate = TestState()
+    tstate.logger = logger
+    tstate.config = config
+
+    server = config.get('client', 'server')
+    port = config.getint('client', 'port')
+    proto = config.get('client', 'protocol')
+    may_downgrade = config.getboolean('client', 'may_downgrade')
+    timeout = config.getfloat('client', 'timeout')
+
+    tstate.stop = True # unless overwritten below
+    with TestCase('send bad packet', tstate) as tc:
+        tc.detail = 'connecting to {0}:{1}'.format(server, port)
+        try:
+            conn = p9conn.P9SockIO(logger, server=server, port=port)
+        except socket.error:
+            tc.fail('cannot connect at all (server down?)')
+        tc.auto_disconnect(conn)
+        tc.detail = None
+        pkt = struct.pack('<I', 256)
+        conn.write(pkt)
+        # ignore reply if any, we're just trying to trip the server
+        tstate.stop = False
+        tc.succ()
+
+    if not tstate.stop:
+        tstate.mkclient = functools.partial(p9conn.P9Client, logger,
+                                           timeout, proto, may_downgrade,
+                                           server=server, port=port)
+        tstate.stop = True
+        with TestCase('send bad Tversion', tstate) as tc:
+            try:
+                clnt = tstate.mkclient()
+            except socket.error:
+                tc.fail('can no longer connect, did bad pkt crash server?')
+            tc.auto_disconnect(clnt)
+            clnt.set_monkey('version', b'wrongo, fishbreath!')
+            tc.detail = 'connecting'
+            try:
+                clnt.connect()
+            except RemoteError as err:
+                tstate.stop = False
+                tc.succ(err.args[0])
+            tc.fail('server accepted a bad Tversion')
+
+    if not tstate.stop:
+        # All NUL characters in strings are invalid.
+        with TestCase('send illegal NUL in Tversion', tstate) as tc:
+            clnt = tstate.mkclient()
+            tc.auto_disconnect(clnt)
+            clnt.set_monkey('version', b'9P2000\0')
+            # Forcibly allow downgrade so that Tversion
+            # succeeds if they ignore the \0.
+            clnt.may_downgrade = True
+            tc.detail = 'connecting'
+            try:
+                clnt.connect()
+            except (TEError, RemoteError) as err:
+                tc.succ(err.args[0])
+            tc.fail('server accepted NUL in Tversion')
+
+    if not tstate.stop:
+        with TestCase('connect normally', tstate) as tc:
+            tc.detail = 'connecting'
+            try:
+                tstate.ccc()
+            except RemoteError as err:
+                # can't test any further, but this might be success
+                tstate.stop = True
+                if 'they only support version' in err.args[0]:
+                    tc.succ(err.args[0])
+                tc.fail(err.args[0])
+            tc.succ()
+
+    if not tstate.stop:
+        with TestCase('attach with bad afid', tstate) as tc:
+            clnt = tstate.ccc()[0]
+            section = 'attach-with-bad-afid'
+            aname = getconf(tstate.config, section, 'aname', '')
+            uname = getconf(tstate.config, section, 'uname', '')
+            if clnt.proto > protocol.plain:
+                n_uname = getint(tstate.config, section, 'n_uname', 1001)
+            else:
+                n_uname = None
+            try:
+                clnt.attach(afid=42, aname=aname, uname=uname, n_uname=n_uname)
+            except RemoteError as err:
+                tc.succ(err.args[0])
+            tstate.dcc()    # dcc lives on TestState; TestCase has no dcc()
+            tc.fail('bad attach afid not rejected')
+
+    try:
+        if not tstate.stop:
+            # Various Linux tests need gids.  Just get them for everyone.
+            tstate.gid = getint(tstate.config, 'client', 'gid', 0)
+            more_test_cases(tstate)
+    finally:
+        tstate.dcc()
+
+    n_tests = tstate.successes + tstate.failures
+    print('summary:')
+    if tstate.successes:
+        print('{0}/{1} tests succeeded'.format(tstate.successes, n_tests))
+    if tstate.failures:
+        print('{0}/{1} tests failed'.format(tstate.failures, n_tests))
+    if tstate.skips:
+        print('{0} {1} skipped'.format(tstate.skips,
+                                       pluralize(tstate.skips,
+                                                 'test', 'tests')))
+    if tstate.exceptions:
+        print('{0} {1} occurred'.format(tstate.exceptions,
+                                       pluralize(tstate.exceptions,
+                                                 'exception', 'exceptions')))
+    if tstate.stop:
+        print('tests stopped early')
+    return 1 if tstate.stop or tstate.exceptions or tstate.failures else 0
+
+def more_test_cases(tstate):
+    "run cases that can only proceed if connecting works at all"
+    with TestCase('attach normally', tstate) as tc:
+        tc.ccs()
+        tc.succ()
+    if tstate.stop:
+        return
+
+    # Empty string is not technically illegal.  It's not clear
+    # whether it should be accepted or rejected.  However, it
+    # used to crash the server entirely, so it's a desirable
+    # test case.
+    with TestCase('empty string in Twalk request', tstate) as tc:
+        clnt = tc.ccs()
+        try:
+            fid, qid = clnt.lookup(clnt.rootfid, [b''])
+        except RemoteError as err:
+            tc.succ(err.args[0])
+        clnt.clunk(fid)
+        tc.succ('note: empty Twalk component name not rejected')
+
+    # Name components may not contain /
+    with TestCase('embedded / in lookup component name', tstate) as tc:
+        clnt = tc.ccs()
+        try:
+            fid, qid = clnt.lookup(clnt.rootfid, [b'/'])
+            tc.autoclunk(fid)
+        except RemoteError as err:
+            tc.succ(err.args[0])
+        tc.fail('/ in lookup component name not rejected')
+
+    # Proceed from a clean tree.  As a side effect, this also tests
+    # either the old style readdir (read() on a directory fid) or
+    # the dot-L readdir().
+    #
+    # The test case will fail if we don't have permission to remove
+    # some file(s).
+    with TestCase('clean up tree (readdir+remove)', tstate) as tc:
+        clnt = tc.ccs()
+        fset = clnt.uxreaddir(b'/')
+        fset = [i for i in fset if i != '.' and i != '..']
+        tc.trace("what's there initially: {0!r}".format(fset))
+        try:
+            clnt.uxremove(b'/', force=False, recurse=True)
+        except RemoteError as err:
+            tc.trace('failed to read or clean up tree', level=logging.ERROR)
+            tc.trace('this might be a permissions error', level=logging.ERROR)
+            tstate.stop = True
+            tc.fail(str(err))
+        fset = clnt.uxreaddir(b'/')
+        fset = [i for i in fset if i != '.' and i != '..']
+        tc.trace("what's left after removing everything: {0!r}".format(fset))
+        if fset:
+            tstate.stop = True
+            tc.trace('note: could be a permissions error', level=logging.ERROR)
+            tc.fail('/ not empty after removing all: {0!r}'.format(fset))
+        tc.succ()
+    if tstate.stop:
+        return
+
+    # Name supplied to create, mkdir, etc, may not contain /.
+    # Note that this test may fail for the wrong reason if /dir
+    # itself does not already exist, so first let's make /dir.
+    only_dotl = getbool(tstate.config, 'client', 'only_dotl', False)
+    with TestCase('mkdir', tstate) as tc:
+        clnt = tc.ccs()
+        if only_dotl and not clnt.supports(protocol.td.Tmkdir):
+            tc.skip('cannot test dot-L mkdir on {0}'.format(clnt.proto))
+        try:
+            fid, qid = clnt.uxlookup(b'/dir', None)
+            tc.autoclunk(fid)
+            tstate.stop = True
+            tc.fail('found existing /dir after cleaning tree')
+        except RemoteError:
+            # we'll just assume it's "no such file or directory"
+            pass
+        if only_dotl:
+            qid = clnt.mkdir(clnt.rootfid, b'dir', 0o777, tstate.gid)
+        else:
+            qid, _ = clnt.create(clnt.rootfid, b'dir',
+                                 protocol.td.DMDIR | 0o777,
+                                 protocol.td.OREAD)
+        if qid.type != protocol.td.QTDIR:
+            tstate.stop = True
+            tc.fail('creating /dir: result is not a directory')
+        tc.trace('now attempting to create /dir/sub the wrong way')
+        try:
+            if only_dotl:
+                qid = clnt.mkdir(clnt.rootfid, b'dir/sub', 0o777, tstate.gid)
+            else:
+                qid, _ = clnt.create(clnt.rootfid, b'dir/sub',
+                                     protocol.td.DMDIR | 0o777,
+                                     protocol.td.OREAD)
+            # it's not clear what happened on the server at this point!
+            tc.trace("creating dir/sub (with embedded '/') should have "
+                     'failed but did not')
+            tstate.stop = True
+            fset = clnt.uxreaddir(b'/dir')
+            if 'sub' in fset:
+                tc.trace('(found our dir/sub detritus)')
+                clnt.uxremove(b'dir/sub', force=True)
+                fset = clnt.uxreaddir(b'/dir')
+                if 'sub' not in fset:
+                    tc.trace('(successfully removed our dir/sub detritus)')
+                    tstate.stop = False
+            tc.fail('created dir/sub as single directory with embedded slash')
+        except RemoteError as err:
+            # we'll just assume it's the right kind of error
+            tc.trace('invalid path dir/sub failed with: %s', str(err))
+            tc.succ('embedded slash in mkdir correctly refused')
+    if tstate.stop:
+        return
+
+    with TestCase('getattr/setattr', tstate) as tc:
+        # This test is not really thorough enough, need to test
+        # all combinations of settings.  Should also test that
+        # old values are restored on failure, although it is not
+        # clear how to trigger failures.
+        clnt = tc.ccs()
+        if not clnt.supports(protocol.td.Tgetattr):
+            tc.skip('{0} does not support Tgetattr'.format(clnt))
+        fid, _, _, _ = clnt.uxopen(b'/dir/file', os.O_CREAT | os.O_RDWR, 0o666,
+            gid=tstate.gid)
+        tc.autoclunk(fid)
+        written = clnt.write(fid, 0, 'bytes\n')
+        if written != 6:
+            tc.trace('expected to write 6 bytes, actually wrote %d', written,
+                     level=logging.WARN)
+        attrs = clnt.Tgetattr(fid)
+        #tc.trace('getattr: after write, before setattr: got %s', attrs)
+        if attrs.size != written:
+            tc.fail('getattr: expected size={0}, got size='
+                    '{1}'.format(written, attrs.size))
+        # now truncate, set mtime to (0, 140000000), and check result
+        set_time_to = p9conn.Timespec(sec=0, nsec=140000000)
+        clnt.Tsetattr(fid, size=0, mtime=set_time_to)
+        attrs = clnt.Tgetattr(fid)
+        #tc.trace('getattr: after setattr: got %s', attrs)
+        if attrs.mtime.sec != set_time_to.sec or attrs.size != 0:
+            tc.fail('setattr: expected to get back mtime.sec={0}, size=0; '
+                    'got mtime.sec={1}, size='
+                    '{2}'.format(set_time_to.sec, attrs.mtime.sec, attrs.size))
+        # nsec is not as stable but let's check
+        if attrs.mtime.nsec != set_time_to.nsec:
+            tc.trace('setattr: expected to get back mtime_nsec=%d; '
+                     'got %d', set_time_to.nsec, attrs.mtime.nsec)
+        tc.succ('able to set and see size and mtime')
+
+    # this test should be much later, but we know the current
+    # server is broken...
+    with TestCase('rename adjusts other fids', tstate) as tc:
+        clnt = tc.ccs()
+        dirfid, _ = clnt.uxlookup(b'/dir')
+        tc.autoclunk(dirfid)
+        clnt.uxmkdir(b'd1', 0o777, tstate.gid, startdir=dirfid)
+        clnt.uxmkdir(b'd1/sub', 0o777, tstate.gid, startdir=dirfid)
+        d1fid, _ = clnt.uxlookup(b'd1', dirfid)
+        tc.autoclunk(d1fid)
+        subfid, _ = clnt.uxlookup(b'sub', d1fid)
+        tc.autoclunk(subfid)
+        fid, _, _, _ = clnt.uxopen(b'file', os.O_CREAT | os.O_RDWR,
+                                   0o666, startdir=subfid, gid=tstate.gid)
+        tc.autoclunk(fid)
+        written = clnt.write(fid, 0, 'filedata\n')
+        if written != 9:
+            tc.trace('expected to write 9 bytes, actually wrote %d', written,
+                     level=logging.WARN)
+        # Now if we rename /dir/d1 to /dir/d2, the fids for both
+        # sub/file and sub itself should still be usable.  This
+        # holds for both Trename (Linux only) and Twstat based
+        # rename ops.
+        #
+        # Note that some servers may cache some number of files and/or
+        # directories held open, so we should open many fids to wipe
+        # out the cache (XXX notyet).
+        if clnt.supports(protocol.td.Trename):
+            clnt.rename(d1fid, dirfid, name=b'd2')
+        else:
+            clnt.wstat(d1fid, name=b'd2')
+        try:
+            rofid, _, _, _ = clnt.uxopen(b'file', os.O_RDONLY, startdir=subfid)
+            clnt.clunk(rofid)
+        except RemoteError as err:
+            tc.fail('open file in renamed dir/d2/sub: {0}'.format(err))
+        tc.succ()
+
+    # Even if xattrwalk is supported by the protocol, it's optional
+    # on the server.
+    with TestCase('xattrwalk', tstate) as tc:
+        clnt = tc.ccs()
+        if not clnt.supports(protocol.td.Txattrwalk):
+            tc.skip('{0} does not support Txattrwalk'.format(clnt))
+        dirfid, _ = clnt.uxlookup(b'/dir')
+        tc.autoclunk(dirfid)
+        try:
+            # need better tests...
+            attrfid, size = clnt.xattrwalk(dirfid)
+            tc.autoclunk(attrfid)
+            data = clnt.read(attrfid, 0, size)
+            tc.trace('xattrwalk with no name: data=%r', data)
+            tc.succ('xattrwalk size={0} datalen={1}'.format(size, len(data)))
+        except RemoteError as err:
+            tc.trace('xattrwalk on /dir: {0}'.format(err))
+        tc.succ('xattrwalk apparently not implemented')
+
+if __name__ == '__main__':
+    try:
+        sys.exit(main())
+    except KeyboardInterrupt:
+        sys.exit('\nInterrupted')
Index: lib/lib9p/pytest/lerrno.py
===================================================================
--- /dev/null
+++ lib/lib9p/pytest/lerrno.py
@@ -0,0 +1,291 @@
+#! /usr/bin/env python
+
+"""
+Error number definitions for Linux.
+"""
+
+EPERM =                 1
+ENOENT =                2
+ESRCH =                 3
+EINTR =                 4
+EIO =                   5
+ENXIO =                 6
+E2BIG =                 7
+ENOEXEC =               8
+EBADF =                 9
+ECHILD =                10
+EAGAIN =                11
+ENOMEM =                12
+EACCES =                13
+EFAULT =                14
+ENOTBLK =               15
+EBUSY =                 16
+EEXIST =                17
+EXDEV =                 18
+ENODEV =                19
+ENOTDIR =               20
+EISDIR =                21
+EINVAL =                22
+ENFILE =                23
+EMFILE =                24
+ENOTTY =                25
+ETXTBSY =               26
+EFBIG =                 27
+ENOSPC =                28
+ESPIPE =                29
+EROFS =                 30
+EMLINK =                31
+EPIPE =                 32
+EDOM =                  33
+ERANGE =                34
+EDEADLK =               35
+ENAMETOOLONG =          36
+ENOLCK =                37
+ENOSYS =                38
+ENOTEMPTY =             39
+ELOOP =                 40
+#                       41 unused
+ENOMSG =                42
+EIDRM =                 43
+ECHRNG =                44
+EL2NSYNC =              45
+EL3HLT =                46
+EL3RST =                47
+ELNRNG =                48
+EUNATCH =               49
+ENOCSI =                50
+EL2HLT =                51
+EBADE =                 52
+EBADR =                 53
+EXFULL =                54
+ENOANO =                55
+EBADRQC =               56
+EBADSLT =               57
+#                       58 unused
+EBFONT =                59
+ENOSTR =                60
+ENODATA =               61
+ETIME =                 62
+ENOSR =                 63
+ENONET =                64
+ENOPKG =                65
+EREMOTE =               66
+ENOLINK =               67
+EADV =                  68
+ESRMNT =                69
+ECOMM =                 70
+EPROTO =                71
+EMULTIHOP =             72
+EDOTDOT =               73
+EBADMSG =               74
+EOVERFLOW =             75
+ENOTUNIQ =              76
+EBADFD =                77
+EREMCHG =               78
+ELIBACC =               79
+ELIBBAD =               80
+ELIBSCN =               81
+ELIBMAX =               82
+ELIBEXEC =              83
+EILSEQ =                84
+ERESTART =              85
+ESTRPIPE =              86
+EUSERS =                87
+ENOTSOCK =              88
+EDESTADDRREQ =          89
+EMSGSIZE =              90
+EPROTOTYPE =            91
+ENOPROTOOPT =           92
+EPROTONOSUPPORT =       93
+ESOCKTNOSUPPORT =       94
+EOPNOTSUPP =            95
+EPFNOSUPPORT =          96
+EAFNOSUPPORT =          97
+EADDRINUSE =            98
+EADDRNOTAVAIL =         99
+ENETDOWN =              100
+ENETUNREACH =           101
+ENETRESET =             102
+ECONNABORTED =          103
+ECONNRESET =            104
+ENOBUFS =               105
+EISCONN =               106
+ENOTCONN =              107
+ESHUTDOWN =             108
+ETOOMANYREFS =          109
+ETIMEDOUT =             110
+ECONNREFUSED =          111
+EHOSTDOWN =             112
+EHOSTUNREACH =          113
+EALREADY =              114
+EINPROGRESS =           115
+ESTALE =                116
+EUCLEAN =               117
+ENOTNAM =               118
+ENAVAIL =               119
+EISNAM =                120
+EREMOTEIO =             121
+EDQUOT =                122
+ENOMEDIUM =             123
+EMEDIUMTYPE =           124
+ECANCELED =             125
+ENOKEY =                126
+EKEYEXPIRED =           127
+EKEYREVOKED =           128
+EKEYREJECTED =          129
+EOWNERDEAD =            130
+ENOTRECOVERABLE =       131
+ERFKILL =               132
+EHWPOISON =             133
+
+_strerror = {
+    EPERM:              'Permission denied',
+    ENOENT:             'No such file or directory',
+    ESRCH:              'No such process',
+    EINTR:              'Interrupted system call',
+    EIO:                'Input/output error',
+    ENXIO:              'Device not configured',
+    E2BIG:              'Argument list too long',
+    ENOEXEC:            'Exec format error',
+    EBADF:              'Bad file descriptor',
+    ECHILD:             'No child processes',
+    EAGAIN:             'Resource temporarily unavailable',
+    ENOMEM:             'Cannot allocate memory',
+    EACCES:             'Permission denied',
+    EFAULT:             'Bad address',
+    ENOTBLK:            'Block device required',
+    EBUSY:              'Device busy',
+    EEXIST:             'File exists',
+    EXDEV:              'Cross-device link',
+    ENODEV:             'Operation not supported by device',
+    ENOTDIR:            'Not a directory',
+    EISDIR:             'Is a directory',
+    EINVAL:             'Invalid argument',
+    ENFILE:             'Too many open files in system',
+    EMFILE:             'Too many open files',
+    ENOTTY:             'Inappropriate ioctl for device',
+    ETXTBSY:            'Text file busy',
+    EFBIG:              'File too large',
+    ENOSPC:             'No space left on device',
+    ESPIPE:             'Illegal seek',
+    EROFS:              'Read-only filesystem',
+    EMLINK:             'Too many links',
+    EPIPE:              'Broken pipe',
+    EDOM:               'Numerical argument out of domain',
+    ERANGE:             'Result too large',
+    EDEADLK:            'Resource deadlock avoided',
+    ENAMETOOLONG:       'File name too long',
+    ENOLCK:             'No locks available',
+    ENOSYS:             'Function not implemented',
+    ENOTEMPTY:          'Directory not empty',
+    ELOOP:              'Too many levels of symbolic links',
+    ENOMSG:             'No message of desired type',
+    EIDRM:              'Identifier removed',
+    ECHRNG:             'Channel number out of range',
+    EL2NSYNC:           'Level 2 not synchronized',
+    EL3HLT:             'Level 3 halted',
+    EL3RST:             'Level 3 reset',
+    ELNRNG:             'Link number out of range',
+    EUNATCH:            'Protocol driver not attached',
+    ENOCSI:             'No CSI structure available',
+    EL2HLT:             'Level 2 halted',
+    EBADE:              'Invalid exchange',
+    EBADR:              'Invalid request descriptor',
+    EXFULL:             'Exchange full',
+    ENOANO:             'No anode',
+    EBADRQC:            'Invalid request code',
+    EBADSLT:            'Invalid slot',
+    EBFONT:             'Bad font file format',
+    ENOSTR:             'Device not a stream',
+    ENODATA:            'No data available',
+    ETIME:              'Timer expired',
+    ENOSR:              'Out of streams resources',
+    ENONET:             'Machine is not on the network',
+    ENOPKG:             'Package not installed',
+    EREMOTE:            'Object is remote',
+    ENOLINK:            'Link has been severed',
+    EADV:               'Advertise error',
+    ESRMNT:             'Srmount error',
+    ECOMM:              'Communication error on send',
+    EPROTO:             'Protocol error',
+    EMULTIHOP:          'Multihop attempted',
+    EDOTDOT:            'RFS specific error',
+    EBADMSG:            'Bad message',
+    EOVERFLOW:          'Value too large for defined data type',
+    ENOTUNIQ:           'Name not unique on network',
+    EBADFD:             'File descriptor in bad state',
+    EREMCHG:            'Remote address changed',
+    ELIBACC:            'Can not access a needed shared library',
+    ELIBBAD:            'Accessing a corrupted shared library',
+    ELIBSCN:            '.lib section in a.out corrupted',
+    ELIBMAX:            'Attempting to link in too many shared libraries',
+    ELIBEXEC:           'Cannot exec a shared library directly',
+    EILSEQ:             'Invalid or incomplete multibyte or wide character',
+    ERESTART:           'Interrupted system call should be restarted',
+    ESTRPIPE:           'Streams pipe error',
+    EUSERS:             'Too many users',
+    ENOTSOCK:           'Socket operation on non-socket',
+    EDESTADDRREQ:       'Destination address required',
+    EMSGSIZE:           'Message too long',
+    EPROTOTYPE:         'Protocol wrong type for socket',
+    ENOPROTOOPT:        'Protocol not available',
+    EPROTONOSUPPORT:    'Protocol not supported',
+    ESOCKTNOSUPPORT:    'Socket type not supported',
+    EOPNOTSUPP:         'Operation not supported',
+    EPFNOSUPPORT:       'Protocol family not supported',
+    EAFNOSUPPORT:       'Address family not supported by protocol',
+    EADDRINUSE:         'Address already in use',
+    EADDRNOTAVAIL:      'Cannot assign requested address',
+    ENETDOWN:           'Network is down',
+    ENETUNREACH:        'Network is unreachable',
+    ENETRESET:          'Network dropped connection on reset',
+    ECONNABORTED:       'Software caused connection abort',
+    ECONNRESET:         'Connection reset by peer',
+    ENOBUFS:            'No buffer space available',
+    EISCONN:            'Transport endpoint is already connected',
+    ENOTCONN:           'Transport endpoint is not connected',
+    ESHUTDOWN:          'Cannot send after transport endpoint shutdown',
+    ETOOMANYREFS:       'Too many references: cannot splice',
+    ETIMEDOUT:          'Connection timed out',
+    ECONNREFUSED:       'Connection refused',
+    EHOSTDOWN:          'Host is down',
+    EHOSTUNREACH:       'No route to host',
+    EALREADY:           'Operation already in progress',
+    EINPROGRESS:        'Operation now in progress',
+    ESTALE:             'Stale file handle',
+    EUCLEAN:            'Structure needs cleaning',
+    ENOTNAM:            'Not a XENIX named type file',
+    ENAVAIL:            'No XENIX semaphores available',
+    EISNAM:             'Is a named type file',
+    EREMOTEIO:          'Remote I/O error',
+    EDQUOT:             'Quota exceeded',
+    ENOMEDIUM:          'No medium found',
+    EMEDIUMTYPE:        'Wrong medium type',
+    ECANCELED:          'Operation canceled',
+    ENOKEY:             'Required key not available',
+    EKEYEXPIRED:        'Key has expired',
+    EKEYREVOKED:        'Key has been revoked',
+    EKEYREJECTED:       'Key was rejected by service',
+    EOWNERDEAD:         'Owner died',
+    ENOTRECOVERABLE:    'State not recoverable',
+    ERFKILL:            'Operation not possible due to RF-kill',
+    EHWPOISON:          'Memory page has hardware error',
+}
+
+def strerror(errnum):
+    """
+    Map a Linux errno value to its message text.
+
+    >>> strerror(ENOKEY)
+    'Required key not available'
+    >>> strerror(41)
+    'Unknown error 41'
+    """
+    # Numbers missing from the table (and any falsy table entry)
+    # get the generic fallback text.
+    text = _strerror.get(errnum)
+    return text if text else 'Unknown error {0}'.format(errnum)
+
+if __name__ == '__main__':  # run as a script: execute this module's doctests
+    import doctest
+    doctest.testmod()
Index: lib/lib9p/pytest/numalloc.py
===================================================================
--- /dev/null
+++ lib/lib9p/pytest/numalloc.py
@@ -0,0 +1,379 @@
+#! /usr/bin/env python
+
+"""
+Integer number allocator.
+
+Basically, these keep track of a set of allocatable values in
+some range (you provide min and max) and let you allocate out of
+the range and return values into the range.
+
+You may pick a value using "next since last time", or "next
+available after provided value".  Note that next-after will
+wrap around as needed (modular arithmetic style).
+
+The free lists are thread-locked so that this code can be used
+with threads.
+
+    >>> a = NumAlloc(5, 10) # note closed interval: 5..10 inclusive
+    >>> a
+    NumAlloc(5, 10)
+    >>> a.avail
+    [[5, 10]]
+    >>> a.alloc()
+    5
+    >>> a.avail
+    [[6, 10]]
+    >>> a.alloc(8)
+    8
+    >>> a.avail
+    [[6, 7], [9, 10]]
+    >>> a.free(5)
+    >>> a.avail
+    [[5, 7], [9, 10]]
+    >>> a.free(8)
+    >>> a.avail
+    [[5, 10]]
+
+Attempting to free a value that is already free is an error:
+
+    >>> a.free(5)
+    Traceback (most recent call last):
+       ...
+    ValueError: free: 5 already available
+
+You can, however, free a value that is outside the min/max
+range.  You can also free multiple values at once:
+
+    >>> a.free_multi([0, 1, 2, 4])
+    >>> a.avail
+    [[0, 2], [4, 10]]
+    >>> a.free_multi([3, 12])
+    >>> a.avail
+    [[0, 10], [12, 12]]
+
+Note that this changes the min/max values:
+
+    >>> a
+    NumAlloc(0, 12)
+
+To prevent adding values outside the min/max range, create the
+NumAlloc with autoextend=False, or set .autoextend=False at any
+time:
+
+    >>> a.autoextend = False
+    >>> a
+    NumAlloc(0, 12, autoextend=False)
+    >>> a.free(13)
+    Traceback (most recent call last):
+       ...
+    ValueError: free: 13 is outside range limit
+
+You can create an empty range, which is really only useful once
+you free values into it:
+
+    >>> r = NumAlloc(0, -1)
+    >>> r
+    NumAlloc(0, -1)
+    >>> r.alloc() is None
+    True
+    >>> r.free_multi(range(50))
+    >>> r
+    NumAlloc(0, 49)
+
+Note that r.alloc() starts from where you last left off, even if
+you've freed a value:
+
+    >>> r.alloc()
+    0
+    >>> r.free(0)
+    >>> r.alloc()
+    1
+
+Of course, in multithreaded code you can't really depend on this
+since it will race other threads.  Still, it generally makes for
+efficient allocation.  To force allocation to start from the
+range's minimum, provide the minimum (e.g., r.min_val) as an
+argument to r.alloc():
+
+    >>> r.alloc()
+    2
+    >>> r.alloc(r.min_val)
+    0
+
+Providing a number to alloc() tries to allocate that number,
+but wraps around to the next one if needed:
+
+    >>> r.alloc(49)
+    49
+    >>> r.alloc(49)
+    3
+    >>> r.alloc(99999)
+    4
+    >>> r.avail
+    [[5, 48]]
+
+There is currently no way to find all allocated values, although
+the obvious method (going through r.avail) will work.  Any iterator
+would not be thread-safe.
+"""
+
+import threading
+
+class NumAlloc(object):
+    """
+    Number allocator object.
+    """
+    def __init__(self, min_val, max_val, autoextend=True):
+        # The range is the closed interval min_val..max_val; when
+        # min_val > max_val the allocator starts out empty.
+        self.min_val = min_val
+        self.max_val = max_val
+        self.avail = [[min_val, max_val]] if min_val <= max_val else []
+        self.autoextend = autoextend
+        # Most recent value handed out by alloc(), if any.
+        self.last = None
+        self.lock = threading.Lock()
+
+    def __repr__(self):
+        # The autoextend argument is shown only when it is not
+        # the default (True).
+        suffix = '' if self.autoextend else ', autoextend=False'
+        return '{0}({1}, {2}{3})'.format(self.__class__.__name__,
+                                         self.min_val, self.max_val,
+                                         suffix)
+
+    def _find_block(self, val):
+        """
+        Find the block that contains val, or that should contain val.
+        Remember that self.avail is a list of available ranges of
+        the form [[min1, max1], [min2, max2], ..., [minN, maxN]]
+        where max1 < min2, max2 < min3, ..., < minN.
+
+        The input value either falls into one of the available
+        blocks, or falls into a gap between two available blocks.
+        We want to know which block it goes in, or if it goes
+        between two, which block it comes before.
+
+        We can do a binary search to find this block.  When we
+        find it, return its index and its values.
+
+        If we find that val is not in a block, return the position
+        where the value should go, were it to be put into a new
+        block by itself.  E.g., suppose val is 17, and there is a
+        block [14,16] and a block [18,20]. We would make this
+        [14,16],[17,17],[18,20] by inserting [17,17] between them.
+        (Afterward, we will want to fuse all three blocks to make
+        [14,20].  However, if we insert as block 0, e.g., if the
+        list starts with [18,20] and we insert to get
+        [17,17][18,20], we really end up just modifying block 0 to
+        [17,20].  Or, if we insert as the new final block, we
+        might end up modifying the last block.)
+        """
+        low = 0
+        high = len(self.avail) - 1
+        while low <= high:
+            mid = low + ((high - low) // 2)
+            pair = self.avail[mid]
+            if val < pair[0]:
+                # must go before block mid
+                high = mid - 1
+            elif val > pair[1]:
+                # must go after block mid
+                low = mid + 1
+            else:
+                # val >= first and val <= last, so we found it
+                return mid, pair
+        # Low > high: no block contains val (perhaps there are no
+        # blocks at all).  Return the insertion index for val --
+        # which is 0 when there are no blocks -- and None.
+        return low, None
+
+    def alloc(self, val=None):
+        """
+        Get new available value.
+
+        If val is None, we start from the most recently
+        allocated value, plus 1.
+
+        If val is a numeric value, we start from that value.
+        Hence, since the range is min_val..max_val, you can
+        provide min_val to take the first available value.
+        A start outside min_val..max_val is treated as min_val.
+
+        This may return None, if no values are still available.
+        """
+        with self.lock:
+            if val is None:
+                val = self.last + 1 if self.last is not None else self.min_val
+            if val > self.max_val or val < self.min_val:
+                val = self.min_val
+            i, pair = self._find_block(val)
+            if pair is None:
+                # Value is not available.  _find_block gave
+                # us the position where val would be
+                # inserted, so block i (if there is one) is
+                # the first block above val and holds the
+                # next available value.  If there is no
+                # block i, wrap around to block 0.  If there
+                # is no block 0, nothing is available.
+                nblocks = len(self.avail)
+                if i >= nblocks:
+                    if nblocks == 0:
+                        return None
+                    i = 0
+                pair = self.avail[i]
+                val = pair[0]
+            # Value val is available - take it.
+            #
+            # There are four special cases to handle.
+            #
+            # 1. pair[0] < val < pair[1]: split the pair.
+            # 2. pair[0] == val < pair[1]: increase pair[0].
+            # 3. pair[0] == val == pair[1]: delete the pair
+            # 4. pair[0] < val == pair[1]: decrease pair[1].
+            assert pair[0] <= val <= pair[1]
+            if pair[0] == val:
+                # case 2 or 3: Take the left edge or delete the pair.
+                if val == pair[1]:
+                    del self.avail[i]
+                else:
+                    pair[0] = val + 1
+            else:
+                # case 1 or 4: split the pair or take the right edge.
+                if val == pair[1]:
+                    pair[1] = val - 1
+                else:
+                    newpair = [val + 1, pair[1]]
+                    pair[1] = val - 1
+                    self.avail.insert(i + 1, newpair)
+            self.last = val
+            return val
+
+    def free(self, val):
+        "Free one value (ValueError if it is already available)"
+        self._free_multi('free', [val])
+
+    def free_multi(self, values):
+        "Release every value of an iterable (in any order) at once"
+        # sorted() builds the new ascending list _free_multi
+        # needs; the caller's iterable is left untouched.
+        self._free_multi('free_multi', sorted(values))
+
+    def _free_multi(self, how, values):
+        """
+        Free an ascending-sorted list of values; the list is
+        consumed (emptied) in the process.
+        """
+        if not values:
+            return
+        with self.lock:
+            while values:
+                # Peel the top value off the list along with the
+                # run of consecutive values just below it, and
+                # free that whole run as a single range.
+                top = bottom = values.pop()
+                while values and values[-1] == bottom - 1:
+                    bottom = values.pop()
+                self._free_range(how, bottom, top)
+
+    def _maybe_increase_max(self, how, val):
+        """
+        Raise self.max_val to val when val is a new all-time high.
+        Values at or below the current maximum change nothing.
+        With autoextend disabled, a new high is rejected with
+        ValueError instead; how names the calling operation.
+        """
+        if val > self.max_val:
+            if not self.autoextend:
+                msg = '{0}: {1} is outside range limit'
+                raise ValueError(msg.format(how, val))
+            self.max_val = val
+
+    def _maybe_decrease_min(self, how, val):
+        """
+        Lower self.min_val to val when val is a new all-time low.
+        Values at or above the current minimum change nothing.
+        With autoextend disabled, a new low is rejected with
+        ValueError instead; how names the calling operation.
+        """
+        if val < self.min_val:
+            if not self.autoextend:
+                msg = '{0}: {1} is outside range limit'
+                raise ValueError(msg.format(how, val))
+            self.min_val = val
+
+    def _free_range(self, how, val, highval):
+        """
+        Free the range [val..highval] (one element when equal);
+        the lock is already held.  Raises ValueError, tagged with
+        how, if val or highval is already available, or if the
+        range lies outside min_val..max_val with autoextend off.
+        """
+        # Find the place to store the lower value.
+        # We should never find an actual pair here.
+        i, pair = self._find_block(val)
+        if pair:
+            raise ValueError('{0}: {1} already available'.format(how, val))
+        # If we're freeing a range, check that the high val
+        # does not span into the *next* range, either.
+        if highval > val and i < len(self.avail):
+            if self.avail[i][0] <= highval:
+                raise ValueError('{0}: {2} (from {1}..{2}) already '
+                                 'available'.format(how, val, highval))
+
+        # We'll need to insert a block and perhaps fuse it
+        # with blocks before and/or after.  First, check
+        # whether there *is* a before and/or after, and find
+        # their corresponding edges and whether we abut them.
+        if i > 0:
+            abuts_below = self.avail[i - 1][1] + 1 == val
+        else:
+            abuts_below = False
+        if i < len(self.avail):
+            abuts_above = self.avail[i][0] - 1 == highval
+        else:
+            abuts_above = False
+        # Now there are these four cases:
+        # 1. abuts below and above: fuse the two blocks.
+        # 2. abuts below only: adjust previous (i-1'th) block
+        # 3. abuts above only: adjust next (i'th) block
+        # 4. doesn't abut: insert new block
+        if abuts_below:
+            if abuts_above:
+                # case 1
+                self.avail[i - 1][1] = self.avail[i][1]
+                del self.avail[i]
+            else:
+                # case 2
+                self._maybe_increase_max(how, highval)
+                self.avail[i - 1][1] = highval
+        else:
+            if abuts_above:
+                # case 3
+                self._maybe_decrease_min(how, val)
+                self.avail[i][0] = val
+            else:
+                # case 4
+                self._maybe_decrease_min(how, val)
+                self._maybe_increase_max(how, highval)
+                newblock = [val, highval]
+                self.avail.insert(i, newblock)
+
+if __name__ == '__main__':
+    import doctest
+    import sys
+
+    doctest.testmod()
+    if sys.version_info[0] >= 3:
+        xrange = range  # no xrange builtin from Python 3 on
+    # run some worst case tests: allocate every other value
+    # NB: coalesce is terribly slow when done bottom up
+    r = NumAlloc(0, 2**16 - 1)
+    for i in xrange(r.min_val, r.max_val, 2):
+        r.alloc(i)
+    print('worst case alloc: len(r.avail) = {0}'.format(len(r.avail)))
+    for i in xrange(r.max_val - 1, r.min_val, -2):  # top down; stops above min_val
+        r.free(i)
+    print('free again; len(r.avail) should be 1; is {0}'.format(len(r.avail)))
+    if len(r.avail) != 1:
+        sys.exit('failure')
Index: lib/lib9p/pytest/p9conn.py
===================================================================
--- /dev/null
+++ lib/lib9p/pytest/p9conn.py
@@ -0,0 +1,1788 @@
+#! /usr/bin/env python
+
+"""
+handle plan9 server <-> client connections
+
+(We can act as either server or client.)
+
+This code needs some doctests or other unit tests...
+"""
+
+import collections
+import errno
+import logging
+import math
+import os
+import socket
+import stat
+import struct
+import sys
+import threading
+import time
+
+import lerrno
+import numalloc
+import p9err
+import pfod
+import protocol
+
+# Timespec based timestamps, if present, have
+# both seconds and nanoseconds.
+Timespec = collections.namedtuple('Timespec', 'sec nsec')
+
+# File attributes from Tgetattr, or given to Tsetattr.
+# (move to protocol.py?)  We use pfod here instead of
+# namedtuple so that we can create instances with all-None
+# fields easily.
+Fileattrs = pfod.pfod('Fileattrs',
+    'ino mode uid gid nlink rdev size blksize blocks '
+    'atime mtime ctime btime gen data_version')
+
+qt2n = protocol.qid_type2name  # short local alias
+
+STD_P9_PORT=564  # default port for connect(); left out of auto-generated names
+
+class P9Error(Exception):  # common base of RemoteError and LocalError
+    pass
+
+class RemoteError(P9Error):
+    """
+    Used when the remote returns an error.  We track the client
+    (connection instance), the operation being attempted, the
+    message, and an error number and type.  The message may be
+    from the Rerror reply, or from converting the errno in a dot-L
+    or dot-u Rerror reply.  The error number may be None if the
+    type is 'Rerror' rather than 'Rlerror'.  The message may be
+    None or empty string if a non-None errno supplies the error
+    instead.
+    """
+    def __init__(self, client, op, msg, etype, errno):
+        self.client = str(client)
+        self.op = op
+        self.msg = msg
+        self.etype = etype # 'Rerror' or 'Rlerror'
+        self.errno = errno # may be None
+        self.message = self._get_message()
+        super(RemoteError, self).__init__(self.message)  # args == (message,)
+
+    def __repr__(self):
+        return ('{0!r}({1}, {2}, {3}, {4}, '
+                '{5})'.format(self.__class__.__name__, self.client, self.op,
+                              self.msg, self.errno, self.etype))
+    def __str__(self):
+        prefix = '{0}: {1}: '.format(self.client, self.op)
+        if self.errno: # check for "is not None", or just non-false-y?
+            name = {'Rerror': '.u', 'Rlerror': 'Linux'}[self.etype]
+            middle = '[{0} error {1}] '.format(name, self.errno)
+        else:
+            middle = ''
+        return '{0}{1}{2}'.format(prefix, middle, self.message)
+
+    def is_ENOTSUP(self):
+        if self.etype == 'Rlerror':  # dot-L replies carry Linux errno values
+            return self.errno == lerrno.EOPNOTSUPP
+        return self.errno == errno.EOPNOTSUPP
+
+    def _get_message(self):
+        "get message based on self.msg or self.errno"
+        if self.errno is not None:
+            return {
+                'Rlerror': p9err.dotl_strerror,
+                'Rerror' : p9err.dotu_strerror,
+            }[self.etype](self.errno)
+        return self.msg
+
+class LocalError(P9Error):  # raised for problems found on our side, e.g. 'already connected'
+    pass
+
+class TEError(LocalError):  # NOTE(review): not raised in this part of the file -- say what it means
+    pass
+
+class P9SockIO(object):
+    """
+    Common base for server and client, handle send and
+    receive to communications channel.  Note that this
+    need not set up the channel initially, only the logger.
+    The channel is typically connected later.  However, you
+    can provide one initially.
+    """
+    def __init__(self, logger, name=None, server=None, port=STD_P9_PORT):
+        self.logger = logger
+        self.name = name
+        self.channel = None
+        self.maxio = None
+        self.size_coder = struct.Struct('<I')
+        self.max_payload = 2**32 - self.size_coder.size
+        if server is not None:
+            self.connect(server, port)
+
+    def __str__(self):
+        # Prefer our name when we have a non-empty one; without
+        # it, fall back to repr().
+        return self.name or repr(self)
+
+    def get_recommended_maxio(self):
+        "suggest a max I/O size, for when self.maxio is 0 / unset"
+        return 16 * 4096  # 65536 bytes
+
+    def min_maxio(self):
+        "return a minimum size below which we refuse to work"
+        return self.size_coder.size + 100  # the size field itself plus 100 bytes
+
+    def connect(self, server, port=STD_P9_PORT):
+        """
+        Connect to given server name / IP address (TCP, IPv4 or IPv6).
+
+        If self.name was None, sets self.name on success: to server
+        alone for the standard port, otherwise to server:port.
+        """
+        if self.is_connected():
+            raise LocalError('already connected')
+        # Unlike socket() + connect(), this closes the socket on failure.
+        sock = socket.create_connection((server, port))
+        if self.name is not None:
+            name = None
+        elif port == STD_P9_PORT:
+            name = server
+        else:
+            name = '{0}:{1}'.format(server, port)
+        self.declare_connected(sock, name, None)
+
+    def is_connected(self):
+        "predicate: are we connected? (i.e., do we have a channel)"
+        return self.channel is not None
+
+    def declare_connected(self, chan, name, maxio):
+        """
+        Mark the channel ready for size-prefixed protocol I/O.
+
+        chan, name and maxio each replace the stored value only
+        when not None.  A nonzero maxio smaller than min_maxio()
+        raises LocalError before anything is replaced.
+        """
+        floor = self.min_maxio()
+        if maxio and maxio < floor:
+            raise LocalError('maxio={0} < minimum {1}'.format(maxio, floor))
+        if chan is not None:
+            self.channel = chan
+        if name is not None:
+            self.name = name
+        if maxio is not None:
+            self.maxio = maxio
+            self.max_payload = maxio - self.size_coder.size
+
+    def reduce_maxio(self, maxio):
+        "Lower the maximum I/O size, as requested by the other side"
+        low = self.min_maxio()
+        if maxio < low:
+            raise LocalError('new maxio={0} < minimum {1}'.format(maxio, low))
+        cur = self.maxio
+        if maxio > cur:
+            raise LocalError('new maxio={0} > current {1}'.format(maxio, cur))
+        self.maxio = maxio
+        self.max_payload = maxio - self.size_coder.size
+
+    def declare_disconnected(self):
+        "Declare comm channel dead (note: leaves self.name set!)"
+        self.channel = None
+        self.maxio = None  # note: self.max_payload is not reset here
+
+    def shutwrite(self):
+        "Do a SHUT_WR on the outbound channel - can't send more"
+        chan = self.channel
+        # we're racing other threads here: chan may already be None
+        try:
+            chan.shutdown(socket.SHUT_WR)
+        except (socket.error, OSError, AttributeError):
+            pass  # best effort; socket.error is no OSError on Python 2
+
+    def shutdown(self):
+        "Shut down and close the comm channel, if any, then mark us disconnected"
+        if self.channel:
+            try:
+                self.channel.shutdown(socket.SHUT_RDWR)
+            except socket.error:
+                pass  # best effort: the channel gets closed regardless
+            self.channel.close()
+            self.declare_disconnected()
+
+    def read(self):
+        """
+        Try to read a complete packet.
+
+        Returns a (data, complete) pair.  data is b'' for EOF,
+        as read() usually does, and complete is then False:
+        EOF is never a complete packet.
+
+        If we can't even get the size, or the size is not
+        sensible, this still returns b''.  If we get a sensible
+        size but are missing some data, we return the short
+        packet with complete False.  complete is True only
+        when we really got all of the bytes the size field
+        promised.
+        """
+        if self.channel is None:
+            return b'', False
+        size_field = self.xread(self.size_coder.size)
+        if len(size_field) < self.size_coder.size:
+            if len(size_field) == 0:
+                self.logger.log(logging.INFO, '%s: normal EOF', self)
+            else:
+                self.logger.log(logging.ERROR,
+                               '%s: EOF while reading size (got %d bytes)',
+                               self, len(size_field))
+                # should we raise an error here?
+            return b'', False
+
+        size = self.size_coder.unpack(size_field)[0] - self.size_coder.size
+        if size <= 0 or size > self.max_payload:
+            self.logger.log(logging.ERROR,
+                            '%s: incoming size %d is insane '
+                            '(max payload is %d)',
+                            self, size, self.max_payload)
+            # indicate EOF - should we raise an error instead, here?
+            return b'', False
+        data = self.xread(size)
+        return data, len(data) == size
+
+    def xread(self, nbytes):
+        """
+        Receive nbytes bytes from the channel, calling recv()
+        as many times as that takes.
+
+        Returns the bytes gathered: all nbytes of them normally,
+        fewer (a short count) if EOF arrives partway through, and
+        b'' if EOF arrives before any data.
+        """
+        assert nbytes > 0
+        # Collect the fragments in a list and join them once at
+        # the end.  The first recv() asks for everything and
+        # should usually deliver it, in which case the loop
+        # body runs just once.
+        pieces = []
+        remaining = nbytes
+        while remaining > 0:
+            piece = self.channel.recv(remaining)
+            if piece == b'':
+                # EOF: hand back whatever arrived before it
+                break
+            remaining -= len(piece)
+            pieces.append(piece)
+        return b''.join(pieces)
+
+    def write(self, data):
+        """
+        Write all the data, in the usual encoding.  Note that
+        the length of the data, including the length of the length
+        itself, is already encoded in the first 4 bytes of the
+        data; this method does not add or check that size field.
+
+        Raises LocalError if len(data) exceeds max_payload, before
+        anything is sent.  Otherwise the data goes out through
+        sendall(), whose errors propagate if we can't write it all.
+        """
+        size = len(data)
+        assert size >= 4  # must at least hold the 4-byte size field
+        if size > self.max_payload:
+            raise LocalError('data length {0} exceeds '
+                             'maximum {1}'.format(size, self.max_payload))
+        self.channel.sendall(data)
+
+def _pathcat(prefix, suffix):
+    """
+    Join two server-side path pieces.  This is basically just
+    prefix + / + suffix, with two complications:
+
+    We may have no prefix path at all (empty or None), in which
+    case the result is the suffix alone, without a leading slash.
+
+    The prefix may be exactly b'/', in which case no further
+    slash is added and the result is prefix + suffix.
+    """
+    if not prefix:
+        return suffix
+    # A bare root already supplies the separating slash.
+    sep = b'' if prefix == b'/' else b'/'
+    return prefix + sep + suffix
+
+class P9Client(P9SockIO):
+    """
+    Act as client.
+
+    We need a logger (see logging), a timeout, and a protocol
+    version to request.  By default, we will downgrade to a lower
+    version if asked.
+
+    If server and port are supplied, they are remembered and become
+    the default for .connect() (which is still deferred).
+
+    Note that we keep a table of fid-to-path in self.live_fids,
+    but at any time (except while holding the lock) a fid can
+    be deleted entirely, and the table entry may just be True
+    if we have no path name.  In general, we update the name
+    when we can.
+    """
+    def __init__(self, logger, timeout, version, may_downgrade=True,
+                 server=None, port=None):
+        super(P9Client, self).__init__(logger)
+        self.timeout = timeout
+        self.iproto = protocol.p9_version(version)
+        self.may_downgrade = may_downgrade
+        self.tagalloc = numalloc.NumAlloc(0, 65534)  # 65535 is kept for Tversion
+        self.tagstate = {}  # tag -> True (waiting), False (gave up) or the reply
+        # The next bit is slightly dirty: perhaps we should just
+        # allocate NOFID out of the 2**32-1 range, so as to avoid
+        # "knowing" that it's 2**32-1.
+        self.fidalloc = numalloc.NumAlloc(0, protocol.td.NOFID - 1)
+        self.live_fids = {}  # fid -> path, or True when no path is known
+        self.rootfid = None
+        self.rootqid = None
+        self.rthread = None
+        self.lock = threading.Lock()
+        self.new_replies = threading.Condition(self.lock)  # shares self.lock
+        self._monkeywrench = {}  # what -> how, or [repeat, how]
+        self._server = server
+        self._port = port
+        self._unsup = {}
+
+    def get_monkey(self, what):
+        "check for a monkey-wrench"
+        with self.lock:
+            wrench = self._monkeywrench.get(what)
+            if wrench is None:
+                return None
+            if not isinstance(wrench, list):
+                # plain value: a one-shot wrench, used up now
+                del self._monkeywrench[what]
+                return wrench
+            # list form is [count, value]: value is returned
+            # count times, or forever if count is 0
+            if wrench[0] > 0:
+                wrench[0] -= 1
+                if wrench[0] == 0:
+                    del self._monkeywrench[what]
+            return wrench[1]
+
+    def set_monkey(self, what, how, repeat=None):
+        """
+        Install, replace or remove a monkey-wrench.
+
+        what names the thing to monkey-wrench (which depends on
+        the op); how is generally a replacement value.  With
+        how=None any wrench set for what is removed.  If repeat
+        is not None it is the number of times the wrench is
+        applied (0 means forever, or until removed); otherwise
+        the wrench is applied once.
+        """
+        with self.lock:
+            if how is None:
+                self._monkeywrench.pop(what, None)
+            elif repeat is not None:
+                self._monkeywrench[what] = [repeat, how]
+            else:
+                self._monkeywrench[what] = how
+
+    def get_tag(self, for_Tversion=False):
+        "get next available tag ID"
+        with self.lock:
+            # Tversion always uses the fixed tag 65535, which
+            # tagalloc (created as 0..65534) does not hand out.
+            tag = 65535 if for_Tversion else self.tagalloc.alloc()
+            if tag is None:
+                raise LocalError('all tags in use')
+            # True means: in use, still waiting for the reply
+            self.tagstate[tag] = True
+        return tag
+
+    def set_tag(self, tag, reply):
+        "set the reply info for the given tag"
+        assert 0 <= tag < 65536
+        with self.lock:
+            # tagstate[tag] is True while we still wait for the
+            # reply, False once we gave up on it, absent (None
+            # here) if the tag is not in use, and otherwise an
+            # earlier reply nobody has collected yet.
+            state = self.tagstate.get(tag)
+            if state is True:
+                # Still waiting: store the answer, wake waiters.
+                self.tagstate[tag] = reply
+                self.new_replies.notify_all()
+            elif state is False:
+                # We gave up on this tag.  Reply came anyway.
+                self.logger.log(logging.INFO,
+                                '%s: got tag %d = %r after timing out on it',
+                                self, tag, reply)
+                self.retire_tag_locked(tag)
+            elif state is None:
+                # The server sent back a tag that was not
+                # outstanding!
+                self.logger.log(logging.WARNING,
+                                '%s: got tag %d = %r when tag %d not in use!',
+                                self, tag, reply, tag)
+            else:
+                # A second reply arrived before the first was handled!
+                self.logger.log(logging.WARNING,
+                                '%s: got tag %d = %r when tag %d = %r!',
+                                self, tag, reply, tag, state)
+
+    def retire_tag(self, tag):
+        "release a tag once its result has been handled; the Tversion tag 65535 is left alone"
+        # 65535 is the fixed Tversion tag and is skipped entirely
+        if tag != 65535:
+            assert 0 <= tag < 65535
+            with self.lock:
+                self.retire_tag_locked(tag)
+
+    def retire_tag_locked(self, tag):
+        "drop tag from tagstate and return it to tagalloc; caller must hold self.lock"
+        # retire_all_tags may have raced us, so the tag may already be gone
+        if tag not in self.tagstate:
+            return
+        self.tagstate.pop(tag)
+        self.tagalloc.free(tag)
+
+    def retire_all_tags(self):
+        "free every tag in tagstate, empty it, and wake all waiters (used after connection drop)"
+        with self.lock:
+            # release all tags in any state (waiting, answered, timedout)
+            self.tagalloc.free_multi(self.tagstate.keys())
+            self.tagstate = {}
+            self.new_replies.notify_all()
+
+    def alloc_fid(self):
+        "take a fid from fidalloc and record it as live with no known path (True)"
+        with self.lock:
+            newfid = self.fidalloc.alloc()
+            self.live_fids.update({newfid: True})
+            return newfid
+
+    def getpath(self, fid):
+        "return the path recorded for fid; None when fid is not live or has only the True placeholder"
+        with self.lock:
+            recorded = self.live_fids.get(fid)
+        # True is the "live, but path unknown" marker, not a path
+        have_path = recorded is not True
+        return recorded if have_path else None
+
+    def getpathX(self, fid):
+        """
+        Printable form of getpath(fid): repr() of the stored path,
+        or the text <fid N, unknown path> when getpath gives None.
+        """
+        known = self.getpath(fid)
+        if known is not None:
+            return repr(known)
+        return '<fid {0}, unknown path>'.format(fid)
+
+    def setpath(self, fid, path):
+        "record path for a live fid; path may be a byte string or another fid to copy from"
+        with self.lock:
+            # An integer is a source fid: use whatever it maps to.
+            newpath = self.live_fids.get(path) if isinstance(path, int) else path
+            # newpath may now be None (source fid not live), True (source
+            # has no known path), or an empty string (useless to us);
+            # every such case is stored as True, meaning
+            # "no known path".
+            newpath = newpath or True
+            # Only fids that are currently live (mapped to True or
+            # to an older path) get updated; unknown fids are
+            # left out of the table.
+            if self.live_fids.get(fid):
+                self.live_fids[fid] = newpath
+
+    def did_rename(self, fid, ncomp, newdir=None):
+        """
+        Announce that we renamed using a fid - we'll try to update
+        other fids based on this (we can't really do it perfectly).
+
+        NOTE: caller must provide a final-component (a byte string).
+        The caller can supply the new path (and should
+        do so if the rename is not based on the retained path
+        for the supplied fid, i.e., for rename ops where fid
+        can move across directories).  The rules:
+
+         - If newdir is None (default), we use stored path.
+         - Otherwise, newdir provides the best approximation
+           we have to the path that needs ncomp appended.
+
+        (This is based on the fact that renames happen via Twstat
+        or Trename, or Trenameat, which change just one tail component,
+        but the path names vary.)
+        """
+        if ncomp is None:
+            return
+        opath = self.getpath(fid)
+        if newdir is None:
+            if opath is None:
+                return
+            ocomps = opath.split(b'/')
+            ncomps = ocomps[0:-1]
+        else:
+            ocomps = None           # well, none yet anyway
+            ncomps = newdir.split(b'/')
+        ncomps.append(ncomp)
+        if opath is None or not opath.startswith(b'/'):
+            # We don't have enough information to fix anything else, so
+            # just store the new (at least partial) path and return.
+            # (startswith, since bytes[0] is an int on Python 3.)
+            npath = b'/'.join(ncomps)
+            with self.lock:
+                if fid in self.live_fids:
+                    self.live_fids[fid] = npath
+            return
+        if ocomps is None:
+            ocomps = opath.split(b'/')
+        olen = len(ocomps)
+        ofinal = ocomps[olen - 1]
+        # Old paths is full path.  Find any other fids that start
+        # with some or all the components in ocomps.  Note that if
+        # we renamed /one/two/three to /four/five this winds up
+        # renaming files /one/a to /four/a, /one/two/b to /four/five/b,
+        # and so on.
+        with self.lock:
+            for fid2, path2 in list(self.live_fids.items()):
+                # Skip fids without byte-string paths (items() works on 2 and 3)
+                if not isinstance(path2, bytes):
+                    continue
+                # Before splitting (which is a bit expensive), try
+                # a straightforward prefix match.  This might give
+                # some false hits, e.g., prefix /one/two/threepenny
+                # starts with /one/two/three, but it quickly eliminates
+                # /raz/baz/mataz and the like.
+                if not path2.startswith(opath):
+                    continue
+                # Split up the path, and use that to make sure that
+                # the final component is a full match.
+                parts2 = path2.split(b'/')
+                if parts2[olen - 1] != ofinal:
+                    continue
+                # OK, path2 starts with the old (renamed) sequence.
+                # Replace the old components with the new ones.
+                # This updates the renamed fid when we come across
+                # it!  It also handles a change in the number of
+                # components, thanks to Python's slice assignment.
+                parts2[0:olen] = ncomps
+                self.live_fids[fid2] = b'/'.join(parts2)
+
+    def retire_fid(self, fid):
+        "return fid to fidalloc, then forget its path entry (KeyError if fid is not in live_fids)"
+        with self.lock:
+            self.fidalloc.free(fid)
+            del self.live_fids[fid]
+
+    def retire_all_fids(self):
+        "return every live fid to fidalloc and forget all recorded paths"
+        # this is useful for debugging fid leaks:
+        #for fid in self.live_fids:
+        #    print('retiring', fid, self.getpathX(fid))
+        with self.lock:
+            self.fidalloc.free_multi(self.live_fids.keys())
+            self.live_fids = {}
+
+    def read_responses(self):
+        "Reader-thread loop: unpack each packet and pass it to set_tag; on EOF retire all tags and exit."
+        while self.is_connected():
+            pkt, is_full = super(P9Client, self).read()
+            if pkt == b'':
+                self.shutwrite()
+                self.retire_all_tags()
+                return
+            if not is_full:
+                self.logger.log(logging.WARNING, '%s: got short packet', self)
+            try:
+                # We have one special case: if we're not yet connected
+                # with a version, we must unpack *as if* it's a plain
+                # 9P2000 response.
+                if self.have_version:
+                    resp = self.proto.unpack(pkt)
+                else:
+                    resp = protocol.plain.unpack(pkt)
+            except protocol.SequenceError as err:
+                self.logger.log(logging.ERROR, '%s: bad response: %s',
+                                self, err)
+                try:
+                    resp = self.proto.unpack(pkt, noerror=True)
+                except protocol.SequenceError:
+                    header = self.proto.unpack_header(pkt, noerror=True)
+                    self.logger.log(logging.ERROR,
+                                    '%s: (not even raw-decodable)', self)
+                    self.logger.log(logging.ERROR,
+                                    '%s: header decode produced %r',
+                                    self, header)
+                else:
+                    self.logger.log(logging.ERROR,
+                                    '%s: raw decode produced %r',
+                                    self, resp)
+                # after this kind of problem, probably need to
+                # shut down, but let's leave that out for a bit
+            else:
+                # NB: all protocol responses have a "tag",
+                # so resp.tag always exists.
+                self.logger.log(logging.DEBUG, "read_resp: tag %d resp %r", resp.tag, resp)
+                self.set_tag(resp.tag, resp)
+
+    def wait_for(self, tag):
+        """
+        Wait for a response to the given tag.  Return the response,
+        releasing the tag.  If self.timeout is not None, wait at most
+        that long in total; a timed-out tag stays reserved until its
+        late reply arrives (set_tag then retires it).  Else wait forever.
+
+        If this returns None, either the tag was bad initially, or
+        a timeout occurred, or the connection got shut down.
+        """
+        self.logger.log(logging.DEBUG, "wait_for: tag %d", tag)
+        if self.timeout is None:
+            deadline = None
+        else:
+            deadline = time.time() + self.timeout
+        with self.lock:
+            while True:
+                # tagstate is True (waiting) or False (timedout) or
+                # a valid response, or None if we've reset the tag
+                # states (retire_all_tags, after connection drop).
+                resp = self.tagstate.get(tag, None)
+                if resp is None:
+                    # out of sync, exit loop
+                    break
+                if resp is True:
+                    # still waiting: sleep only for the time that is left
+                    left = None if deadline is None else deadline - time.time()
+                    self.new_replies.wait(None if left is None else max(left, 0.0))
+                    if deadline is not None and time.time() >= deadline:
+                        # give up, unless a reply landed while we slept
+                        if self.tagstate.get(tag) is True:
+                            self.tagstate[tag] = False
+                    continue
+                # resp is either False (timeout) or a reply.
+                # If resp is False, change it to None; the tag
+                # is now dead until we get a reply (then we
+                # just toss the reply).
+                # Otherwise, we're done with the tag: free it.
+                # In either case, stop now.
+                if resp is False:
+                    resp = None
+                else:
+                    self.tagalloc.free(tag)
+                    del self.tagstate[tag]
+                break
+        return resp
+
+    def badresp(self, req, resp):
+        """
+        Raise the error matching an unexpected response (always raises); a None resp shuts down first.
+        """
+        if resp is None:
+            self.shutdown()
+            raise TEError('{0}: {1}: timeout or EOF'.format(self, req))
+        if isinstance(resp, protocol.rrd.Rlerror):
+            raise RemoteError(self, req, None, 'Rlerror', resp.ecode)
+        if isinstance(resp, protocol.rrd.Rerror):
+            if resp.errnum is None:
+                raise RemoteError(self, req, resp.errstr, 'Rerror', None)
+            raise RemoteError(self, req, None, 'Rerror', resp.errnum)
+        raise LocalError('{0}: {1} got response {2!r}'.format(self, req, resp))
+
+    def supports(self, req_code):
+        "result of self.proto.supports(req_code), or False if the code was recorded by unsupported()"
+        # a recorded ENOTSUP overrides what the protocol claims
+        if req_code in self._unsup:
+            return False
+        return self.proto.supports(req_code)
+
+    def supports_all(self, *req_codes):
+        "True only if supports() holds for each of req_codes (True when none are given)"
+        return not any(not self.supports(code) for code in req_codes)
+
+    def unsupported(self, req_code):
+        """
+        Record that a request got ENOTSUP (a RemoteError) so that
+        supports() reports False for it from now on.  Ops must call
+        this themselves; it is only an optimization.
+        """
+        self._unsup[req_code] = True
+
+    def connect(self, server=None, port=None):
+        """
+        Connect to given server/port pair and negotiate the version.
+
+        The server and port are remembered.  If given as None, the
+        last remembered values are used (initially those given when
+        this client instance was created; the port falls back to
+        STD_P9_PORT).  New values are only remembered here on a
+        *successful* connect, however.
+
+        Raises LocalError or RemoteError when no server is known or negotiation fails.
+        """
+        if server is None:
+            server = self._server
+            if server is None:
+                raise LocalError('connect: no server specified and no default')
+        if port is None:
+            port = self._port
+            if port is None:
+                port = STD_P9_PORT
+        self.name = None            # wipe out previous name, if any
+        super(P9Client, self).connect(server, port)
+        maxio = self.get_recommended_maxio()
+        self.declare_connected(None, None, maxio)
+        self.proto = self.iproto    # revert to initial protocol
+        self.have_version = False
+        self.rthread = threading.Thread(target=self.read_responses)
+        self.rthread.start()
+        tag = self.get_tag(for_Tversion=True)
+        req = protocol.rrd.Tversion(tag=tag, msize=maxio,
+                                    version=self.get_monkey('version'))
+        super(P9Client, self).write(self.proto.pack_from(req))
+        resp = self.wait_for(tag)
+        if not isinstance(resp, protocol.rrd.Rversion):
+            self.shutdown()
+            if isinstance(resp, protocol.rrd.Rerror):
+                version = req.version or self.proto.get_version()
+                # for python3, we need to convert version to string
+                if not isinstance(version, str):
+                    version = version.decode('utf-8', 'surrogateescape')
+                raise RemoteError(self, 'version ' + version,
+                                  resp.errstr, 'Rerror', None)
+            self.badresp('version', resp)
+        their_maxio = resp.msize
+        try:
+            self.reduce_maxio(their_maxio)
+        except LocalError as err:
+            raise LocalError('{0}: sent maxio={1}, they tried {2}: '
+                             '{3}'.format(self, maxio, their_maxio,
+                                          err.args[0]))
+        if resp.version != self.proto.get_version():
+            if not self.may_downgrade:
+                self.shutdown()
+                raise LocalError('{0}: they only support '
+                                 'version {1!r}'.format(self, resp.version))
+            # raises LocalError if the version is bad
+            # (should we wrap it with a connect-to-{0} msg?)
+            self.proto = self.proto.downgrade_to(resp.version)
+        self._server = server
+        self._port = port
+        self.have_version = True
+
+    def attach(self, afid, uname, aname, n_uname):
+        """
+        Send Tattach on a newly allocated fid; on Rattach that fid
+        becomes rootfid (with path b'/') and its qid rootqid.
+        Currently we don't know how to do authentication,
+        but we'll pass any provided afid through.
+        """
+        if afid is None:
+            afid = protocol.td.NOFID
+        if uname is None:
+            uname = ''
+        if aname is None:
+            aname = ''
+        if n_uname is None:
+            n_uname = protocol.td.NONUNAME
+        tag = self.get_tag()
+        fid = self.alloc_fid()
+        pkt = self.proto.Tattach(tag=tag, fid=fid, afid=afid,
+                                 uname=uname, aname=aname,
+                                 n_uname=n_uname)
+        super(P9Client, self).write(pkt)
+        resp = self.wait_for(tag)
+        if not isinstance(resp, protocol.rrd.Rattach):
+            self.retire_fid(fid)
+            self.badresp('attach', resp)
+        # probably should check resp.qid
+        self.rootfid = fid
+        self.rootqid = resp.qid
+        self.setpath(fid, b'/')
+
+    def shutdown(self):
+        "disconnect: clunk rootfid (errors ignored), retire all tags and fids, shut down, join rthread"
+        if self.rootfid is not None:
+            self.clunk(self.rootfid, ignore_error=True)
+        self.retire_all_tags()
+        self.retire_all_fids()
+        self.rootfid = None
+        self.rootqid = None
+        super(P9Client, self).shutdown()
+        if self.rthread:
+            self.rthread.join()
+            self.rthread = None
+
+    def dupfid(self, fid):
+        """
+        Copy existing fid to a new fid via a zero-component Twalk; returns the new fid.
+        """
+        tag = self.get_tag()
+        newfid = self.alloc_fid()
+        pkt = self.proto.Twalk(tag=tag, fid=fid, newfid=newfid, nwname=0,
+                               wname=[])
+        super(P9Client, self).write(pkt)
+        resp = self.wait_for(tag)
+        if not isinstance(resp, protocol.rrd.Rwalk):
+            self.retire_fid(newfid)
+            self.badresp('walk {0}'.format(self.getpathX(fid)), resp)
+        # Copy path too (setpath accepts a source fid in place of a path)
+        self.setpath(newfid, fid)
+        return newfid
+
+    def lookup(self, fid, components):
+        """
+        Do Twalk.  Caller must provide a starting fid, which should
+        be rootfid to look up from '/' - we do not do / vs . here.
+        Caller must also provide a component-ized path (on purpose,
+        so that caller can provide invalid components like '' or '/').
+        The components must be byte-strings as well, for the same
+        reason.
+
+        We do allocate the new fid ourselves here, though.
+
+        There's no logic here to split up long walks (yet?).
+        """
+        # these are too easy to screw up, so check
+        if self.rootfid is None:
+            raise LocalError('{0}: not attached'.format(self))
+        if (isinstance(components, (str, bytes)) or
+            not all(isinstance(i, bytes) for i in components)):
+            raise LocalError('{0}: lookup: invalid '
+                             'components {1!r}'.format(self, components))
+        tag = self.get_tag()
+        newfid = self.alloc_fid()
+        startpath = self.getpath(fid)
+        pkt = self.proto.Twalk(tag=tag, fid=fid, newfid=newfid,
+                               nwname=len(components), wname=components)
+        super(P9Client, self).write(pkt)
+        resp = self.wait_for(tag)
+        if not isinstance(resp, protocol.rrd.Rwalk):
+            self.retire_fid(newfid)
+            self.badresp('walk {0} in '
+                         '{1}'.format(components, self.getpathX(fid)),
+                         resp)
+        # Just because we got Rwalk does not mean we got ALL the
+        # way down the path.  Raise OSError(ENOENT) if we're short.
+        if resp.nwqid > len(components):
+            # ??? this should be impossible. Local error?  Remote error?
+            # OS Error?
+            self.clunk(newfid, ignore_error=True)
+            raise LocalError('{0}: walk {1} in {2} returned {3} '
+                             'items'.format(self, components,
+                                            self.getpathX(fid), resp.nwqid))
+        if resp.nwqid < len(components):
+            self.clunk(newfid, ignore_error=True)
+            # Looking up a/b/c and got just a/b, c is what's missing.
+            # Looking up a/b/c and got just a, b is what's missing.
+            missing = components[resp.nwqid]
+            within = _pathcat(startpath, b'/'.join(components[:resp.nwqid]))
+            raise OSError(errno.ENOENT,
+                          '{0}: {1} in {2}'.format(os.strerror(errno.ENOENT),
+                                                   missing, within))
+        self.setpath(newfid, _pathcat(startpath, b'/'.join(components)))
+        return newfid, resp.wqid
+
+    def lookup_last(self, fid, components):
+        """
+        Like lookup, but return (newfid, qid of the last component).
+        With empty components the qid is rootqid when fid is the
+        root fid, and otherwise comes from a Tstat of the new fid.
+        """
+        rfid, wqid = self.lookup(fid, components)
+        if len(wqid):
+            return rfid, wqid[-1]
+        if fid == self.rootfid:         # usually true, if we get here at all
+            return rfid, self.rootqid
+        tag = self.get_tag()
+        pkt = self.proto.Tstat(tag=tag, fid=rfid)
+        super(P9Client, self).write(pkt)
+        resp = self.wait_for(tag)
+        if not isinstance(resp, protocol.rrd.Rstat):
+            self.badresp('stat {0}'.format(self.getpathX(fid)), resp)
+        statval, _ = self.proto.unpack_wirestat(resp.data, 0, False)
+        return rfid, statval.qid
+
+    def clunk(self, fid, ignore_error=False):
+        "issue Tclunk; retire fid on Rclunk.  With ignore_error a failure returns without retiring fid"
+        tag = self.get_tag()
+        pkt = self.proto.Tclunk(tag=tag, fid=fid)
+        super(P9Client, self).write(pkt)
+        resp = self.wait_for(tag)
+        if not isinstance(resp, protocol.rrd.Rclunk):
+            if ignore_error:
+                return
+            self.badresp('clunk {0}'.format(self.getpathX(fid)), resp)
+        self.retire_fid(fid)
+
+    def remove(self, fid, ignore_error=False):
+        "issue Tremove (old style) and retire fid on Rremove; with ignore_error a failure falls back to clunk"
+        tag = self.get_tag()
+        pkt = self.proto.Tremove(tag=tag, fid=fid)
+        super(P9Client, self).write(pkt)
+        resp = self.wait_for(tag)
+        if not isinstance(resp, protocol.rrd.Rremove):
+            if ignore_error:
+                # remove failed: still need to clunk the fid (best effort)
+                self.clunk(fid, True)
+                return
+            self.badresp('remove {0}'.format(self.getpathX(fid)), resp)
+        self.retire_fid(fid)
+
+    def create(self, fid, name, perm, mode, filetype=None, extension=b''):
+        """
+        Issue create op (note that this may be mkdir, symlink, etc).
+        fid is the directory in which the create happens, and for
+        regular files, it becomes, on success, a fid referring to
+        the now-open file.  perm is, e.g., 0644, 0755, etc.,
+        optionally with additional high bits.  mode is a mode
+        byte (e.g., protocol.td.ORDWR, or OWRONLY|OTRUNC, etc.).
+
+        As a service to callers, we take two optional arguments
+        specifying the file type ('dir', 'symlink', 'device',
+        'fifo', or 'socket') and additional info if needed.
+        The additional info for a symlink is the target of the
+        link (a byte string), and the additional info for a device
+        is a byte string with "b <major> <minor>" or "c <major> <minor>".
+
+        Otherwise, callers can leave filetype=None and encode the bits
+        into the mode (caller must still provide extension if needed).
+
+        We do NOT check whether the extension matches extra DM bits,
+        or that there's only one DM bit set, since this is a testing
+        setup.  Returns (qid, iounit) from the Rcreate reply.
+        """
+        tag = self.get_tag()
+        if filetype is not None:
+            perm |= {
+                'dir': protocol.td.DMDIR,
+                'symlink': protocol.td.DMSYMLINK,
+                'device': protocol.td.DMDEVICE,
+                'fifo': protocol.td.DMNAMEDPIPE,
+                'socket': protocol.td.DMSOCKET,
+            }[filetype]
+        pkt = self.proto.Tcreate(tag=tag, fid=fid, name=name,
+            perm=perm, mode=mode, extension=extension)
+        super(P9Client, self).write(pkt)
+        resp = self.wait_for(tag)
+        if not isinstance(resp, protocol.rrd.Rcreate):
+            self.badresp('create {0} in {1}'.format(name, self.getpathX(fid)),
+                         resp)
+        if resp.qid.type == protocol.td.QTFILE:
+            # Creating a regular file opens the file, so fid's
+            # recorded path gains the new name.
+            self.setpath(fid, _pathcat(self.getpath(fid), name))
+        return resp.qid, resp.iounit
+
+    def open(self, fid, mode):
+        "send Topen for fid (mode is 1 byte); returns (qid, iounit) from the Ropen reply"
+        tag = self.get_tag()
+        super(P9Client, self).write(
+            self.proto.Topen(tag=tag, fid=fid, mode=mode))
+        reply = self.wait_for(tag)
+        if not isinstance(reply, protocol.rrd.Ropen):
+            self.badresp('open {0}'.format(self.getpathX(fid)), reply)
+        return reply.qid, reply.iounit
+
+    def lopen(self, fid, flags):
+        "send Tlopen for fid (flags from L_O_*); returns (qid, iounit) from the Rlopen reply"
+        tag = self.get_tag()
+        super(P9Client, self).write(
+            self.proto.Tlopen(tag=tag, fid=fid, flags=flags))
+        reply = self.wait_for(tag)
+        if not isinstance(reply, protocol.rrd.Rlopen):
+            self.badresp('lopen {0}'.format(self.getpathX(fid)), reply)
+        return reply.qid, reply.iounit
+
+    def read(self, fid, offset, count):
+        "send Tread for up to count bytes at offset of an open fid; returns the Rread data"
+        tag = self.get_tag()
+        super(P9Client, self).write(
+            self.proto.Tread(tag=tag, fid=fid, offset=offset, count=count))
+        reply = self.wait_for(tag)
+        # anything but an Rread is reported through badresp
+        if not isinstance(reply, protocol.rrd.Rread):
+            what = 'read {0} bytes at offset {1} in {2}'
+            self.badresp(what.format(count, offset, self.getpathX(fid)), reply)
+        return reply.data
+
+    def write(self, fid, offset, data):
+        "send Twrite of data at offset of an open fid; returns the count from the Rwrite reply"
+        tag = self.get_tag()
+        nbytes = len(data)
+        request = self.proto.Twrite(tag=tag, fid=fid, offset=offset,
+                                    count=nbytes, data=data)
+        super(P9Client, self).write(request)
+        reply = self.wait_for(tag)
+        if not isinstance(reply, protocol.rrd.Rwrite):
+            what = 'write {0} bytes at offset {1} in {2}'
+            self.badresp(what.format(nbytes, offset, self.getpathX(fid)), reply)
+        return reply.count
+
+    # Caller may
+    #  - pass an actual stat object, or
+    #  - pass in all the individual to-set items by keyword, or
+    #  - mix and match a bit: get an existing stat, then use
+    #    keywords to override fields.
+    # We convert "None"s to the internal "do not change" values,
+    # and for diagnostic purposes, can turn "do not change" back
+    # to None at the end, too.
+    def wstat(self, fid, statobj=None, **kwargs):
+        if statobj is None:
+            statobj = protocol.td.stat()
+        else:
+            statobj = statobj._copy()
+        # Fields in stat that you can't send as a wstat: the
+        # type and qid are informative.  Similarly, the
+        # 'extension' is an input when creating a file but
+        # read-only when stat-ing.
+        #
+        # It's not clear what it means to set dev, but we'll leave
+        # it in as an optional parameter here.  fs/backend.c just
+        # errors out on an attempt to change it.
+        if self.proto == protocol.plain:
+            forbid = ('type', 'qid', 'extension',
+                      'n_uid', 'n_gid', 'n_muid')
+        else:
+            forbid = ('type', 'qid', 'extension')
+        nochange = {
+            'type': 0,
+            'qid': protocol.td.qid(0, 0, 0),
+            'dev': 2**32 - 1,
+            'mode': 2**32 - 1,
+            'atime': 2**32 - 1,
+            'mtime': 2**32 - 1,
+            'length': 2**64 - 1,
+            'name': b'',
+            'uid': b'',
+            'gid': b'',
+            'muid': b'',
+            'extension': b'',
+            'n_uid': 2**32 - 1,
+            'n_gid': 2**32 - 1,
+            'n_muid': 2**32 - 1,
+        }
+        for field in statobj._fields:
+            if field in kwargs:
+                if field in forbid:
+                    raise ValueError('cannot wstat a stat.{0}'.format(field))
+                statobj[field] = kwargs.pop(field)
+            # None (stored or passed by keyword) means "do not change"
+            if field in forbid or statobj[field] is None:
+                statobj[field] = nochange[field]
+        if kwargs:
+            raise TypeError('wstat() got an unexpected keyword argument '
+                            '{0!r}'.format(kwargs.popitem()[0]))
+
+        data = self.proto.pack_wirestat(statobj)
+        tag = self.get_tag()
+        pkt = self.proto.Twstat(tag=tag, fid=fid, data=data)
+        super(P9Client, self).write(pkt)
+        resp = self.wait_for(tag)
+        if not isinstance(resp, protocol.rrd.Rwstat):
+            # For error viewing, switch all the do-not-change
+            # and can't-change fields to None.
+            statobj.qid = None
+            for field in statobj._fields:
+                if field in forbid:
+                    statobj[field] = None
+                elif field in nochange and statobj[field] == nochange[field]:
+                    statobj[field] = None
+            self.badresp('wstat {0}={1}'.format(self.getpathX(fid), statobj),
+                         resp)
+        # wstat worked - change path names if needed
+        if statobj.name != b'':
+            self.did_rename(fid, statobj.name)
+
+    def readdir(self, fid, offset, count):
+        "send Treaddir for up to count bytes of dir data at offset; returns the Rreaddir data"
+        tag = self.get_tag()
+        super(P9Client, self).write(
+            self.proto.Treaddir(tag=tag, fid=fid, offset=offset, count=count))
+        reply = self.wait_for(tag)
+        # anything but an Rreaddir is reported through badresp
+        if not isinstance(reply, protocol.rrd.Rreaddir):
+            what = 'readdir {0} bytes at offset {1} in {2}'
+            self.badresp(what.format(count, offset, self.getpathX(fid)), reply)
+        return reply.data
+
+    def rename(self, fid, dfid, name):
+        "send Trename moving file <fid> to <dfid>/name, then update the recorded paths"
+        tag = self.get_tag()
+        request = self.proto.Trename(tag=tag, fid=fid, dfid=dfid, name=name)
+        super(P9Client, self).write(request)
+        reply = self.wait_for(tag)
+        if not isinstance(reply, protocol.rrd.Rrename):
+            oldwhere = self.getpathX(fid)
+            newwhere = self.getpathX(dfid)
+            what = 'rename {0} to {2} in {1}'.format(oldwhere, newwhere, name)
+            self.badresp(what, reply)
+        self.did_rename(fid, name, self.getpath(dfid))
+
+    def renameat(self, olddirfid, oldname, newdirfid, newname):
+        "invoke Trenameat: rename <olddirfid>/oldname to <newdirfid>/newname"
+        tag = self.get_tag()
+        pkt = self.proto.Trenameat(tag=tag,
+                                   olddirfid=olddirfid, oldname=oldname,
+                                   newdirfid=newdirfid, newname=newname)
+        super(P9Client, self).write(pkt)
+        resp = self.wait_for(tag)
+        if not isinstance(resp, protocol.rrd.Rrenameat):
+            self.badresp('rename {0} in {1} to {2} in '
+                         '{3}'.format(oldname, self.getpathX(olddirfid),
+                                      newname, self.getpathX(newdirfid)),
+                         resp)
+        # There's no renamed *fid*, just a renamed file!  So no
+        # call to self.did_rename().
+
+    def unlinkat(self, dirfd, name, flags):
+        "send Tunlinkat for name in dirfd; flags should be 0 or protocol.td.AT_REMOVEDIR"
+        tag = self.get_tag()
+        request = self.proto.Tunlinkat(tag=tag, dirfd=dirfd, name=name, flags=flags)
+        super(P9Client, self).write(request)
+        reply = self.wait_for(tag)
+        if isinstance(reply, protocol.rrd.Runlinkat):
+            return
+        what = 'unlinkat {0} in {1}'.format(name, self.getpathX(dirfd))
+        self.badresp(what, reply)
+
+    def decode_stat_objects(self, bstring, noerror=False):
+        """
+        Unpack the back-to-back stat objects that a read on a
+        directory returns (for .u these carry extra data).
+
+        Bad data can raise a SequenceError from the unpacker
+        unless noerror=True is passed.
+        """
+        stats, pos, end = [], 0, len(bstring)
+        # each unpack yields one object plus the offset of the next
+        while pos < end:
+            one, pos = self.proto.unpack_wirestat(bstring, pos, noerror)
+            stats.append(one)
+        return stats
+
+    def decode_readdir_dirents(self, bstring, noerror=False):
+        """
+        Decode the byte string from a readdir into a list of
+        dirent objects.
+
+        Bad data can raise SequenceError, unless noerror=True is passed.
+        """
+        decoded = []
+        pos, end = 0, len(bstring)
+        while pos < end:
+            item, pos = self.proto.unpack_dirent(bstring, pos, noerror)
+            decoded.append(item)
+        return decoded
+
+    def lcreate(self, fid, name, lflags, mode, gid):
+        "send Tlcreate (.L); returns (qid, iounit)"
+        request_tag = self.get_tag()
+        request = self.proto.Tlcreate(tag=request_tag, fid=fid, name=name,
+                                      flags=lflags, mode=mode, gid=gid)
+        super(P9Client, self).write(request)
+        reply = self.wait_for(request_tag)
+        if not isinstance(reply, protocol.rrd.Rlcreate):
+            what = 'create {0} in {1}'.format(name, self.getpathX(fid))
+            self.badresp(what, reply)
+        # A successful create also opens the new file on this fid,
+        # so the fid's recorded path now includes the new name.
+        self.setpath(fid, _pathcat(self.getpath(fid), name))
+        return reply.qid, reply.iounit
+
+    def mkdir(self, dfid, name, mode, gid):
+        "send Tmkdir (.L); returns the qid from the reply"
+        request_tag = self.get_tag()
+        request = self.proto.Tmkdir(tag=request_tag, dfid=dfid, name=name,
+                                    mode=mode, gid=gid)
+        super(P9Client, self).write(request)
+        reply = self.wait_for(request_tag)
+        if not isinstance(reply, protocol.rrd.Rmkdir):
+            what = 'mkdir {0} in {1}'.format(name, self.getpathX(dfid))
+            self.badresp(what, reply)
+        return reply.qid
+
+    # We don't call this getattr(), for the obvious reason.
+    def Tgetattr(self, fid, request_mask=protocol.td.GETATTR_ALL):
+        "send Tgetattr (.L); returns Fileattrs holding the fields marked valid"
+        request_tag = self.get_tag()
+        request = self.proto.Tgetattr(tag=request_tag, fid=fid,
+                                      request_mask=request_mask)
+        super(P9Client, self).write(request)
+        reply = self.wait_for(request_tag)
+        if not isinstance(reply, protocol.rrd.Rgetattr):
+            what = 'Tgetattr {0} of {1}'.format(request_mask,
+                                                self.getpathX(fid))
+            self.badresp(what, reply)
+        def is_valid(key):      # is GETATTR_<KEY> set in reply.valid?
+            return reply.valid & getattr(protocol.td, 'GETATTR_' + key.upper())
+        result = Fileattrs()
+        # Plain fields: copied across when their valid bit is set.
+        for key in ('mode', 'nlink', 'uid', 'gid', 'rdev',
+                    'size', 'blocks', 'gen', 'data_version'):
+            if is_valid(key):
+                result[key] = reply[key]
+        # Timestamps arrive as separate _sec/_nsec fields.
+        for key in ('atime', 'mtime', 'ctime', 'btime'):
+            if is_valid(key):
+                result[key] = Timespec(sec=reply[key + '_sec'],
+                                       nsec=reply[key + '_nsec'])
+        # blksize has no valid bit; it is copied unconditionally.
+        result.blksize = reply.blksize
+        if is_valid('ino'):     # ino is carried in qid.path
+            result.ino = reply.qid.path
+        return result
+
+    # We don't call this setattr(), for the obvious reason.
+    # See wstat for usage.  Note that time fields can be set
+    # with either second or nanosecond resolutions, and some
+    # can be set without supplying an actual timestamp, so
+    # this is all pretty ad-hoc.
+    #
+    # There's also one keyword-only argument, ctime=<anything>,
+    # which means "set SETATTR_CTIME".  This has the same effect
+    # as supplying valid=protocol.td.SETATTR_CTIME.
+    def Tsetattr(self, fid, valid=0, attrs=None, **kwargs):
+        if attrs is None:
+            attrs = Fileattrs()
+        else:
+            attrs = attrs._copy()
+
+        # ctime=<anything> only requests the SETATTR_CTIME bit.  Handle it
+        # before the field loop, which would otherwise consume the kwarg
+        # (if ctime is in attrs._fields) without ever setting the bit.
+        if 'ctime' in kwargs:
+            kwargs.pop('ctime')
+            valid |= protocol.td.SETATTR_CTIME
+
+        # Start with an all-zero Tsetattr instance (tag and fid are
+        # overwritten below).  If the caller passes, e.g.,
+        # valid=SETATTR_SIZE without a size, we ask to set size to 0.
+        attrobj = protocol.rrd.Tsetattr()
+        for field in attrobj._fields:
+            attrobj[field] = 0
+
+        # These may not be passed as kwargs; if present in an incoming
+        # attrs object they are merely ignored.
+        forbid = ('ino', 'nlink', 'rdev', 'blksize', 'blocks', 'btime',
+                  'gen', 'data_version')
+        for field in attrs._fields:
+            if field in kwargs:
+                if field in forbid:
+                    raise ValueError('cannot Tsetattr {0}'.format(field))
+                attrs[field] = kwargs.pop(field)
+            elif attrs[field] is None:
+                continue
+            # Only look up SETATTR_<FIELD> for fields we can set: ignored
+            # fields (forbid, ctime) need not have such a bit at all.
+            bitname = 'SETATTR_' + field.upper()
+            if field in ('mode', 'uid', 'gid', 'size'):
+                # Plain numeric field: set the value and its bit.
+                valid |= getattr(protocol.td, bitname)
+                attrobj[field] = attrs[field]
+                continue
+            # Timestamps are special:  the value may be an integer
+            # (seconds), a float (converted to sec+nsec), or a
+            # (sec, nsec) pair.  A value of 0 means "no time supplied,
+            # use the server's time."
+            if field in ('atime', 'mtime'):
+                value = attrs[field]
+                if hasattr(value, '__len__'):
+                    if len(value) != 2:
+                        raise ValueError('invalid {0}={1!r}'.format(field,
+                                                                    value))
+                    sec = value[0]
+                    nsec = value[1]
+                elif isinstance(value, float):
+                    # modf() returns floats; the wire fields are integers.
+                    nsec, sec = math.modf(value)
+                    sec = int(sec)
+                    nsec = int(round(nsec * 1000000000))
+                    if nsec >= 1000000000:  # rounding carried a full second
+                        sec += 1
+                        nsec -= 1000000000
+                else:
+                    sec = value
+                    nsec = 0
+                valid |= getattr(protocol.td, bitname)
+                attrobj[field + '_sec'] = sec
+                attrobj[field + '_nsec'] = nsec
+                if sec != 0 or nsec != 0:
+                    # Add SETATTR_ATIME_SET or SETATTR_MTIME_SET to tell
+                    # the server to use *this* time, not "server now".
+                    valid |= getattr(protocol.td, bitname + '_SET')
+        if kwargs:
+            raise TypeError('Tsetattr() got an unexpected keyword argument '
+                            '{0!r}'.format(kwargs.popitem()[0]))
+
+        tag = self.get_tag()
+        attrobj.valid = valid
+        attrobj.tag = tag
+        attrobj.fid = fid
+        pkt = self.proto.pack(attrobj)
+        super(P9Client, self).write(pkt)
+        resp = self.wait_for(tag)
+        if not isinstance(resp, protocol.rrd.Rsetattr):
+            self.badresp('Tsetattr {0} {1} of '
+                         '{2}'.format(valid, attrs, self.getpathX(fid)), resp)
+
+    def xattrwalk(self, fid, name=None):
+        "walk to one xattr (or all, if no name); caller read()s the new fid"
+        request_tag = self.get_tag()
+        xfid = self.alloc_fid()
+        request = self.proto.Txattrwalk(tag=request_tag, fid=fid,
+                                        newfid=xfid, name=name or '')
+        super(P9Client, self).write(request)
+        reply = self.wait_for(request_tag)
+        if not isinstance(reply, protocol.rrd.Rxattrwalk):
+            # no Rxattrwalk: release the fid we allocated for it
+            self.retire_fid(xfid)
+            what = 'Txattrwalk {0} of {1}'.format(name, self.getpathX(fid))
+            self.badresp(what, reply)
+        # Label the new fid so that later diagnostics can describe it.
+        label = 'xattr:' + name if name else 'xattr'
+        self.setpath(xfid, label)
+        return xfid, reply.size
+
+    def _pathsplit(self, path, startdir, allow_empty=False):
+        "split path into components and pick the fid to walk from"
+        if self.rootfid is None:
+            raise LocalError('{0}: not attached'.format(self))
+        if path.startswith(b'/') or startdir is None:
+            startdir = self.rootfid
+        components = [part for part in path.split(b'/') if part]
+        if not (components or allow_empty):
+            raise LocalError('{0}: {1!r}: empty path'.format(self, path))
+        return components, startdir
+
+    def uxlookup(self, path, startdir=None):
+        """
+        Unix-style lookup: uxlookup('/foo/bar') or uxlookup('foo/bar').
+        A path without a leading '/' is looked up from startdir
+        when one is given, otherwise from the root.
+        """
+        names, origin = self._pathsplit(path, startdir, allow_empty=True)
+        return self.lookup_last(origin, names)
+
+    def uxopen(self, path, oflags=0, perm=None, gid=None,
+               startdir=None, filetype=None):
+        """
+        Unix-style open()-with-option-to-create, or mkdir().
+        oflags is 0/1/2 with optional os.O_CREAT, perm defaults
+        to 0o666 (files) or 0o777 (directories).  If we use
+        a Linux create or mkdir op, we will need a gid, but it's
+        not required if you are opening an existing file.
+
+        Returns (fid, qid, iounit, didcreate), where didcreate is
+        a boolean for "did we actually create".  Raises OSError if
+        you ask for a directory but it's a file, or vice versa.
+
+        Note that this does not handle other file types, only
+        directories.
+        """
+        needtype = {
+            'dir': protocol.td.QTDIR,
+            None: protocol.td.QTFILE,
+        }[filetype]
+        omode_byte = oflags & 3 # cheating
+        # allow looking up /, but not creating /
+        allow_empty = (oflags & os.O_CREAT) == 0
+        components, startdir = self._pathsplit(path, startdir,
+                                               allow_empty=allow_empty)
+        if not (oflags & os.O_CREAT):
+            # Not creating, i.e., just look up and open existing file/dir.
+            fid, qid = self.lookup_last(startdir, components)
+            # If we got this far, use Topen on the fid; we did not
+            # create the file.
+            return self._uxopen2(path, needtype, fid, qid, omode_byte, False)
+
+        # Only used if using dot-L, but make sure it's always provided
+        # since this is generic.
+        if gid is None:
+            raise ValueError('gid is required when creating file or dir')
+
+        if len(components) > 1:
+            # Look up all but last component; this part must succeed.
+            fid, _ = self.lookup(startdir, components[:-1])
+
+            # Now proceed with the final component, using fid
+            # as the start dir.  Remember to clunk it!
+            startdir = fid
+            clunk_startdir = True
+            components = components[-1:]
+        else:
+            # Use startdir as the start dir, and get a new fid.
+            # Do not clunk startdir!
+            clunk_startdir = False
+            fid = self.alloc_fid()
+
+        # Now look up the (single) component.  If this fails,
+        # assume the file or directory needs to be created.
+        tag = self.get_tag()
+        pkt = self.proto.Twalk(tag=tag, fid=startdir, newfid=fid,
+                               nwname=1, wname=components)
+        super(P9Client, self).write(pkt)
+        resp = self.wait_for(tag)
+        if isinstance(resp, protocol.rrd.Rwalk):
+            # When clunk_startdir is set, startdir *is* fid (walked in
+            # place), so it must not be clunked on this path.
+            # fid successfully walked to refer to final component.
+            # Just need to actually open the file.
+            self.setpath(fid, _pathcat(self.getpath(startdir), components[0]))
+            qid = resp.wqid[0]
+            return self._uxopen2(path, needtype, fid, qid, omode_byte, False)
+
+        # Walk failed.  If we allocated a fid, retire it.  Then set
+        # up a fid that points to the parent directory in which to
+        # create the file or directory.  Note that if we're creating
+        # a file, this fid will get changed so that it points to the
+        # file instead of the directory, but if we're creating a
+        # directory, it will be unchanged.
+        if fid != startdir:
+            self.retire_fid(fid)
+        fid = self.dupfid(startdir)
+
+        try:
+            qid, iounit = self._uxcreate(filetype, fid, components[0],
+                                         oflags, omode_byte, perm, gid)
+
+            # Success.  If we created an ordinary file we have everything
+            # now, as create alters the incoming (dir) fid to open the
+            # file.  Otherwise (mkdir), we need to walk to and open the
+            # new directory, as with a successful lookup.
+            # Note that qid type should match "needtype".
+            if filetype != 'dir':
+                if qid.type == needtype:
+                    return fid, qid, iounit, True
+                self.clunk(fid, ignore_error=True)
+                raise OSError(_wrong_file_type(qid),
+                             '{0}: server told to create {1} but '
+                             'created {2} instead'.format(path,
+                                                          qt2n(needtype),
+                                                          qt2n(qid.type)))
+
+            # Created dir: release the create fid, then walk a new fid to it.
+            self.clunk(fid, ignore_error=True)
+            fid = self.alloc_fid()
+            tag = self.get_tag()
+            pkt = self.proto.Twalk(tag=tag, fid=startdir, newfid=fid,
+                                   nwname=1, wname=components)
+            super(P9Client, self).write(pkt)
+            resp = self.wait_for(tag)
+            if not isinstance(resp, protocol.rrd.Rwalk):
+                self.clunk(fid, ignore_error=True)
+                raise OSError(errno.ENOENT,
+                              '{0}: server made dir but then failed to '
+                              'find it again'.format(path))
+            self.setpath(fid, _pathcat(self.getpath(startdir), components[0]))
+            return self._uxopen2(path, needtype, fid, qid, omode_byte, True)
+        finally:
+            # Regardless of success/failure/exception, make sure
+            # we clunk startdir if needed.
+            if clunk_startdir:
+                self.clunk(startdir, ignore_error=True)
+
+    def _uxcreate(self, filetype, fid, name, oflags, omode_byte, perm, gid):
+        """
+        Helper for creating dir-or-file; returns (qid, iounit), with
+        iounit None after a .L mkdir.  The fid argument is the parent
+        directory on input, but will point to the file (if we're
+        creating a file) on return.  oflags only applies if we're
+        creating a file (even then we use omode_byte if we are
+        using the plan9 create op).
+        """
+        # Try to create or mkdir as appropriate.
+        if self.supports_all(protocol.td.Tlcreate, protocol.td.Tmkdir):
+            # Use Linux style create / mkdir.
+            if filetype == 'dir':
+                if perm is None:
+                    perm = 0o777
+                return self.mkdir(fid, name, perm, gid), None
+            if perm is None:
+                perm = 0o666
+            lflags = flags_to_linux_flags(oflags)
+            return self.lcreate(fid, name, lflags, perm, gid)
+
+        if filetype == 'dir':
+            if perm is None:
+                perm = protocol.td.DMDIR | 0o777
+            else:
+                perm |= protocol.td.DMDIR
+        else:
+            if perm is None:
+                perm = 0o666
+        return self.create(fid, name, perm, omode_byte)
+
+    def _uxopen2(self, path, needtype, fid, qid, omode_byte, didcreate):
+        "common code for finishing up uxopen; path is for error messages"
+        if qid.type != needtype:
+            self.clunk(fid, ignore_error=True)
+            raise OSError(_wrong_file_type(qid),
+                          '{0}: is {1}, expected '
+                          '{2}'.format(path, qt2n(qid.type), qt2n(needtype)))
+        qid, iounit = self.open(fid, omode_byte)
+        # ? should we re-check qid? it should not have changed
+        return fid, qid, iounit, didcreate
+
+    def uxmkdir(self, path, perm, gid, startdir=None):
+        """
+        Unix-style mkdir; returns the new directory's qid.
+        The gid is only applied if we are using .L style mkdir.
+        """
+        components, startdir = self._pathsplit(path, startdir)
+        clunkme = None
+        if len(components) > 1:
+            startdir, _ = self.lookup(startdir, components[:-1])
+            clunkme = startdir
+        try:
+            if self.supports(protocol.td.Tmkdir):
+                return self.mkdir(startdir, components[-1], perm, gid)
+            if clunkme is None:
+                # per 9P, create re-points its fid: give it a private copy
+                startdir = clunkme = self.dupfid(startdir)
+            qid, _ = self.create(startdir, components[-1],
+                                 protocol.td.DMDIR | perm,
+                                 protocol.td.OREAD)
+            # Should we chown/chgrp the dir?
+            return qid
+        finally:
+            # a fid is a plain number and could be 0: compare against None
+            if clunkme is not None:
+                self.clunk(clunkme, ignore_error=True)
+
+    def uxreaddir(self, path, startdir=None, no_dotl=False):
+        """
+        Return the list of names found in the directory at path
+        ('.' and '..' may or may not be among them).
+
+        A true no_dotl forces the plain or .u directory-read
+        format; otherwise dot-L readdir is used whenever the
+        server supports it.
+        """
+        names, origin = self._pathsplit(path, startdir, allow_empty=True)
+        fid, qid = self.lookup_last(origin, names)
+        try:
+            if qid.type != protocol.td.QTDIR:
+                message = '{0}: {1}'.format(self.getpathX(fid),
+                                            os.strerror(errno.ENOTDIR))
+                raise OSError(errno.ENOTDIR, message)
+            # Treaddir is only usable together with Tlopen.
+            dotl_ok = self.supports_all(protocol.td.Tlopen,
+                                        protocol.td.Treaddir)
+            if dotl_ok and not no_dotl:
+                entries = self.uxreaddir_dotl_fid(fid)
+            else:
+                entries = self.uxreaddir_stat_fid(fid)
+            # stat objects and dirents both carry the name in .name
+            return [entry.name for entry in entries]
+        finally:
+            self.clunk(fid, ignore_error=True)
+
+    def uxreaddir_stat(self, path, startdir=None):
+        """
+        Use directory read to get plan9 style stat data (plain or .u readdir).
+
+        Note that this gets a fid, then opens it, reads, then clunks
+        the fid.  If you already have a fid, you may want to use
+        uxreaddir_stat_fid (but note that this opens, yet does not
+        clunk, the fid).
+
+        Returns (qid, statvals): the directory's qid plus the list
+        of stat objects for its contents.
+
+        Raises OSError (ENOTDIR) if the path is not a directory;
+        unlike uxreaddir(), an empty path is not accepted here.
+        """
+        components, startdir = self._pathsplit(path, startdir)
+        fid, qid = self.lookup_last(startdir, components)
+        try:
+            if qid.type != protocol.td.QTDIR:
+                raise OSError(errno.ENOTDIR,
+                              '{0}: {1}'.format(self.getpathX(fid),
+                                                os.strerror(errno.ENOTDIR)))
+            statvals = self.uxreaddir_stat_fid(fid)
+            return qid, statvals
+        finally:
+            self.clunk(fid, ignore_error=True)
+
+    def uxreaddir_stat_fid(self, fid):
+        """
+        Open fid for reading and loop over read() calls, decoding
+        stat values.  The fid is left open (not clunked).
+
+        Unlike uxreaddir_stat(), applying this to a non-directory
+        does not raise: it just returns no entries.
+        """
+        qid, iounit = self.open(fid, protocol.td.OREAD)
+        if qid.type != protocol.td.QTDIR:
+            return []
+        # ?? is a zero iounit allowed? if so, what do we use here?
+        chunk = iounit if iounit > 0 else 512   # probably good enough
+        collected = []
+        position = 0
+        # Keep reading at increasing offsets until a read comes back empty.
+        while True:
+            data = self.read(fid, position, chunk)
+            if data == b'':
+                return collected
+            collected.extend(self.decode_stat_objects(data))
+            position += len(data)
+
+    def uxreaddir_dotl_fid(self, fid):
+        """
+        Open fid with lopen and loop over readdir() calls, decoding
+        dot-L style dirents.  The fid is left open (not clunked).
+
+        If applied to a file, the lopen should fail, because of the
+        L_O_DIRECTORY flag.
+        """
+        lflags = protocol.td.OREAD | protocol.td.L_O_DIRECTORY
+        qid, iounit = self.lopen(fid, lflags)
+        # ?? is a zero iounit allowed? if so, what do we use here?
+        # but, we want a minimum of over 256 anyway, let's go for 512
+        chunk = max(iounit, 512)
+        collected = []
+        position = 0
+        while True:
+            data = self.readdir(fid, position, chunk)
+            if data == b'':
+                break
+            batch = self.decode_readdir_dirents(data)
+            if not batch:
+                break               # ???
+            collected.extend(batch)
+            # resume from the offset recorded in the last dirent
+            position = batch[-1].offset
+        return collected
+
+    def uxremove(self, path, startdir=None, filetype=None,
+                 force=False, recurse=False):
+        """
+        Implement rm / rmdir, with optional -rf.
+        With filetype None, remove whatever is there; with 'dir' or
+        'file', remove it only if it is of that kind.  A true force
+        means failures to remove are ignored.  A true recurse means
+        directory contents are removed too (recursively).
+
+        File type mismatches (when filetype!=None) raise OSError (?).
+        """
+        names, origin = self._pathsplit(path, startdir, allow_empty=True)
+        # Look up all components.  A lookup error is taken to mean
+        # that the file does not exist (is this good?), in which
+        # case there is nothing to do.
+        try:
+            fid, qid = self.lookup_last(origin, names)
+        except RemoteError:
+            return
+        if qid.type != protocol.td.QTDIR:
+            # Not a directory, call it a file (even if socket or fifo etc).
+            if filetype == 'dir':
+                self.clunk(fid, ignore_error=True)
+                raise OSError(_wrong_file_type(qid),
+                              '{0}: is file, expected dir'.format(path))
+            self.remove(fid, ignore_error=force)
+            return
+        # It's a directory: remove only if allowed.
+        if filetype == 'file':
+            self.clunk(fid, ignore_error=True)
+            raise OSError(_wrong_file_type(qid),
+                          '{0}: is dir, expected file'.format(path))
+        if recurse:
+            # NB: _rm_recursive does not clunk fid
+            self._rm_recursive(fid, filetype, force)
+        # "rm -r /" (no components): just clunk the root fid.  Otherwise
+        # remove, which fails on a non-empty directory unless force is set.
+        if len(names) == 0:
+            self.clunk(fid, ignore_error=force)
+        else:
+            self.remove(fid, ignore_error=force)
+
+    def _rm_file_by_dfid(self, dfid, name, force=False):
+        """
+        Remove a file whose name is <name> (no path, just a component
+        name) whose parent directory is <dfid>.  We may assume that the
+        file really is a file (or a socket, or fifo, or some such, but
+        definitely not a directory).
+
+        If force is set, ignore failures.
+        """
+        # unlinkat is the fast way, if we have it.  If it fails with
+        # ENOTSUP, mark it unsupported so we don't try it again.
+        if self.supports(protocol.td.Tunlinkat):
+            try:
+                self.unlinkat(dfid, name, 0)
+                return
+            except RemoteError as err:
+                if not err.is_ENOTSUP():
+                    if force:
+                        return  # honor force here too, as documented
+                    raise
+                self.unsupported(protocol.td.Tunlinkat)
+        # Fall back to lookup + remove.
+        try:
+            fid, qid = self.lookup_last(dfid, [name])
+        except RemoteError:
+            # If this has an errno we could tell ENOENT from EPERM,
+            # and actually raise an error for the latter.  Should we?
+            return
+        self.remove(fid, ignore_error=force)
+
+    def _rm_recursive(self, dfid, filetype, force):
+        """
+        Remove the contents of directory <dfid>, recursively.  The
+        filetype is passed on to uxremove() for subdirectories.
+        NOTE(review): non-dir entries are removed whatever filetype is.
+
+        If force is set, ignore failures to remove entries.
+
+        Although we open dfid (via the readdir.*_fid calls) we
+        do not clunk it here; that's the caller's job.
+        """
+        # first, remove contents
+        if self.supports_all(protocol.td.Tlopen, protocol.td.Treaddir):
+            for entry in self.uxreaddir_dotl_fid(dfid):
+                if entry.name in (b'.', b'..'):
+                    continue
+                fid, qid = self.lookup(dfid, [entry.name])
+                try:
+                    attrs = self.Tgetattr(fid, protocol.td.GETATTR_MODE)
+                    if stat.S_ISDIR(attrs.mode):
+                        self.uxremove(entry.name, dfid, filetype, force, True)
+                    else:
+                        self.remove(fid, ignore_error=force)
+                        fid = None
+                finally:
+                    if fid is not None:
+                        self.clunk(fid, ignore_error=True)
+        else:
+            for statobj in self.uxreaddir_stat_fid(dfid):
+                # skip . and ..
+                name = statobj.name
+                if name in (b'.', b'..'):
+                    continue
+                if statobj.qid.type == protocol.td.QTDIR:
+                    self.uxremove(name, dfid, filetype, force, True)
+                else:
+                    self._rm_file_by_dfid(dfid, name, force)
+
+def _wrong_file_type(qid):
+    "pick the OSError errno for a type mismatch: EISDIR or ENOTDIR"
+    is_dir = qid.type == protocol.td.QTDIR
+    # a directory where a file was wanted, or the other way around
+    return errno.EISDIR if is_dir else errno.ENOTDIR
+
+def flags_to_linux_flags(flags):
+    """
+    Convert OS flags (O_CREAT etc) to Linux flags (protocol.td.L_O_CREAT etc).
+    """
+    flagmap = {
+        os.O_CREAT: protocol.td.L_O_CREAT,
+        os.O_EXCL: protocol.td.L_O_EXCL,
+        os.O_NOCTTY: protocol.td.L_O_NOCTTY,
+        os.O_TRUNC: protocol.td.L_O_TRUNC,
+        os.O_APPEND: protocol.td.L_O_APPEND,
+        os.O_DIRECTORY: protocol.td.L_O_DIRECTORY,
+    }
+    # Mask the whole access mode (0/1/2), else O_WRONLY is "untranslated".
+    result = flags & os.O_ACCMODE
+    flags &= ~os.O_ACCMODE
+    for key, value in flagmap.items():      # items(): Python 2 and 3
+        if flags & key:
+            result |= value
+            flags &= ~key
+    if flags:
+        raise ValueError('untranslated bits 0x{0:x} in os flags'.format(flags))
+    return result
Index: lib/lib9p/pytest/p9err.py
===================================================================
--- /dev/null
+++ lib/lib9p/pytest/p9err.py
@@ -0,0 +1,146 @@
+#! /usr/bin/env python
+
+"""
+Error number definitions for 9P2000, .u, and .L.
+
+Note that there is no native-to-9P2000 (plain) translation
+table since 9P2000 takes error *strings* rather than error
+*numbers*.
+"""
+
+import errno as _errno
+import lerrno as _lerrno
+import os as _os
+
+_native_to_dotu = {
+    # These do not map to themselves in 9P2000.u, so enter them
+    # before the identity mappings are filled in below.
+    _errno.ENOTEMPTY: _errno.EPERM,
+    _errno.EDQUOT: _errno.EPERM,
+    _errno.ENOSYS: _errno.EPERM,
+}
+
+_native_to_dotl = {}
+
+# Add standard errno's, 1 through ERANGE inclusive (hence the + 1).
+for _i in range(1, _errno.ERANGE + 1):
+    _native_to_dotu.setdefault(_i, _i)
+    _native_to_dotl[_i] = _i
+
+# Add linux errno's.  Linux EAGAIN at #11 overrides BSD EDEADLK, while
+# Linux EDEADLK at #35 overrides BSD EAGAIN, so it all works out.
+#
+# Every BSD error name is simply listed here; names that either
+# errno module lacks are skipped by the None checks below.
+for _i in (
+    'EDEADLK',
+    'EAGAIN',
+    'EINPROGRESS',
+    'EALREADY',
+    'ENOTSOCK',
+    'EDESTADDRREQ',
+    'EMSGSIZE',
+    'EPROTOTYPE',
+    'ENOPROTOOPT',
+    'EPROTONOSUPPORT',
+    'ESOCKTNOSUPPORT',
+    'EOPNOTSUPP',
+    'EPFNOSUPPORT',
+    'EAFNOSUPPORT',
+    'EADDRINUSE',
+    'EADDRNOTAVAIL',
+    'ENETDOWN',
+    'ENETUNREACH',
+    'ENETRESET',
+    'ECONNABORTED',
+    'ECONNRESET',
+    'ENOBUFS',
+    'EISCONN',
+    'ENOTCONN',
+    'ESHUTDOWN',
+    'ETOOMANYREFS',
+    'ETIMEDOUT',
+    'ECONNREFUSED',
+    'ELOOP',
+    'ENAMETOOLONG',
+    'EHOSTDOWN',
+    'EHOSTUNREACH',
+    'ENOTEMPTY',
+    'EPROCLIM',
+    'EUSERS',
+    'EDQUOT',
+    'ESTALE',
+    'EREMOTE',
+    'EBADRPC',
+    'ERPCMISMATCH',
+    'EPROGUNAVAIL',
+    'EPROGMISMATCH',
+    'EPROCUNAVAIL',
+    'ENOLCK',
+    'ENOSYS',
+    'EFTYPE',
+    'EAUTH',
+    'ENEEDAUTH',
+    'EIDRM',
+    'ENOMSG',
+    'EOVERFLOW',
+    'ECANCELED',
+    'EILSEQ',
+    'EDOOFUS',
+    'EBADMSG',
+    'EMULTIHOP',
+    'ENOLINK',
+    'EPROTO',
+    'ENOTCAPABLE',
+    'ECAPMODE',
+    'ENOTRECOVERABLE',
+    'EOWNERDEAD',
+):
+    _bsd, _linux = getattr(_errno, _i, None), getattr(_lerrno, _i, None)
+    if _bsd is not None and _linux is not None:
+        _native_to_dotl[_bsd] = _linux
+del _i, _bsd, _linux
+
+def to_dotu(errnum):
+    """
+    Map a native errno onto the errno to send for 9P2000.u.
+
+    >>> import errno
+    >>> to_dotu(errno.EIO)
+    5
+    >>> to_dotu(errno.EDQUOT)
+    1
+    >>> to_dotu(errno.ELOOP)
+    5
+
+    The matching strerror is dotu_strerror() (which is simply
+    os.strerror under another name):
+
+    >>> dotu_strerror(5)
+    'Input/output error'
+
+    """
+    return _native_to_dotu[errnum] if errnum in _native_to_dotu else _errno.EIO
+
+def to_dotl(errnum):
+    """
+    Map a native errno onto the errno to send for 9P2000.L.
+
+    >>> import errno
+    >>> to_dotl(errno.ELOOP)
+    40
+
+    dotl_strerror() is the matching strerror():
+    >>> dotl_strerror(40)
+    'Too many levels of symbolic links'
+    """
+    fallback = _lerrno.ENOTRECOVERABLE  # used when there is no table entry
+    return _native_to_dotl.get(errnum, fallback)
+
+dotu_strerror = _os.strerror
+# .L numbers are translated via lerrno, so use its strerror() for them.
+dotl_strerror = _lerrno.strerror
+
+if __name__ == '__main__':
+    import doctest
+    doctest.testmod()
Index: lib/lib9p/pytest/pfod.py
===================================================================
--- /dev/null
+++ lib/lib9p/pytest/pfod.py
@@ -0,0 +1,204 @@
+#! /usr/bin/env python
+
+"""
+pfod - prefilled OrderedDict
+
+This is basically a hybrid of a class and an OrderedDict,
+or, sort of a data-only class.  When an instance of the
+class is created, all its fields are set to None if not
+initialized.
+
+Because it is an OrderedDict you can add extra fields to an
+instance, and they will be in inst.keys().  Because it
+behaves in a class-like way, if the keys are 'foo' and 'bar'
+you can write print(inst.foo) or inst.bar = 3.  Setting an
+attribute that does not currently exist causes a new key
+to be added to the instance.
+"""
+
+from __future__ import print_function
+
+__all__ = ['pfod', 'OrderedDict']
+
+### shameless stealing from namedtuple here
+
+import sys as _sys
+from keyword import iskeyword as _iskeyword
+from collections import OrderedDict
+from collections import deque as _deque
+
+_class_template = '''\
+class {typename}(OrderedDict):
+    '{typename}({arg_list})'
+    __slots__ = ()
+
+    _fields = {field_names!r}
+
+    def __init__(self, *args, **kwargs):
+        'Create new instance of {typename}()'
+        super({typename}, self).__init__()
+        args = _deque(args)
+        for field in self._fields:
+            if field in kwargs:
+                self[field] = kwargs.pop(field)
+            elif len(args) > 0:
+                self[field] = args.popleft()
+            else:
+                self[field] = None
+        if len(kwargs):
+            raise TypeError('unexpected kwargs %s' % list(kwargs))
+        if len(args):
+            raise TypeError('unconsumed args %r' % (tuple(args),))
+
+    def _copy(self):
+        'copy to new instance'
+        new = {typename}()
+        new.update(self)
+        return new
+
+    def __getattr__(self, attr):
+        if attr in self:
+            return self[attr]
+        raise AttributeError('%r object has no attribute %r' %
+            (self.__class__.__name__, attr))
+
+    def __setattr__(self, attr, val):
+        if attr.startswith('_OrderedDict_'):
+            super({typename}, self).__setattr__(attr, val)
+        else:
+            self[attr] = val
+
+    def __repr__(self):
+        'Return a nicely formatted representation string'
+        return '{typename}({repr_fmt})'.format(**self)
+'''     # class source text; pfod() fills in the {...} slots with str.format()
+
+_repr_template = '{name}={{{name}!r}}'  # one "name={name!r}" piece per field
+
+# Define _exec(string, gdict, ldict).  py2k has exec-as-statement, py3k has
+# exec-as-function; since the syntax differs, each variant is exec'ed itself!
+if _sys.version_info[0] < 3:
+    # py2k: need a real function.  (There is a way to deal with
+    # this without a function if the py2k is new enough, but this
+    # works in more cases.)
+    exec("""def _exec(string, gdict, ldict):
+        "Python 2: exec string in gdict, ldict"
+        exec string in gdict, ldict""")
+else:
+    # py3k: just alias builtin exec (via a string, so py2k can parse this file)
+    exec("_exec = exec")
+
+def pfod(typename, field_names, verbose=False, rename=False):
+    """
+    Return a new subclass of OrderedDict with named fields.
+
+    Fields are accessible by name.  Note that this means
+    that to copy a PFOD you must use _copy() - field names
+    may not start with '_' (rename=True replaces bad names by '_<index>').
+    Invalid, keyword or duplicate names otherwise raise ValueError.
+
+    Fields not initialized when instantiating are set to None.  A true
+    verbose prints the generated source (to verbose if it is file-like).
+
+    >>> Point = pfod('Point', ['x', 'y'])
+    >>> Point.__doc__                   # docstring for the new class
+    'Point(x, y)'
+    >>> p = Point(11, y=22)             # instantiate with positional args or keywords
+    >>> p
+    Point(x=11, y=22)
+    >>> p['x'] + p['y']                 # indexable
+    33
+    >>> p.x + p.y                       # fields also accessible by name
+    33
+    >>> p._copy()
+    Point(x=11, y=22)
+    >>> p2 = Point()
+    >>> p2.extra = 2
+    >>> p2
+    Point(x=None, y=None)
+    >>> p2.extra
+    2
+    >>> p2['extra']
+    2
+    """
+    if _sys.version_info[0] >= 3:
+        string_type = str
+    else:
+        string_type = basestring
+    # Validate the field names.  At the user's option, either generate an error
+    # message or automatically replace the field name with a valid name.
+    if isinstance(field_names, string_type):
+        field_names = field_names.replace(',', ' ').split()
+    field_names = list(map(str, field_names))
+    typename = str(typename)
+    if rename:
+        seen = set()
+        for index, name in enumerate(field_names):
+            if (not all(c.isalnum() or c=='_' for c in name)
+                or _iskeyword(name)
+                or not name
+                or name[0].isdigit()
+                or name.startswith('_')
+                or name in seen):
+                field_names[index] = '_%d' % index
+            seen.add(name)
+    for name in [typename] + field_names:
+        if type(name) != str:
+            raise TypeError('Type names and field names must be strings')
+        if not all(c.isalnum() or c=='_' for c in name):
+            raise ValueError('Type names and field names can only contain '
+                             'alphanumeric characters and underscores: %r' % name)
+        if _iskeyword(name):
+            raise ValueError('Type names and field names cannot be a '
+                             'keyword: %r' % name)
+        if name[0].isdigit():
+            raise ValueError('Type names and field names cannot start with '
+                             'a number: %r' % name)
+    seen = set()
+    for name in field_names:
+        if name.startswith('_OrderedDict_'):
+            raise ValueError('Field names cannot start with _OrderedDict_: '
+                             '%r' % name)
+        if name.startswith('_') and not rename:
+            raise ValueError('Field names cannot start with an underscore: '
+                             '%r' % name)
+        if name in seen:
+            raise ValueError('Encountered duplicate field name: %r' % name)
+        seen.add(name)
+
+    # Fill-in the class template
+    class_definition = _class_template.format(
+        typename = typename,
+        field_names = tuple(field_names),
+        arg_list = repr(tuple(field_names)).replace("'", "")[1:-1],
+        repr_fmt = ', '.join(_repr_template.format(name=name)
+                             for name in field_names),
+    )
+    if verbose:     # the py2k-only 'file' builtin is gone: duck-type on write()
+        print(class_definition,
+            file=verbose if hasattr(verbose, 'write') else _sys.stdout)
+
+    # Execute the template string in a temporary namespace and support
+    # tracing utilities by setting a value for frame.f_globals['__name__']
+    namespace = dict(__name__='PFOD%s' % typename,
+                     OrderedDict=OrderedDict, _deque=_deque)
+    try:
+        _exec(class_definition, namespace, namespace)
+    except SyntaxError as e:    # e.message is absent on py3k; str(e) always works
+        raise SyntaxError(str(e) + ':\n' + class_definition)
+    result = namespace[typename]
+
+    # For pickling to work, the __module__ variable needs to be set to the frame
+    # where the named tuple is created.  Bypass this step in environments where
+    # sys._getframe is not defined (Jython for example) or sys._getframe is not
+    # defined for arguments greater than 0 (IronPython).
+    try:
+        result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
+    except (AttributeError, ValueError):
+        pass
+
+    return result
+
+if __name__ == '__main__':      # run as a script: execute the doctests
+    import doctest
+    doctest.testmod()
Index: lib/lib9p/pytest/protocol.py
===================================================================
--- /dev/null
+++ lib/lib9p/pytest/protocol.py
@@ -0,0 +1,1998 @@
+#! /usr/bin/env python
+
+"""
+Protocol definitions for python based lib9p server/client.
+
+The sub-namespace td has type definitions (qid, stat) and values
+that are "#define" constants in C code (e.g., DMDIR, QTFILE, etc).
+This also contains the byte values for protocol codes like Tversion,
+Rversion, Rerror, and so on.
+
+    >>> td.Tversion
+    100
+    >>> td.Rlerror
+    7
+
+The qid and stat types are PFOD classes and generate instances that
+are a cross between namedtuple and OrderedDictionary (see pfod.py
+for details):
+
+    >>> td.qid(type=td.QTFILE, path=2, version=1)
+    qid(type=0, version=1, path=2)
+
+The td.stat() type output is pretty long, since it has all the
+dotu-specific members (used only when packing for dotu/dotl and
+set only when unpacking those), so here's just one field:
+
+    >>> td.stat(*(15 * [0])).mode
+    0
+    >>> import pprint; pprint.pprint(td.stat()._fields)
+    ('type',
+     'dev',
+     'qid',
+     'mode',
+     'atime',
+     'mtime',
+     'length',
+     'name',
+     'uid',
+     'gid',
+     'muid',
+     'extension',
+     'n_uid',
+     'n_gid',
+     'n_muid')
+
+Stat objects sent across the protocol must first be encoded into
+wirestat objects, which are basically size-counted pre-sequenced
+stat objects.  The pre-sequencing uses:
+
+    >>> td.stat_seq
+    Sequencer('stat')
+
+For parsing bytes returned in a Tread on a directory, td.wirestat_seq
+is the sequencer.  However, most users should rely on the packers and
+unpackers in each protocol (see {pack,unpack}_wirestat below).
+
+    >>> td.wirestat_seq
+    Sequencer('wirestat')
+
+There is a dictionary fcall_names that maps from byte value
+to protocol code.  Names map to themselves as well:
+
+    >>> fcall_names[101]
+    'Rversion'
+    >>> fcall_names['Tversion']
+    'Tversion'
+
+The sub-namespace rrd has request (Tversion, Topen, etc) and
+response (Rversion, Ropen, etc) data definitions.  Each of these
+is a PFOD class:
+
+    >>> rrd.Tversion(1000, 'hello', tag=0)
+    Tversion(tag=0, msize=1000, version='hello')
+
+The function p9_version() looks up the instance of each supported
+protocol, or raises a KeyError when given an invalid protocol.
+The names may be spelled in any mixture of cases.
+
+The names plain, dotu, and dotl are predefined as the three
+supported protocols:
+
+    >>> p9_version('invalid')
+    Traceback (most recent call last):
+        ...
+    KeyError: 'invalid'
+    >>> p9_version('9p2000') == plain
+    True
+    >>> p9_version('9P2000') == plain
+    True
+    >>> p9_version('9P2000.u') == dotu
+    True
+    >>> p9_version('9p2000.L') == dotl
+    True
+
+Protocol instances have a pack() method that encodes a set of
+arguments into a packet.  To know what to encode, pack() must
+receive an fcall value and a dictionary containing argument
+values, or something equivalent.  The required argument values
+depend on the fcall.  For instance, a Tversion fcall needs three
+arguments: the version name, the tag, and the msize (these of
+course are the pre-filled fields in a Tversion PFOD instance).
+
+    >>> args = {'version': '!', 'tag': 1, 'msize': 1000}
+    >>> pkt = dotu.pack(fcall='Tversion', args=args)
+    >>> len(pkt)
+    14
+
+The length of string '!' is 1, and the packet (or wire) format of
+a Tversion request is:
+
+   size[4] fcall[1] tag[2] msize[4] version[s]
+
+which corresponds to a struct's IBHIH (for the fixed size parts)
+followed by 1 B (for the string).  The overall packet is 14 bytes
+long, so we have size=14, fcall=100, tag=1, msize=1000, and the
+version string is length=1, value=33 (ord('!')).
+
+    >>> import struct
+    >>> struct.unpack('<IBHIHB', pkt)
+    (14, 100, 1, 1000, 1, 33)
+
+Of course, this packed a completely bogus "version" string, but
+that's what we told it to do.  Protocol instances remember their
+version, so we can get it right by omitting the version from the
+arguments:
+
+    >>> dotu.version
+    '9P2000.u'
+    >>> args = {'tag': 99, 'msize': 1000}
+    >>> pkt = dotu.pack(fcall='Tversion', args=args)
+    >>> len(pkt)
+    21
+
+The fcall can be supplied numerically:
+
+    >>> pkt2 = dotu.pack(fcall=td.Tversion, args=args)
+    >>> pkt == pkt2
+    True
+
+Instead of providing an fcall you can provide an instance of
+the appropriate PFOD.  In this case pack() finds the type from
+the PFOD instance.  As usual, the version parameter is filled in
+for you:
+
+    >>> pkt2 = dotu.pack(rrd.Tversion(tag=99, msize=1000))
+    >>> pkt == pkt2
+    True
+
+Note that it's up to you to check the other end's version and
+switch to a "lower" protocol as needed.  Each instance does provide
+a downgrade_to() method that gets you a possibly-downgraded instance.
+This will fail if you are actually trying to upgrade, and also if
+you provide a bogus version:
+
+    >>> dotu.downgrade_to('9P2000.L')
+    Traceback (most recent call last):
+        ...
+    KeyError: '9P2000.L'
+    >>> dotu.downgrade_to('we never heard of this protocol')
+    Traceback (most recent call last):
+        ...
+    KeyError: 'we never heard of this protocol'
+
+Hence you might use:
+
+    try:
+        proto = protocol.dotl.downgrade_to(vstr)
+    except KeyError:
+        pkt = protocol.plain.pack(fcall='Rerror',
+            args={'tag': tag, 'errstr': 'unknown protocol version '
+                    '{0!r}'.format(vstr)})
+    else:
+        pkt = proto.pack(fcall='Rversion', args={'tag': tag, 'msize': msize})
+
+When using a PFOD instance, it is slightly more efficient to use
+pack_from():
+
+    try:
+        proto = protocol.dotl.downgrade_to(vstr)
+        reply = protocol.rrd.Rversion(tag=tag, msize=msize)
+    except KeyError:
+        proto = protocol.plain
+        reply = protocol.rrd.Rerror(tag=tag,
+            errstr='unknown protocol version {0!r}'.format(vstr))
+    pkt = proto.pack_from(reply)
+
+does the equivalent of the try/except/else variant.  Note that
+the protocol.rrd.Rversion() instance has version=None.  Like
+proto.pack, the pack_from will detect this "missing" value and
+fill it in.
+
+Because errors vary (one should use Rlerror for dotl and Rerror
+for dotu and plain), and it's convenient to use an Exception
+instance for an error, all protocols provide .error().  This
+builds the appropriate kind of error response, extracting and
+converting errno's and error messages as appropriate.
+
+If <err> is an instance of Exception, err.errno provides the errnum
+or ecode value (if used, for dotu and dotl) and err.strerror as the
+errstr value (if used, for plain 9p2000).  Otherwise err should be
+an integer, and we'll use os.strerror() to get a message.
+
+When using plain 9P2000 this sends error *messages*:
+
+    >>> import errno, os
+    >>> utf8 = os.strerror(errno.ENOENT).encode('utf-8')
+    >>> pkt = None
+    >>> try:
+    ...     os.open('presumably this file does not exist here', 0)
+    ... except OSError as err:
+    ...     pkt = plain.error(1, err)
+    ...
+    >>> pkt[-len(utf8):] == utf8
+    True
+    >>> pkt2 = plain.error(1, errno.ENOENT)
+    >>> pkt == pkt2
+    True
+
+When using 9P2000.u it sends the error code as well, and when
+using 9P2000.L it sends only the error code (and more error
+codes can pass through):
+
+    >>> len(pkt)
+    34
+    >>> len(dotu.error(1, errno.ENOENT))
+    38
+    >>> len(dotl.error(1, errno.ENOENT))
+    11
+
+For even more convenience (and another slight speed hack), the
+protocol has member functions for each valid pfod, which
+effectively do a pack_from of a pfod built from the arguments.  In
+the above example this is not very useful (because we want two
+different replies), but for Rlink, for instance, which has only
+a tag, a server might implement Tlink() as:
+
+    def do_Tlink(proto, data): # data will be a protocol.rrd.Tlink(...)
+        tag = data.tag
+        dfid = data.dfid
+        fid = data.fid
+        name = data.name
+        ... some code to set up for doing the link ...
+        try:
+            os.link(path1, path2)
+        except OSError as err:
+            return proto.error(tag, err)
+        else:
+            return proto.Rlink(tag)
+
+    >>> pkt = dotl.Rlink(12345)
+    >>> struct.unpack('<IBH', pkt)
+    (7, 71, 12345)
+
+Similarly, a client can build a Tversion packet quite trivially:
+
+    >>> vpkt = dotl.Tversion(tag=0, msize=12345)
+
+To see that this is a valid version packet, let's unpack its bytes.
+The overall length is 21 bytes: 4 bytes of size, 1 byte of code 100
+for Tversion, 2 bytes of tag, 4 bytes of msize, 2 bytes of string
+length, and 8 bytes of string '9P2000.L'.
+
+    >>> tup = struct.unpack('<IBHIH8B', vpkt)
+    >>> tup[0:5]
+    (21, 100, 0, 12345, 8)
+    >>> ''.join(chr(i) for i in tup[5:])
+    '9P2000.L'
+
+Of course, since you can *pack*, you can also *unpack*.  It's
+possible that the incoming packet is malformed.  If so, this
+raises various errors (see below).
+
+Unpack is actually a two step process: first we unpack a header
+(where the size is already removed and is implied by len(data)),
+then we unpack the data within the packet.  You can invoke the
+first step separately.  Furthermore, there's a noerror argument
+that leaves some fields set to None or empty strings, if the
+packet is too short.  (Note that we need a hack for py2k vs py3k
+strings here, for doctests.  Also, encoding 12345 into a byte
+string produces '90', by ASCII luck!)
+
+    >>> pkt = pkt[4:] # strip generated size
+    >>> import sys
+    >>> py3k = sys.version_info[0] >= 3
+    >>> b2s = lambda x: x.decode('utf-8') if py3k else x
+    >>> d = plain.unpack_header(pkt[0:1], noerror=True)
+    >>> d.data = b2s(d.data)
+    >>> d
+    Header(size=5, dsize=0, fcall=71, data='')
+    >>> d = plain.unpack_header(pkt[0:2], noerror=True)
+    >>> d.data = b2s(d.data)
+    >>> d
+    Header(size=6, dsize=1, fcall=71, data='9')
+
+Without noerror=True a short packet raises a SequenceError:
+
+    >>> plain.unpack_header(pkt[0:0])   # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+        ...
+    SequenceError: out of data while unpacking 'fcall'
+
+Of course, a normal packet decodes fine:
+
+    >>> d = plain.unpack_header(pkt)
+    >>> d.data = b2s(d.data)
+    >>> d
+    Header(size=7, dsize=2, fcall=71, data='90')
+
+but one that is too *long* potentially raises a SequenceError.
+(This is impossible for a header, though, since the size and
+data size are both implied: either there is an fcall code, and
+the rest of the bytes are "data", or there isn't and the packet
+is too short.  So we can only demonstrate this for regular
+unpack; see below.)
+
+Note that all along, this has been decoding Rlink (fcall=71),
+which is not valid for plain 9P2000 protocol.  It's up to the
+caller to check:
+
+    >>> plain.supports(71)
+    False
+
+    >>> plain.unpack(pkt)           # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+        ...
+    SequenceError: invalid fcall 'Rlink' for 9P2000
+    >>> dotl.unpack(pkt)
+    Rlink(tag=12345)
+
+However, the unpack() method DOES check that the fcall type is
+valid, even if you supply noerror=True.  This is because we can
+only really decode the header, not the data, if the fcall is
+invalid:
+
+    >>> plain.unpack(pkt, noerror=True)     # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+        ...
+    SequenceError: invalid fcall 'Rlink' for 9P2000
+
+The same applies to much-too-short packets even if noerror is set.
+Specifically, if the (post-"size") header shortens down to the empty
+string, the fcall will be None:
+
+    >>> dotl.unpack(b'', noerror=True)      # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+        ...
+    SequenceError: invalid fcall None for 9P2000.L
+
+If there is at least a full header, though, noerror will do the obvious:
+
+    >>> dotl.unpack(pkt[0:1], noerror=True)
+    Rlink(tag=None)
+    >>> dotl.unpack(pkt[0:2], noerror=True)
+    Rlink(tag=None)
+
+If the packet is too long, noerror suppresses the SequenceError:
+
+    >>> dotl.unpack(pkt + b'x')             # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+        ...
+    SequenceError: 1 byte(s) unconsumed
+    >>> dotl.unpack(pkt + b'x', noerror=True)
+    Rlink(tag=12345)
+
+To pack a stat object when producing data for reading a directory,
+use pack_wirestat.  This puts a size in front of the packed stat
+data (they're represented this way in read()-of-directory data,
+but not elsewhere).
+
+To unpack the result of a Tstat or a read() on a directory, use
+unpack_wirestat.  The stat values are variable length so this
+works with offsets.  If the packet is truncated, you'll get a
+SequenceError, but just as for header unpacking, you can use
+noerror to suppress this.
+
+(First, we'll need to build some valid packet data.)
+
+    >>> statobj = td.stat(type=0,dev=0,qid=td.qid(0,0,0),mode=0,
+    ... atime=0,mtime=0,length=0,name=b'foo',uid=b'0',gid=b'0',muid=b'0')
+    >>> data = plain.pack_wirestat(statobj)
+    >>> len(data)
+    55
+
+Now we can unpack it:
+
+    >>> newobj, offset = plain.unpack_wirestat(data, 0)
+    >>> newobj == statobj
+    True
+    >>> offset
+    55
+
+Since the packed data do not include the dotu extensions, we get
+a SequenceError if we try to unpack with dotu or dotl:
+
+    >>> dotu.unpack_wirestat(data, 0)       # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+        ...
+    SequenceError: out of data while unpacking 'extension'
+
+When using noerror, the returned new offset will be greater
+than the length of the packet, after a failed unpack, and some
+elements may be None:
+
+    >>> newobj, offset = plain.unpack_wirestat(data[0:10], 0, noerror=True)
+    >>> offset
+    55
+    >>> newobj.length is None
+    True
+
+Similarly, use unpack_dirent to unpack the result of a dot-L
+readdir(), using offsets.  (Build them with pack_dirent.)
+
+    >>> dirent = td.dirent(qid=td.qid(1,2,3),offset=0,
+    ... type=td.DT_REG,name=b'foo')
+    >>> pkt = dotl.pack_dirent(dirent)
+    >>> len(pkt)
+    27
+
+and then:
+
+    >>> newde, offset = dotl.unpack_dirent(pkt, 0)
+    >>> newde == dirent
+    True
+    >>> offset
+    27
+
+"""
+
+from __future__ import print_function
+
+import collections
+import os
+import re
+import sys
+
+import p9err
+import pfod
+import sequencer
+
+SequenceError = sequencer.SequenceError
+
+fcall_names = {}
+
+# begin ???
+# to interfere with (eg) the size part of the packet:
+#   pkt = proto.pack(fcall=protocol.td.Tversion,
+#       size=123, # wrong
+#       args={ 'tag': 1, 'msize': 1000, 'version': '9p2000.u' })
+# a standard Twrite:
+#   pkt = proto.pack(fcall=protocol.td.Twrite,
+#       args={ 'tag': 1, 'fid': 2, 'offset': 0, 'data': b'rawdata' })
+# or:
+#   pkt = proto.pack(
+#       data=protocol.rrd.Twrite(tag=1, fid=2, offset=0, data=b'rawdata'))
+# a broken Twrite:
+#   pkt = proto.pack(fcall=protocol.td.Twrite,
+#       args={ 'tag': 1, 'fid': 2, 'offset': 0, 'count': 99,
+#           'data': b'rawdata' })  -- XXX won't work (yet?)
+#
+# build a QID: (td => typedefs and defines)
+#    qid = protocol.td.qid(type=protocol.td.QTFILE, version=1, path=2)
+# build the Twrite data as a data structure:
+#    wrdata = protocol.rrd.Twrite(tag=1, fid=2, offset=0, data=b'rawdata')
+#
+# turn incoming byte stream data into a Header and remaining data:
+#    foo = proto.unpack_header(data)
+
+class _PackInfo(object):
+    """
+    A thin wrapper around a Sequencer that also remembers which
+    (cond, coder) pairs carry an :auto annotation or are counted
+    string coders ('data[size]'), so pack() can fill those in.
+
+    Only the top level of the sequence is examined: in practice
+    all automatics live there, and we have no mechanism to pass
+    inner automatics down anyway.
+    """
+    def __init__(self, seq):
+        self.seq = seq
+        self.autos = None       # stays None unless something is automatic
+        for entry in seq:       # each entry is a (cond, coder) pair
+            aux = entry[1].aux
+            if aux is not None:
+                assert aux in ('auto', 'len')
+                if self.autos is None:
+                    self.autos = [entry]
+                else:
+                    self.autos.append(entry)
+
+    def __repr__(self):
+        return '%s(%r)' % (type(self).__name__, self.seq)
+
+    def pack(self, auto_vars, conditions, data, rodata):
+        """
+        Encode data through the wrapped sequencer, first filling
+        in automatic and/or counted variables the caller left unset.
+        auto_vars supplies the automatic values, by name; conditions
+        says which conditional coders are active.
+
+        When rodata ("read-only data") is True the caller's data
+        must stay untouched, so the first fill-in switches to a
+        private _copy() (data is a PFOD, not a plain dictionary).
+        """
+        for cond, sub in self.autos or ():
+            # Nothing to fill in when the coder is conditioned out.
+            if cond is not None and not conditions[cond]:
+                continue
+            automatic = sub.aux == 'auto'
+            if not automatic:
+                assert sub.aux == 'len'
+            # For an automatic variable (e.g., version) the slot
+            # to fill is the coder's own name; for an automatic
+            # length (e.g., data[count]) it is the repeat item
+            # ('count').
+            slot = sub.name if automatic else sub.repeat
+            if data.get(slot) is not None:
+                continue        # caller set it explicitly
+            if not automatic:
+                # The counted item may be missing as well.  If
+                # so, leave both None and take the encoding error.
+                counted = data.get(sub.name)
+                if counted is None:
+                    continue
+            if rodata:
+                data = data._copy()
+                rodata = False
+            if automatic:
+                data[slot] = auto_vars[slot]
+            else:
+                data[slot] = len(counted)
+        return self.seq.pack(data, conditions)
+
+class _P9Proto(object):
+    def __init__(self, auto_vars, conditions, p9_data, pfods, index):
+        self.auto_vars = auto_vars      # currently, just version
+        self.conditions = conditions    # '.u'
+        self.pfods = pfods # dictionary, maps pfod to packinfo
+        self.index = index # for comparison: plain < dotu < dotl
+
+        self.use_rlerror = rrd.Rlerror in pfods # error() sends Rlerror if so
+
+        for dtype in pfods:
+            name = dtype.__name__
+            # For each Txxx/Rxxx, define a self.<name>() that builds
+            # the PFOD and calls self._pack_from() on it.
+            #
+            # The packinfo is from _PackInfo(seq); the fcall and
+            # seq come from p9_data.protocol[<name>].
+            proto_tuple = p9_data.protocol[name]
+            assert dtype == proto_tuple[0]
+            packinfo = pfods[dtype]
+            # in theory we can do this with no names using nested
+            # lambdas, but that's just too confusing, so let's
+            # do it with nested functions instead.
+            def builder(constructor=dtype, packinfo=packinfo):
+                "return function that calls _pack_from with built PFOD"
+                def invoker(self, *args, **kwargs):
+                    "build PFOD and call _pack_from"
+                    return self._pack_from(constructor(*args, **kwargs),
+                                           rodata=False, caller=None,
+                                           packinfo=packinfo)
+                return invoker
+            func = builder()
+            func.__name__ = name
+            func.__doc__ = 'pack from {0}'.format(name)
+            setattr(self.__class__, name, func) # NB: on the class, not on self
+
+    def __repr__(self):
+        return '%s(%r)' % (type(self).__name__, self.version)
+
+    def __str__(self):
+        return self.version     # not set by __init__; presumably defined elsewhere in the class - TODO confirm
+
+    # rich comparisons go by self.index, so we can, e.g., test vers > plain
+    def __lt__(self, other):
+        return self.index < other.index
+    def __le__(self, other):
+        return self.index <= other.index
+    def __eq__(self, other):
+        return self.index == other.index
+    def __ne__(self, other):
+        return self.index != other.index
+    def __gt__(self, other):
+        return self.index > other.index
+    def __ge__(self, other):
+        return self.index >= other.index
+
+    def downgrade_to(self, other_name):
+        """
+        Look up the protocol called other_name and return it,
+        provided it is not "greater" than this one.
+
+        Asking for self's own version is allowed.  A name that
+        is unknown, or that would be an upgrade, raises KeyError.
+        Names that are bytes but not str are decoded as UTF-8.
+        """
+        if isinstance(other_name, bytes) and not isinstance(other_name, str):
+            other_name = other_name.decode('utf-8', 'surrogateescape')
+        target = p9_version(other_name)
+        if not target > self:
+            return target
+        raise KeyError(other_name)
+
+    def error(self, tag, err):
+        "produce Rerror or Rlerror (whichever fits) from an errno or Exception"
+        if isinstance(err, Exception):
+            # not every Exception carries errno/strerror; don't crash on those
+            errnum = getattr(err, 'errno', None)
+            errmsg = getattr(err, 'strerror', None) or str(err)
+        else:
+            errnum, errmsg = err, os.strerror(err)
+        if self.use_rlerror:
+            return self.Rlerror(tag=tag, ecode=p9err.to_dotl(errnum))
+        return self.Rerror(tag=tag, errstr=errmsg,
+                           errnum=p9err.to_dotu(errnum))
+
+    def pack(self, *args, **kwargs):
+        "pack up a pfod, or fcall=<code or name> plus args={...}"
+        fcall = kwargs.pop('fcall', None)
+        if fcall is None:
+            # Called without fcall=...
+            # This requires that args have one argument that
+            # is the PFOD; kwargs should be empty (but we'll take
+            # data=pfod as well).  The size is implied, and
+            # fcall comes from the pfod.
+            data = kwargs.pop('data', None)
+            if data is None:
+                if len(args) != 1:
+                    raise TypeError('pack() with no fcall requires 1 argument')
+                data = args[0]
+            if len(kwargs):
+                raise TypeError('pack() got an unexpected keyword argument '
+                                '{0}'.format(kwargs.popitem()[0]))
+            return self._pack_from(data, True, 'pack', None)
+
+        # Called as pack(fcall=whatever, args={...}).
+        # The args argument must be a dictionary since we're going to
+        # apply ** to it in the call to build the PFOD.  Note that
+        # it could already be a PFOD, which is OK, but we're going to
+        # copy it to a new one regardless (callers that have a PFOD
+        # should use pack_from instead).
+        if len(args):
+            raise TypeError('pack() got unexpected arguments '
+                            '{0!r}'.format(args))
+        data = kwargs.pop('args', None)
+        if len(kwargs):
+            raise TypeError('pack() got an unexpected keyword argument '
+                            '{0}'.format(kwargs.popitem()[0]))
+        if not isinstance(data, dict):
+            raise TypeError('pack() with fcall requires args= '
+                            'to be a dictionary')
+        try:
+            name = fcall_names[fcall]
+        except KeyError:
+            raise TypeError('pack(): {0} is not a valid '
+                            'fcall value'.format(fcall))
+        cls = getattr(rrd, name)
+        data = cls(**data)
+        return self._pack_from(data, False, 'pack', None)
+
+    def pack_from(self, data):
+        "pack from pfod data, using its type to determine fcall"
+        return self._pack_from(data, True, 'pack_from', None) # rodata=True
+
+    def _pack_from(self, data, rodata, caller, packinfo):
+        """
+        Shared back end of pack(), pack_from() and the per-fcall
+        invokers (self.Tversion, self.Rwalk, etc.).  "caller" names
+        the public method we came through; the invokers pass None.
+
+        With rodata=True the incoming data may belong to someone
+        else and must not be modified; calls that built their own
+        PFOD pass False.  The invokers also supply the packinfo,
+        so for them nothing is ever copied or looked up.
+
+        Returns the packed header-plus-data packet.
+        """
+        if caller is not None:
+            # Indirect call, through pack_from() or through pack()
+            # once it has a PFOD.  Nobody has looked up packinfo
+            # yet, and this kind of PFOD may not be allowed here.
+            assert caller in ('pack', 'pack_from') and packinfo is None
+            packinfo = self.pfods.get(data.__class__, None)
+            if packinfo is None:
+                raise TypeError('{0}({1!r}): invalid '
+                                'input'.format(caller, data))
+
+        # Inner data first.
+        body = packinfo.pack(self.auto_vars, self.conditions, data, rodata)
+
+        # The PFOD's class name doubles as the name of its byte
+        # code value in td.
+        fcall_code = getattr(td, data.__class__.__name__)
+
+        # Now wrap the body in the header: size, fcall code and
+        # the body as the header's "data".  The size is implied
+        # by the length of the body.
+        #
+        # NB: the size counts its own 4 bytes and the fcall code
+        # byte as well as the body.  The header has no auto
+        # variables and no conditions (logically {} rather than
+        # None, but the value is not actually used).
+        nbody = len(body)
+        header = _9p_data.header_pfod(size=4 + 1 + nbody, dsize=nbody,
+                                      fcall=fcall_code, data=body)
+        return _9p_data.header_pack_seq.pack(header, None)
+
+    @staticmethod
+    def unpack_header(bstring, noerror=False):
+        """
+        Split a size-stripped packet into its header fields.
+
+        The caller has already removed the 4-byte overall size
+        field, so bstring is the 1-byte fcall followed by
+        len(bstring)-1 bytes of data.  An empty bstring is an
+        invalid header: dsize is clamped to 0 and, if noerror
+        is set, fcall is left to become None.
+        """
+        hdr = _9p_data.header_pfod()
+        nbytes = len(bstring)
+        hdr['size'], hdr['dsize'] = nbytes + 4, max(0, nbytes - 1)
+        _9p_data.header_unpack_seq.unpack(hdr, None, bstring, noerror)
+        return hdr
+
+    def unpack(self, bstring, noerror=False):
+        "decode packet (size already stripped) into a filled PFOD for its fcall"
+        header = self.unpack_header(bstring, noerror)
+        # NB: header['dsize'] matters only while the header is
+        # being unpacked, where it gives the number of bytes to
+        # copy to header['data']; neither it nor header['size']
+        # is needed from here on.
+        code = header['fcall']
+        payload = header['data']
+        # Note: when noerror is true, size and/or fcall may be
+        # None.  But an fcall we support is clearly not None,
+        # and fcall follows size, so whenever the fcall is
+        # supported we can always proceed.
+        if self.supports(code):
+            cls = getattr(rrd, fcall_names[code])
+            seq = self.pfods[cls].seq
+        elif code == td.Rlerror:
+            # Special case for diod: accept Rlerror even if it
+            # is not formally part of this protocol, taking its
+            # sequencer from dotl.
+            cls = rrd.Rlerror
+            seq = dotl.pfods[rrd.Rlerror].seq
+        else:
+            # Report the fcall by name when it has one, and as
+            # the raw value otherwise.
+            shown = fcall_names.get(code, code)
+            raise SequenceError('invalid fcall {0!r} for '
+                                '{1}'.format(shown, self))
+        result = cls()
+        seq.unpack(result, self.conditions, payload, noerror)
+        return result
+
+    def pack_wirestat(self, statobj):
+        """
+        Encode statobj as it appears in data returned by read()
+        on a directory: the packed stat prefixed with its size.
+        """
+        packed = td.stat_seq.pack(statobj, self.conditions)
+        return td.wirestat_seq.pack({'size': len(packed), 'data': packed}, {})
+
+    def unpack_wirestat(self, bstring, offset, noerror=False):
+        """
+        Decode the size-prefixed stat at offset in byte-string,
+        returning a td.stat object and the new offset.
+        """
+        result = td.stat()
+        wire = { 'size': None }
+        nextoff = td.wirestat_seq.unpack_from(wire, self.conditions, bstring,
+                                              offset, noerror)
+        if wire['size'] is not None:
+            # Size and data are both present.  With noerror, the
+            # data might be too short, and we then unpack only a
+            # partial stat object.  With or without noerror, the
+            # data might be too long: len(data) == size, yet not
+            # all of the data get used.  It's not clear whether
+            # the protocol allows that, so for now the number of
+            # bytes used is ignored (if size != used, then what?).
+            data = wire['data']
+            used = td.stat_seq.unpack_from(result, self.conditions, data,
+                                           0, noerror)
+        # (a None size implies noerror, with nextoff == offset+2)
+        return result, nextoff
+
+    def pack_dirent(self, dirent):
+        """
+        Encode one dirent (dot-L only); provided for symmetry with
+        unpack_dirent.  (Should plain or dotu raise an error here?)
+        """
+        packer = td.dirent_seq
+        return packer.pack(dirent, self.conditions)
+
+    def unpack_dirent(self, bstring, offset, noerror=False):
+        """
+        Decode the td.dirent object found at offset in byte-string,
+        returning it together with the new offset.
+        """
+        entry = td.dirent()
+        nextoff = td.dirent_seq.unpack_from(entry, self.conditions,
+                                            bstring, offset, noerror)
+        return entry, nextoff
+
+    def supports(self, fcall):
+        """
+        Tell whether this protocol supports the given fcall:
+        True if it does, False otherwise.
+
+        >>> plain.supports(100)
+        True
+        >>> plain.supports('Tversion')
+        True
+        >>> plain.supports('Rlink')
+        False
+        """
+        name = fcall_names.get(fcall, None)
+        if name is not None:
+            return getattr(rrd, name) in self.pfods
+        # not a known fcall at all
+        return False
+
+    def get_version(self, as_bytes=True):
+        "return the Plan 9 protocol version: bytes by default, else as stored"
+        version = self.auto_vars['version']
+        if not as_bytes or isinstance(version, bytes):
+            return version
+        return version.encode('utf-8')
+
+    @property
+    def version(self):
+        "the Plan 9 protocol version, as given by get_version(as_bytes=False)"
+        return self.get_version(False)
+
+DEBUG = False
+
+# This defines a special en/decoder named "s" using a magic
+# builtin.  This and stat are the only variable-length
+# decoders, and this is the only recursively-variable-length
+# one (i.e., stat decoding is effectively fixed size once we
+# handle strings).  So this magic avoids the need for recursion.
+#
+# Note that _string_ is, in effect, size[2] orig_var[size].
+_STRING_MAGIC = '_string_'
+SDesc = "typedef s: " + _STRING_MAGIC
+
+# This defines an en/decoder for type "qid",
+# which en/decodes 1 byte called type, 4 called version, and
+# 8 called path (for a total of 13 bytes).
+#
+# It also defines QTDIR, QTAPPEND, etc.  (These are not used
+# for en/decode, or at least not yet.)
+QIDDesc = """\
+typedef qid: type[1] version[4] path[8]
+
+    #define QTDIR       0x80
+    #define QTAPPEND    0x40
+    #define QTEXCL      0x20
+    #define QTMOUNT     0x10
+    #define QTAUTH      0x08
+    #define QTTMP       0x04
+    #define QTSYMLINK   0x02
+    #define QTFILE      0x00
+"""
+
+# This defines a stat decoder, which has a 9p2000 standard front,
+# followed by an optional additional portion.
+#
+# The constants are named DMDIR etc.
+STATDesc = """
+typedef stat: type[2] dev[4] qid[qid] mode[4] atime[4] mtime[4] \
+length[8] name[s] uid[s] gid[s] muid[s] \
+{.u: extension[s] n_uid[4] n_gid[4] n_muid[4] }
+
+    #define DMDIR           0x80000000
+    #define DMAPPEND        0x40000000
+    #define DMMOUNT         0x10000000
+    #define DMAUTH          0x08000000
+    #define DMTMP           0x04000000
+    #define DMSYMLINK       0x02000000
+            /* 9P2000.u extensions */
+    #define DMDEVICE        0x00800000
+    #define DMNAMEDPIPE     0x00200000
+    #define DMSOCKET        0x00100000
+    #define DMSETUID        0x00080000
+    #define DMSETGID        0x00040000
+"""
+
+# This defines a wirestat decoder.  A wirestat is a size and then
+# a (previously encoded, or future-decoded) stat.
+WirestatDesc = """
+typedef wirestat: size[2] data[size]
+"""
+
+# This defines a dirent decoder, which has a dot-L specific format.
+#
+# The dirent type fields are defined as DT_* (same as BSD and Linux).
+DirentDesc = """
+typedef dirent: qid[qid] offset[8] type[1] name[s]
+
+    #define DT_UNKNOWN       0
+    #define DT_FIFO          1
+    #define DT_CHR           2
+    #define DT_DIR           4
+    #define DT_BLK           6
+    #define DT_REG           8
+    #define DT_LNK          10
+    #define DT_SOCK         12
+    #define DT_WHT          14
+"""
+
+# N.B.: this is largely a slightly more rigidly formatted variant of
+# the contents of:
+# https://github.com/chaos/diod/blob/master/protocol.md
+#
+# Note that <name> = <value>: ... assigns the name its number in
+# the fcall (function call) table.  Names without "= value" are
+# assumed to be the previous value +1 (and the two names are
+# also checked to make sure they are Tfoo,Rfoo).
+ProtocolDesc = """\
+Rlerror.L = 7: tag[2] ecode[4]
+    ecode is a numerical Linux errno
+
+Tstatfs.L = 8: tag[2] fid[4]
+Rstatfs.L: tag[2] type[4] bsize[4] blocks[8] bfree[8] bavail[8] \
+         files[8] ffree[8] fsid[8] namelen[4]
+    Rstatfs corresponds to Linux statfs structure:
+    struct statfs {
+        long    f_type;     /* type of file system */
+        long    f_bsize;    /* optimal transfer block size */
+        long    f_blocks;   /* total data blocks in file system */
+        long    f_bfree;    /* free blocks in fs */
+        long    f_bavail;   /* free blocks avail to non-superuser */
+        long    f_files;    /* total file nodes in file system */
+        long    f_ffree;    /* free file nodes in fs */
+        fsid_t  f_fsid;     /* file system id */
+        long    f_namelen;  /* maximum length of filenames */
+    };
+
+    This comes from nowhere obvious...
+        #define FSTYPE      0x01021997
+
+Tlopen.L = 12: tag[2] fid[4] flags[4]
+Rlopen.L: tag[2] qid[qid] iounit[4]
+    lopen prepares fid for file (or directory) I/O.
+
+    flags contains Linux open(2) flag bits, e.g., O_RDONLY, O_RDWR, O_WRONLY.
+
+        #define L_O_CREAT       000000100
+        #define L_O_EXCL        000000200
+        #define L_O_NOCTTY      000000400
+        #define L_O_TRUNC       000001000
+        #define L_O_APPEND      000002000
+        #define L_O_NONBLOCK    000004000
+        #define L_O_DSYNC       000010000
+        #define L_O_FASYNC      000020000
+        #define L_O_DIRECT      000040000
+        #define L_O_LARGEFILE   000100000
+        #define L_O_DIRECTORY   000200000
+        #define L_O_NOFOLLOW    000400000
+        #define L_O_NOATIME     001000000
+        #define L_O_CLOEXEC     002000000
+        #define L_O_SYNC        004000000
+        #define L_O_PATH        010000000
+        #define L_O_TMPFILE     020000000
+
+Tlcreate.L = 14: tag[2] fid[4] name[s] flags[4] mode[4] gid[4]
+Rlcreate.L: tag[2] qid[qid] iounit[4]
+    lcreate creates a regular file name in directory fid and prepares
+    it for I/O.
+
+    fid initially represents the parent directory of the new file.
+    After the call it represents the new file.
+
+    flags contains Linux open(2) flag bits (including O_CREAT).
+
+    mode contains Linux creat(2) mode (permissions) bits.
+
+    gid is the effective gid of the caller.
+
+Tsymlink.L = 16: tag[2] dfid[4] name[s] symtgt[s] gid[4]
+Rsymlink.L: tag[2] qid[qid]
+    symlink creates a symbolic link name in directory dfid.  The
+    link will point to symtgt.
+
+    gid is the effective group id of the caller.
+
+    The qid for the new symbolic link is returned in the reply.
+
+Tmknod.L = 18: tag[2] dfid[4] name[s] mode[4] major[4] minor[4] gid[4]
+Rmknod.L: tag[2] qid[qid]
+    mknod creates a device node name in directory dfid with major
+    and minor numbers.
+
+    mode contains Linux mknod(2) mode bits.  (Note that these
+    include the S_IFMT bits which may be S_IFBLK, S_IFCHR, or
+    S_IFSOCK.)
+
+    gid is the effective group id of the caller.
+
+    The qid for the new device node is returned in the reply.
+
+Trename.L = 20: tag[2] fid[4] dfid[4] name[s]
+Rrename.L: tag[2]
+    rename renames a file system object referenced by fid, to name
+    in the directory referenced by dfid.
+
+    This operation will eventually be replaced by renameat.
+
+Treadlink.L = 22: tag[2] fid[4]
+Rreadlink.L: tag[2] target[s]
+    readlink returns the contents of teh symbolic link referenced by fid.
+
+Tgetattr.L = 24: tag[2] fid[4] request_mask[8]
+Rgetattr.L: tag[2] valid[8] qid[qid] mode[4] uid[4] gid[4] nlink[8] \
+          rdev[8] size[8] blksize[8] blocks[8] \
+          atime_sec[8] atime_nsec[8] mtime_sec[8] mtime_nsec[8] \
+          ctime_sec[8] ctime_nsec[8] btime_sec[8] btime_nsec[8] \
+          gen[8] data_version[8]
+
+    getattr gets attributes of a file system object referenced by fid.
+    The response is intended to follow pretty closely the fields
+    returned by the stat(2) system call:
+
+    struct stat {
+        dev_t     st_dev;     /* ID of device containing file */
+        ino_t     st_ino;     /* inode number */
+        mode_t    st_mode;    /* protection */
+        nlink_t   st_nlink;   /* number of hard links */
+        uid_t     st_uid;     /* user ID of owner */
+        gid_t     st_gid;     /* group ID of owner */
+        dev_t     st_rdev;    /* device ID (if special file) */
+        off_t     st_size;    /* total size, in bytes */
+        blksize_t st_blksize; /* blocksize for file system I/O */
+        blkcnt_t  st_blocks;  /* number of 512B blocks allocated */
+        time_t    st_atime;   /* time of last access */
+        time_t    st_mtime;   /* time of last modification */
+        time_t    st_ctime;   /* time of last status change */
+    };
+
+    The differences are:
+
+     * st_dev is omitted
+     * st_ino is contained in the path component of qid
+     * times are nanosecond resolution
+     * btime, gen and data_version fields are reserved for future use
+
+    Not all fields are valid in every call. request_mask is a bitmask
+    indicating which fields are requested. valid is a bitmask
+    indicating which fields are valid in the response. The mask
+    values are as follows:
+
+    #define GETATTR_MODE        0x00000001
+    #define GETATTR_NLINK       0x00000002
+    #define GETATTR_UID         0x00000004
+    #define GETATTR_GID         0x00000008
+    #define GETATTR_RDEV        0x00000010
+    #define GETATTR_ATIME       0x00000020
+    #define GETATTR_MTIME       0x00000040
+    #define GETATTR_CTIME       0x00000080
+    #define GETATTR_INO         0x00000100
+    #define GETATTR_SIZE        0x00000200
+    #define GETATTR_BLOCKS      0x00000400
+
+    #define GETATTR_BTIME       0x00000800
+    #define GETATTR_GEN         0x00001000
+    #define GETATTR_DATA_VERSION 0x00002000
+
+    #define GETATTR_BASIC       0x000007ff  /* Mask for fields up to BLOCKS */
+    #define GETATTR_ALL         0x00003fff  /* Mask for All fields above */
+
+Tsetattr.L = 26: tag[2] fid[4] valid[4] mode[4] uid[4] gid[4] size[8] \
+               atime_sec[8] atime_nsec[8] mtime_sec[8] mtime_nsec[8]
+Rsetattr.L: tag[2]
+    setattr sets attributes of a file system object referenced by
+    fid.  As with getattr, valid is a bitmask selecting which
+    fields to set, which can be any combination of:
+
+    mode - Linux chmod(2) mode bits.
+
+    uid, gid - New owner, group of the file as described in Linux chown(2).
+
+    size - New file size as handled by Linux truncate(2).
+
+    atime_sec, atime_nsec - Time of last file access.
+
+    mtime_sec, mtime_nsec - Time of last file modification.
+
+    The valid bits are defined as follows:
+
+    #define SETATTR_MODE        0x00000001
+    #define SETATTR_UID         0x00000002
+    #define SETATTR_GID         0x00000004
+    #define SETATTR_SIZE        0x00000008
+    #define SETATTR_ATIME       0x00000010
+    #define SETATTR_MTIME       0x00000020
+    #define SETATTR_CTIME       0x00000040
+    #define SETATTR_ATIME_SET   0x00000080
+    #define SETATTR_MTIME_SET   0x00000100
+
+    If a time bit is set without the corresponding SET bit, the
+    current system time on the server is used instead of the value
+    sent in the request.
+
+Txattrwalk.L = 30: tag[2] fid[4] newfid[4] name[s]
+Rxattrwalk.L: tag[2] size[8]
+    xattrwalk gets a newfid pointing to xattr name.  This fid can
+    later be used to read the xattr value.  If name is NULL newfid
+    can be used to get the list of extended attributes associated
+    with the file system object.
+
+Txattrcreate.L = 32: tag[2] fid[4] name[s] attr_size[8] flags[4]
+Rxattrcreate.L: tag[2]
+    xattrcreate gets a fid pointing to the xattr name.  This fid
+    can later be used to set the xattr value.
+
+    flag is derived from set Linux setxattr. The manpage says
+
+        The flags parameter can be used to refine the semantics of
+        the operation.  XATTR_CREATE specifies a pure create,
+        which fails if the named attribute exists already.
+        XATTR_REPLACE specifies a pure replace operation, which
+        fails if the named attribute does not already exist.  By
+        default (no flags), the extended attribute will be created
+        if need be, or will simply replace the value if the
+        attribute exists.
+
+    The actual setxattr operation happens when the fid is clunked.
+    At that point the written byte count and the attr_size
+    specified in TXATTRCREATE should be same otherwise an error
+    will be returned.
+
+Treaddir.L = 40: tag[2] fid[4] offset[8] count[4]
+Rreaddir.L: tag[2] count[4] data[count]
+    readdir requests that the server return directory entries from
+    the directory represented by fid, previously opened with
+    lopen.  offset is zero on the first call.
+
+    Directory entries are represented as variable-length records:
+        qid[qid] offset[8] type[1] name[s]
+    At most count bytes will be returned in data.  If count is not
+    zero in the response, more data is available.  On subsequent
+    calls, offset is the offset returned in the last directory
+    entry of the previous call.
+
+Tfsync.L = 50: tag[2] fid[4]
+Rfsync.L: tag[2]
+    fsync tells the server to flush any cached data associated
+    with fid, previously opened with lopen.
+
+Tlock.L = 52: tag[2] fid[4] type[1] flags[4] start[8] length[8] \
+       proc_id[4] client_id[s]
+Rlock.L: tag[2] status[1]
+    lock is used to acquire or release a POSIX record lock on fid
+    and has semantics similar to Linux fcntl(F_SETLK).
+
+    type has one of the values:
+
+        #define LOCK_TYPE_RDLCK 0
+        #define LOCK_TYPE_WRLCK 1
+        #define LOCK_TYPE_UNLCK 2
+
+    start, length, and proc_id correspond to the analagous fields
+    passed to Linux fcntl(F_SETLK):
+
+    struct flock {
+        short l_type;  /* Type of lock: F_RDLCK, F_WRLCK, F_UNLCK */
+        short l_whence;/* How to intrprt l_start: SEEK_SET,SEEK_CUR,SEEK_END */
+        off_t l_start; /* Starting offset for lock */
+        off_t l_len;   /* Number of bytes to lock */
+        pid_t l_pid;   /* PID of process blocking our lock (F_GETLK only) */
+    };
+
+    flags bits are:
+
+        #define LOCK_SUCCESS    0
+        #define LOCK_BLOCKED    1
+        #define LOCK_ERROR      2
+        #define LOCK_GRACE      3
+
+    The Linux v9fs client implements the fcntl(F_SETLKW)
+    (blocking) lock request by calling lock with
+    LOCK_FLAGS_BLOCK set.  If the response is LOCK_BLOCKED,
+    it retries the lock request in an interruptible loop until
+    status is no longer LOCK_BLOCKED.
+
+    The Linux v9fs client translates BSD advisory locks (flock) to
+    whole-file POSIX record locks.  v9fs does not implement
+    mandatory locks and will return ENOLCK if use is attempted.
+
+    Because of POSIX record lock inheritance and upgrade
+    properties, pass-through servers must be implemented
+    carefully.
+
+Tgetlock.L = 54: tag[2] fid[4] type[1] start[8] length[8] proc_id[4] \
+               client_id[s]
+Rgetlock.L: tag[2] type[1] start[8] length[8] proc_id[4] client_id[s]
+    getlock tests for the existence of a POSIX record lock and has
+    semantics similar to Linux fcntl(F_GETLK).
+
+    As with lock, type has one of the values defined above, and
+    start, length, and proc_id correspond to the analagous fields
+    in struct flock passed to Linux fcntl(F_GETLK), and client_Id
+    is an additional mechanism for uniquely identifying the lock
+    requester and is set to the nodename by the Linux v9fs client.
+
+Tlink.L = 70: tag[2] dfid[4] fid[4] name[s]
+Rlink.L: tag[2]
+    link creates a hard link name in directory dfid.  The link
+    target is referenced by fid.
+
+Tmkdir.L = 72: tag[2] dfid[4] name[s] mode[4] gid[4]
+Rmkdir.L: tag[2] qid[qid]
+    mkdir creates a new directory name in parent directory dfid.
+
+    mode contains Linux mkdir(2) mode bits.
+
+    gid is the effective group ID of the caller.
+
+    The qid of the new directory is returned in the response.
+
+Trenameat.L = 74: tag[2] olddirfid[4] oldname[s] newdirfid[4] newname[s]
+Rrenameat.L: tag[2]
+    Change the name of a file from oldname to newname, possible
+    moving it from old directory represented by olddirfid to new
+    directory represented by newdirfid.
+
+    If the server returns ENOTSUPP, the client should fall back to
+    the rename operation.
+
+Tunlinkat.L = 76: tag[2] dirfd[4] name[s] flags[4]
+Runlinkat.L: tag[2]
+    Unlink name from directory represented by dirfd.  If the file
+    is represented by a fid, that fid is not clunked.  If the
+    server returns ENOTSUPP, the client should fall back to the
+    remove operation.
+
+    There seems to be only one defined flag:
+
+        #define AT_REMOVEDIR    0x200
+
+Tversion = 100: tag[2] msize[4] version[s]:auto
+Rversion: tag[2] msize[4] version[s]
+
+    negotiate protocol version
+
+    version establishes the msize, which is the maximum message
+    size inclusive of the size value that can be handled by both
+    client and server.
+
+    It also establishes the protocol version.  For 9P2000.L
+    version must be the string 9P2000.L.
+
+Tauth = 102: tag[2] afid[4] uname[s] aname[s] n_uname[4]
+Rauth: tag[2] aqid[qid]
+    auth initiates an authentication handshake for n_uname.
+    Rlerror is returned if authentication is not required.  If
+    successful, afid is used to read/write the authentication
+    handshake (protocol does not specify what is read/written),
+    and afid is presented in the attach.
+
+Tattach = 104: tag[2] fid[4] afid[4] uname[s] aname[s] {.u: n_uname[4] }
+Rattach: tag[2] qid[qid]
+    attach introduces a new user to the server, and establishes
+    fid as the root for that user on the file tree selected by
+    aname.
+
+    afid can be NOFID (~0) or the fid from a previous auth
+    handshake.  The afid can be clunked immediately after the
+    attach.
+
+        #define NOFID       0xffffffff
+
+    n_uname, if not set to NONUNAME (~0), is the uid of the
+    user and is used in preference to uname.  Note that it appears
+    in both .u and .L (unlike most .u-specific features).
+
+        #define NONUNAME    0xffffffff
+
+    v9fs has several modes of access which determine how it uses
+    attach.  In the default access=user, an initial attach is sent
+    for the user provided in the uname=name mount option, and for
+    each user that accesses the file system thereafter.  For
+    access=, only the initial attach is sent for and all other
+    users are denied access by the client.
+
+Rerror = 107: tag[2] errstr[s] {.u: errnum[4] }
+
+Tflush = 108: tag[2] oldtag[2]
+Rflush: tag[2]
+    flush aborts an in-flight request referenced by oldtag, if any.
+
+Twalk = 110: tag[2] fid[4] newfid[4] nwname[2] nwname*(wname[s])
+Rwalk: tag[2] nwqid[2] nwqid*(wqid[qid])
+    walk is used to descend a directory represented by fid using
+    successive path elements provided in the wname array.  If
+    succesful, newfid represents the new path.
+
+    fid can be cloned to newfid by calling walk with nwname set to
+    zero.
+
+    if nwname==0, fid need not represent a directory.
+
+Topen = 112: tag[2] fid[4] mode[1]
+Ropen: tag[2] qid[qid] iounit[4]
+    open prepares fid for file (or directory) I/O.
+
+    mode is:
+        #define OREAD       0   /* open for read */
+        #define OWRITE      1   /* open for write */
+        #define ORDWR       2   /* open for read and write */
+        #define OEXEC       3   /* open for execute */
+
+        #define OTRUNC      16  /* truncate (illegal if OEXEC) */
+        #define OCEXEC      32  /* close on exec (nonsensical) */
+        #define ORCLOSE     64  /* remove on close */
+        #define ODIRECT     128 /* direct access (.u extension?) */
+
+Tcreate = 114: tag[2] fid[4] name[s] perm[4] mode[1] {.u: extension[s] }
+Rcreate: tag[2] qid[qid] iounit[4]
+    create is similar to open; however, the incoming fid is the
+    diretory in which the file is to be created, and on success,
+    return, the fid refers to the then-created file.
+
+Tread = 116: tag[2] fid[4] offset[8] count[4]
+Rread: tag[2] count[4] data[count]
+    perform a read on the file represented by fid.  Note that in
+    v9fs, a read(2) or write(2) system call for a chunk of the
+    file that won't fit in a single request is broken up into
+    multiple requests.
+
+    Under 9P2000.L, read cannot be used on directories.  See readdir.
+
+Twrite = 118: tag[2] fid[4] offset[8] count[4] data[count]
+Rwrite: tag[2] count[4]
+    perform a write on the file represented by fid.  Note that in
+    v9fs, a read(2) or write(2) system call for a chunk of the
+    file that won't fit in a single request is broken up into
+    multiple requests.
+
+    write cannot be used on directories.
+
+Tclunk = 120: tag[2] fid[4]
+Rclunk: tag[2]
+    clunk signifies that fid is no longer needed by the client.
+
+Tremove = 122: tag[2] fid[4]
+Rremove: tag[2]
+    remove removes the file system object represented by fid.
+
+    The fid is always clunked (even on error).
+
+Tstat = 124: tag[2] fid[4]
+Rstat: tag[2] size[2] data[size]
+
+Twstat = 126: tag[2] fid[4] size[2] data[size]
+Rwstat: tag[2]
+"""
+
+class _Token(object):
+    r"""
+    One token produced by the scanner.
+
+    A token carries a type (tok.ttype) and a value (tok.value).
+    The value is usually the token text itself; for 'label',
+    'word*', ':aux', and '[type]' tokens the prefix and/or
+    suffix has been removed, and the full original token is kept
+    in tok.orig.  A 'type' token whose value is all digits also
+    gets that integer in tok.ival (which is None otherwise).
+
+    Tokens are:
+     - 'word', 'word*', or 'label':
+         '[.\w]+' followed by optional '*' or ':':
+
+     - 'aux': ':' followed by '\w+' (used for :auto annotation)
+
+     - 'type':
+       open bracket '[', followed by '\w+' or '\d+' (only one of these),
+       followed by close bracket ']'
+
+     - '(', ')', '{', '}': themselves
+
+    Each token can have arbitrary leading white space (which is
+    discarded).
+
+    (Probably should return ':' as a char and handle it in parser,
+    but oh well.)
+    """
+    def __init__(self, ttype, value, orig=None):
+        self.ttype = ttype
+        self.value = value
+        self.orig = orig if orig is not None else value
+        is_number = ttype == 'type' and value.isdigit()
+        self.ival = int(value) if is_number else None
+
+    def __str__(self):
+        "the original text of the token, decorations included"
+        return self.orig
+
+_Token.tok_expr = re.compile(r'\s*([.\w]+(?:\*|:)?'
+                             r'|:\w+'
+                             r'|\[(?:\w+|\d+)\]'
+                             r'|[(){}])')
+
+def _scan(string):
+    """
+    Split a string into a list of _Token instances.
+
+    Note: every character must belong to some token (or to the
+    white space leading one).  Otherwise a diagnostic marking the
+    position is printed, and a ValueError with the position of the
+    unmatched character is raised.
+    """
+    def unmatched(where):
+        "print the diagnostic for position <where>, then raise"
+        print('error: unmatched character(s) in input\n{0}\n{1}^'.format(
+            string, ' ' * where))
+        raise ValueError('unmatched lexeme', where)
+
+    # cut the string into lexemes; matches may not leave any gaps
+    lexemes = []
+    end = 0
+    for match in _Token.tok_expr.finditer(string):
+        if match.start() != end:
+            unmatched(end)
+        end = match.end()
+        lexemes.append(match.group(1))
+    if end != len(string):
+        unmatched(end)
+
+    def classify(text):
+        "build the token for one lexeme, stripping decorations"
+        if text in ('(', ')', '{', '}'):
+            return _Token(text, text)
+        if text[0] == ':':
+            return _Token('aux', text[1:], text)
+        if text.endswith(':'):
+            return _Token('label', text[0:-1], text)
+        if text.endswith('*'):
+            return _Token('word*', text[0:-1], text)
+        if text[0] != '[':
+            return _Token('word', text)
+        # integer or named type
+        if text[-1] != ']':
+            raise ValueError('internal error: "{0}" is not [...]'.format(
+                text))
+        return _Token('type', text[1:-1], text)
+
+    return [classify(text) for text in lexemes]
+
+def _debug_print_sequencer(seq):
+    """debugging aid: dump a sequencer and its entries to stderr"""
+    print('sequencer is {0!r}'.format(seq), file=sys.stderr)
+    for index, encdec in enumerate(seq):
+        print(' [{0:d}] = {1}'.format(index, encdec), file=sys.stderr)
+
+def _parse_expr(seq, string, typedefs):
+    """
+    Parse "expression-ish" items, which is a list of:
+        name[type]
+        name*(subexpr)    (a literal asterisk)
+        { label ... }
+
+    The "type" may be an integer or a second name.  In the case
+    of a second name it must be something from <typedefs>.
+
+    The meaning of name[integer] is that we are going to encode
+    or decode a fixed-size field of <integer> bytes, using the
+    given name.
+
+    For name[name2], we can look up name2 in our typedefs table.
+    The only real typedefs used here are "stat" and "s"; each
+    of these expands to a variable-size encode/decode.  See the
+    special case below, though.
+
+    The meaning of name*(...) is: the earlier name will have been
+    defined by an earlier _parse_expr for this same line.  That
+    earlier name provides a repeat-count.
+
+    Inside the parens we get a name[type] sub-expression.  This may
+    not recurse further, so we can use a pretty cheesy parser.
+
+    As a special case, given name[name2], we first check whether
+    name2 is an earlier name a la name*(...).  Here the meaning
+    is much like name2*(name[1]), except that the result is a
+    simple byte string, rather than an array.
+
+    The meaning of "{ label ... " is that everything following up
+    to "}" is optional and used only with 9P2000.u and/or 9P2000.L.
+    Inside the {...} pair is the usual set of tokens, but again
+    {...} cannot recurse.
+
+    The parse fills in a Sequencer instance, and returns a list
+    of the parsed names.
+    """
+    names = []
+    cond = None
+
+    tokens = collections.deque(_scan(string))
+
+    def get_subscripted(tokens):
+        """
+        Allows name[integer] and name1[name2] only; returns
+        tuple after stripping off both tokens, or returns None
+        and does not strip tokens.
+        """
+        if len(tokens) == 0 or tokens[0].ttype != 'word':
+            return None
+        if len(tokens) > 1 and tokens[1].ttype == 'type':
+            word = tokens.popleft()
+            return word, tokens.popleft()
+        return None
+
+    def lookup(name, typeinfo, aux=None):
+        """
+        Convert cond (if not None) to its .value, so that instead
+        of (x, '.u') we get '.u'.
+
+        Convert typeinfo to an encdec.  Typeinfo may be 1/2/4/8, or
+        one of our typedef names.  If it's a typedef name it will
+        normally correspond to an EncDecTyped, but we have one special
+        case for string types, and another for using an earlier-defined
+        variable.
+        """
+        condval = None if cond is None else cond.value
+        if typeinfo.ival is None:
+            try:
+                cls, sub = typedefs[typeinfo.value]
+            except KeyError:
+                raise ValueError('unknown type name {0}'.format(typeinfo))
+            # the type name is typeinfo.value; the corresponding
+            # pfod class is cls; the *variable* name is name;
+            # and the sub-sequence is sub.  But if cls is None
+            # then it's our string type.
+            if cls is None:
+                encdec = sequencer.EncDecSimple(name, _STRING_MAGIC, aux)
+            else:
+                encdec = sequencer.EncDecTyped(cls, name, sub, aux)
+        else:
+            if typeinfo.ival not in (1, 2, 4, 8):
+                raise ValueError('bad integer code in {0}'.format(typeinfo))
+            encdec = sequencer.EncDecSimple(name, typeinfo.ival, aux)
+        return condval, encdec
+
+    def emit_simple(name, typeinfo, aux=None):
+        """
+        Emit name[type].  We may be inside a conditional; if so
+        cond is not None.
+        """
+        condval, encdec = lookup(name, typeinfo, aux)
+        seq.append_encdec(condval, encdec)
+        names.append(name)
+
+    def emit_repeat(name1, name2, typeinfo):
+        """
+        Emit name1*(name2[type]).
+
+        Note that the conditional is buried in the sub-coder for
+        name2.  It must be passed through anyway in case the sub-
+        coder is only partly conditional.  If the sub-coder is
+        fully conditional, each sub-coding uses or produces no
+        bytes and hence the array itself is effectively conditional
+        as well (it becomes name1 * [None]).
+
+        We don't (currently) have any auxiliary data for arrays.
+        """
+        if name1 not in names:
+            raise ValueError('{0}*({1}[{2}]): '
+                             '{0} undefined'.format(name1, name2,
+                                                    typeinfo.value))
+        condval, encdec = lookup(name2, typeinfo)
+        encdec = sequencer.EncDecA(name1, name2, encdec)
+        seq.append_encdec(condval, encdec)
+        names.append(name2)
+
+    def emit_bytes_repeat(name1, name2):
+        """
+        Emit name1[name2], e.g., data[count].
+        """
+        condval = None if cond is None else cond.value
+        # Note that the two names are reversed when compared to
+        # count*(data[type]).  The "sub-coder" is handled directly
+        # by EncDecA, hence is None.
+        #
+        # As a peculiar side effect, all bytes-repeats cause the
+        # count itself to become automatic (to have an aux of 'len').
+        encdec = sequencer.EncDecA(name2, name1, None, 'len')
+        seq.append_encdec(condval, encdec)
+        names.append(name1)
+
+    supported_conditions = ('.u',)  # a tuple, not a str: exact match only
+    while tokens:
+        token = tokens.popleft()
+        if token.ttype == 'label':
+            raise ValueError('misplaced label')
+        if token.ttype == 'aux':
+            raise ValueError('misplaced auxiliary')
+        if token.ttype == '{':
+            if cond is not None:
+                raise ValueError('nested "{"')
+            if len(tokens) == 0:
+                raise ValueError('unclosed "{"')
+            cond = tokens.popleft()
+            if cond.ttype != 'label':
+                raise ValueError('"{" not followed by cond label')
+            if cond.value not in supported_conditions:
+                raise ValueError('unsupported condition "{0}"'.format(
+                    cond.value))
+            continue
+        if token.ttype == '}':
+            if cond is None:
+                raise ValueError('closing "}" w/o opening "{"')
+            cond = None
+            continue
+        if token.ttype == 'word*':
+            if len(tokens) == 0 or tokens[0].ttype != '(':
+                raise ValueError('{0} not followed by (...)'.format(token))
+            tokens.popleft()
+            repeat = get_subscripted(tokens)
+            if repeat is None:
+                raise ValueError('parse error after {0}('.format(token))
+            if len(tokens) == 0 or tokens[0].ttype != ')':
+                raise ValueError('missing ")" after {0}({1}{2}'.format(
+                    token, repeat[0], repeat[1]))
+            tokens.popleft()
+            # N.B.: a repeat cannot have an auxiliary info (yet?).
+            emit_repeat(token.value, repeat[0].value, repeat[1])
+            continue
+        if token.ttype == 'word':
+            # Special case: _STRING_MAGIC turns into a string
+            # sequencer.  This should be used with just one
+            # typedef (typedef s: _string_).
+            if token.value == _STRING_MAGIC:
+                names.append(_STRING_MAGIC) # XXX temporary
+                continue
+            if len(tokens) == 0 or tokens[0].ttype != 'type':
+                raise ValueError('parse error after {0}'.format(token))
+            type_or_size = tokens.popleft()
+            # Check for name[name2] where name2 is a word (not a
+            # number) that is in the names[] array.
+            if type_or_size.value in names:
+                # NB: this cannot have auxiliary info.
+                emit_bytes_repeat(token.value, type_or_size.value)
+                continue
+            if len(tokens) > 0 and tokens[0].ttype == 'aux':
+                aux = tokens.popleft()
+                if aux.value != 'auto':
+                    raise ValueError('{0}{1}: only know "auto", not '
+                                     '{2}'.format(token, type_or_size,
+                                                  aux.value))
+                emit_simple(token.value, type_or_size, aux.value)
+            else:
+                emit_simple(token.value, type_or_size)
+            continue
+        raise ValueError('"{0}" not valid here'.format(token))
+
+    if cond is not None:
+        raise ValueError('unclosed "{"')
+
+    return names
+
+class _ProtoDefs(object):
+    def __init__(self):
+        # Scan our typedefs. This may execute '#define's as well.
+        self.typedefs = {}
+        self.defines = {}
+        typedef_re = re.compile(r'\s*typedef\s+(\w+)\s*:\s*(.*)')
+        self.parse_lines('SDesc', SDesc, typedef_re, self.handle_typedef)
+        self.parse_lines('QIDDesc', QIDDesc, typedef_re, self.handle_typedef)
+        self.parse_lines('STATDesc', STATDesc, typedef_re, self.handle_typedef)
+        self.parse_lines('WirestatDesc', WirestatDesc, typedef_re,
+                         self.handle_typedef)
+        self.parse_lines('DirentDesc', DirentDesc, typedef_re,
+                         self.handle_typedef)
+
+        # Scan protocol (the bulk of the work).  This, too, may
+        # execute '#define's.
+        self.protocol = {}
+        proto_re = re.compile(r'(\*?\w+)(\.\w+)?\s*(?:=\s*(\d+))?\s*:\s*(.*)')
+        self.prev_proto_value = None
+        self.parse_lines('ProtocolDesc', ProtocolDesc,
+                         proto_re, self.handle_proto_def)
+
+        self.setup_header()
+
+        # set these up for export()
+        self.plain = {}
+        self.dotu = {}
+        self.dotl = {}
+
+    def parse_lines(self, name, text, regexp, match_handler):
+        """
+        Parse a sequence of lines.  Match each line using the
+        given regexp, or (first) as a #define line.  Note that
+        indented lines are either #defines or are commentary!
+
+        If handling raises a ValueError, we complain and include
+        the appropriate line offset.  Then we sys.exit(1) (!).
+        """
+        define = re.compile(r'\s*#define\s+(\w+)\s+([^/]*)'
+                            r'(\s*/\*.*\*/)?\s*$')
+        for lineoff, line in enumerate(text.splitlines()):
+            try:
+                match = define.match(line)
+                if match:
+                    self.handle_define(*match.groups())
+                    continue
+                match = regexp.match(line)
+                if match:
+                    match_handler(*match.groups())
+                    continue
+                if len(line) and not line[0].isspace():
+                    raise ValueError('unhandled line: {0}'.format(line))
+            except ValueError as err:
+                print('Internal error while parsing {0}:\n'
+                      '    {1}\n'
+                      '(at line offset +{2}, discounting \\-newline)\n'
+                      'The original line in question reads:\n'
+                      '{3}'.format(name, err.args[0], lineoff, line),
+                      file=sys.stderr)
+                sys.exit(1)
+
+    def handle_define(self, name, value, comment):
+        """
+        Handle #define match.
+
+        The regexp has three fields, matching the name, value,
+        and possibly-empty comment; these are our arguments.
+        """
+        # Obnoxious: int(,0) requires new 0o syntax in py3k;
+        # work around by trying twice, once with base 0, then again
+        # with explicit base 8 if the first attempt fails.
+        try:
+            value = int(value, 0)
+        except ValueError:
+            value = int(value, 8)
+        if DEBUG:
+            print('define: defining {0} as {1:x}'.format(name, value),
+                  file=sys.stderr)
+        if name in self.defines:
+            raise ValueError('redefining {0}'.format(name))
+        self.defines[name] = (value, comment)
+
+    def handle_typedef(self, name, expr):
+        """
+        Handle typedef match.
+
+        The regexp has just two fields, the name and the expression
+        to parse (note that the expression must fit all on one line,
+        using backslash-newline if needed).
+
+        Typedefs may refer back to existing typedefs, so we pass
+        self.typedefs to _parse_expr().
+        """
+        seq = sequencer.Sequencer(name)
+        fields = _parse_expr(seq, expr, self.typedefs)
+        # Check for special string magic typedef.  (The name
+        # probably should be just 's' but we won't check that
+        # here.)
+        if len(fields) == 1 and fields[0] == _STRING_MAGIC:
+            cls = None
+        else:
+            cls = pfod.pfod(name, fields)
+        if DEBUG:
+            print('typedef: {0} = {1!r}; '.format(name, fields),
+                  end='', file=sys.stderr)
+            _debug_print_sequencer(seq)
+        if name in self.typedefs:
+            raise ValueError('redefining {0}'.format(name))
+        self.typedefs[name] = cls, seq
+
+    def handle_proto_def(self, name, proto_version, value, expr):
+        """
+        Handle protocol definition.
+
+        The regexp matched:
+        - The name of the protocol option such as Tversion,
+          Rversion, Rlerror, etc.
+        - The protocol version, if any (.u or .L).
+        - The value, if specified.  If no value is specified
+          we use "the next value".
+        - The expression to parse.
+
+        As with typedefs, the expression must fit all on one
+        line.
+        """
+        if value:
+            value = int(value)
+        elif self.prev_proto_value is not None:
+            value = self.prev_proto_value + 1
+        else:
+            raise ValueError('{0}: missing protocol value'.format(name))
+        if value < 0 or value > 255:
+            raise ValueError('{0}: protocol value {1} out of '
+                             'range'.format(name, value))
+        self.prev_proto_value = value
+
+        seq = sequencer.Sequencer(name)
+        fields = _parse_expr(seq, expr, self.typedefs)
+        cls = pfod.pfod(name, fields)
+        if DEBUG:
+            print('proto: {0} = {1}; '.format(name, value),
+                  end='', file=sys.stderr)
+            _debug_print_sequencer(seq)
+        if name in self.protocol:
+            raise ValueError('redefining {0}'.format(name))
+        self.protocol[name] = cls, value, proto_version, seq
+
+    def setup_header(self):
+        """
+        Handle header definition.
+
+        This is a bit gimmicky and uses some special cases,
+        because data is sized to dsize which is effectively
+        just size - 5.  We can't express this in our mini language,
+        so we just hard-code the sequencer and pfod.
+
+        In addition, the unpacker never gets the original packet's
+        size field, only the fcall and the data.
+        """
+        self.header_pfod = pfod.pfod('Header', 'size dsize fcall data')
+
+        seq = sequencer.Sequencer('Header-pack')
+        # size: 4 bytes
+        seq.append_encdec(None, sequencer.EncDecSimple('size', 4, None))
+        # fcall: 1 byte
+        seq.append_encdec(None, sequencer.EncDecSimple('fcall', 1, None))
+        # data: string of length dsize
+        seq.append_encdec(None, sequencer.EncDecA('dsize', 'data', None))
+        if DEBUG:
+            print('Header-pack:', file=sys.stderr)
+            _debug_print_sequencer(seq)
+        self.header_pack_seq = seq
+
+        seq = sequencer.Sequencer('Header-unpack')
+        seq.append_encdec(None, sequencer.EncDecSimple('fcall', 1, None))
+        seq.append_encdec(None, sequencer.EncDecA('dsize', 'data', None))
+        if DEBUG:
+            print('Header-unpack:', file=sys.stderr)
+            _debug_print_sequencer(seq)
+        self.header_unpack_seq = seq
+
+    def export(self, mod):
+        """
+        Dump results of internal parsing process
+        into our module namespace.
+
+        Note that we do not export the 's' typedef, which
+        did not define a data structure.
+
+        Check for name collisions while we're at it.
+        """
+        namespace = type('td', (object,), {})
+
+        # Export the typedefs (qid, stat).
+        setattr(mod, 'td', namespace)
+        for key in self.typedefs:
+            cls = self.typedefs[key][0]
+            if cls is None:
+                continue
+            setattr(namespace, key, cls)
+
+        # Export two sequencers for en/decoding stat fields
+        # (needed for reading directories and doing Twstat).
+        setattr(namespace, 'stat_seq', self.typedefs['stat'][1])
+        setattr(namespace, 'wirestat_seq', self.typedefs['wirestat'][1])
+
+        # Export the similar dirent decoder.
+        setattr(namespace, 'dirent_seq', self.typedefs['dirent'][1])
+
+        # Export the #define values
+        for key, val in self.defines.items():
+            if hasattr(namespace, key):
+                print('{0!r} is both a #define and a typedef'.format(key))
+                raise AssertionError('bad internal names')
+            setattr(namespace, key, val[0])
+
+        # Export Tattach, Rattach, Twrite, Rversion, etc values.
+        # Set up fcall_names[] table to map from value back to name.
+        # We also map fcall names to themselves, so given either a
+        # name or a byte code we can find out whether it's a valid
+        # fcall.
+        for key, val in self.protocol.items():
+            if hasattr(namespace, key):
+                prev_def = '#define' if key in self.defines else 'typedef'
+                print('{0!r} is both a {1} and a protocol '
+                      'value'.format(key, prev_def))
+                raise AssertionError('bad internal names')
+            setattr(namespace, key, val[1])
+            fcall_names[key] = key
+            fcall_names[val[1]] = key
+
+        # Hook up PFOD's for each protocol object -- for
+        # Tversion/Rversion, Twrite/Rwrite, Tlopen/Rlopen, etc.
+        # They go in the rrd name-space, and also in dictionaries
+        # per-protocol here, with the lookup pointing to a _PackInfo
+        # for the corresponding sequencer.
+        #
+        # Note that each protocol PFOD is optionally annotated with
+        # its specific version.  We know that .L > .u > plain; but
+        # all the "lesser" PFODs are available to all "greater"
+        # protocols at all times.
+        #
+        # (This is sort-of-wrong for Rerror vs Rlerror, but we
+        # don't bother to exclude Rerror from .L.)
+        #
+        # The PFODs themselves were already created, at parse time.
+        namespace = type('rrd', (object,), {})
+        setattr(mod, 'rrd', namespace)
+        for key, val in self.protocol.items():
+            cls = val[0]
+            proto_version = val[2]
+            seq = val[3]
+            packinfo = _PackInfo(seq)
+            if proto_version is None:
+                # all three protocols have it
+                self.plain[cls] = packinfo
+                self.dotu[cls] = packinfo
+                self.dotl[cls] = packinfo
+            elif proto_version == '.u':
+                # only .u and .L have it
+                self.dotu[cls] = packinfo
+                self.dotl[cls] = packinfo
+            elif proto_version == '.L':
+                # only .L has it
+                self.dotl[cls] = packinfo
+            else:
+                raise AssertionError('unknown protocol {1} for '
+                                     '{0}'.format(key, proto_version))
+            setattr(namespace, key, cls)
+
+_9p_data = _ProtoDefs()
+_9p_data.export(sys.modules[__name__])
+
+# Version table: p9_version() looks entries up by lower-cased name.
+_9p_versions = {
+    '9p2000': _P9Proto({'version': '9P2000'},
+                       {'.u': False},
+                       _9p_data,
+                       _9p_data.plain,
+                       0),
+    '9p2000.u': _P9Proto({'version': '9P2000.u'},
+                         {'.u': True},
+                         _9p_data,
+                         _9p_data.dotu,
+                         1),
+    '9p2000.l': _P9Proto({'version': '9P2000.L'},
+                         {'.u': True},
+                         _9p_data,
+                         _9p_data.dotl,
+                         2),
+}
+def p9_version(vers_string):
+    """
+    Look up the protocol implementation for the given version.
+    An unknown version raises KeyError on the lower-cased text form
+    of vers_string (bytes input is decoded first under py3k).
+    """
+    text = vers_string
+    if isinstance(text, bytes) and not isinstance(text, str):
+        text = text.decode('utf-8', 'surrogateescape')
+    return _9p_versions[text.lower()]
+
+plain = p9_version('9p2000')    # ready-made instances, one per version
+dotu = p9_version('9p2000.u')
+dotl = p9_version('9p2000.L')   # p9_version() lower-cases its argument
+
+def qid_type2name(qidtype):
+    """
+    Map a qid type value to a short printable name.
+
+    >>> qid_type2name(td.QTDIR)
+    'dir'
+    >>> qid_type2name(td.QTAPPEND)
+    'append-only'
+    >>> qid_type2name(0xff)
+    'invalid(0xff)'
+    """
+    # Only exact single values are recognized.  Is it ever OK to
+    # have multiple bits set, e.g., both QTAPPEND and QTEXCL?
+    known = {
+        td.QTDIR: 'dir',
+        td.QTAPPEND: 'append-only',
+        td.QTEXCL: 'exclusive',
+        td.QTMOUNT: 'mount',
+        td.QTAUTH: 'auth',
+        td.QTTMP: 'tmp',
+        td.QTSYMLINK: 'symlink',
+        td.QTFILE: 'file',
+    }
+    label = known.get(qidtype)
+    if label is None:
+        return 'invalid({0:#x})'.format(qidtype)
+    return label
+
+if __name__ == '__main__':
+    import doctest
+    doctest.testmod()   # run the doctests embedded in the docstrings above
Index: lib/lib9p/pytest/sequencer.py
===================================================================
--- /dev/null
+++ lib/lib9p/pytest/sequencer.py
@@ -0,0 +1,653 @@
+#! /usr/bin/env python
+
+from __future__ import print_function
+
+#__all__ = ['EncDec', 'EncDecSimple', 'EncDecTyped', 'EncDecA',
+#    'SequenceError', 'Sequencer']
+
+import abc
+import struct
+import sys
+
+# Little-endian unsigned codecs, keyed by field width in bytes.
+_ProtoStruct = {
+    '_string_': None,   # handled specially
+}
+for _width, _code in ((1, 'B'), (2, 'H'), (4, 'I'), (8, 'Q')):
+    # One shared Struct per width, reachable via both the str key
+    # (e.g. '2') and the int key (e.g. 2).
+    _ProtoStruct[str(_width)] = struct.Struct('<' + _code)
+    _ProtoStruct[_width] = _ProtoStruct[str(_width)]
+del _width, _code
+
+class EncDec(object):
+    """
+    Base class for en/de-coders, which are put into sequencers.
+
+    All have a name and arbitrary user-supplied auxiliary data
+    (default=None).
+
+    All provide a pack() and unpack().  The pack() function
+    returns a "bytes" value.  This is internally implemented as a
+    function apack() that returns a list of struct.pack() bytes,
+    and pack() just joins them up as needed.
+
+    The pack/unpack functions take a dictionary of variable names
+    and values, and a second dictionary for conditionals, but at
+    this level conditionals don't apply: they are just being
+    passed through.  Variable names do apply to array encoders.
+
+    EncDec also provide b2s() and s2b() static methods, which
+    convert strings to bytes and vice versa, as reversibly as
+    possible (using surrogateescape encoding). In Python2 this is
+    a no-op since the string type *is* the bytes type (<type
+    'unicode'> is the unicode-ized string type).
+
+    EncDec also provides b2u() and u2b() to do conversion to/from
+    Unicode.
+
+    These are partly for internal use (all strings get converted
+    to UTF-8 byte sequences when coding a _string_ type) and partly
+    for doctests, where we just want some py2k/py3k compat hacks.
+    """
+    __metaclass__ = abc.ABCMeta     # after the docstring, so __doc__ is set
+    def __init__(self, name, aux):
+        self.name = name
+        self.aux = aux
+
+    @staticmethod
+    def b2u(byte_sequence):
+        "transform bytes to unicode"
+        return byte_sequence.decode('utf-8', 'surrogateescape')
+
+    @staticmethod
+    def u2b(unicode_sequence):
+        "transform unicode to bytes"
+        return unicode_sequence.encode('utf-8', 'surrogateescape')
+
+    if sys.version_info[0] >= 3:
+        b2s = b2u
+        @staticmethod
+        def s2b(string):
+            "transform string to bytes (leaves raw byte sequence unchanged)"
+            if isinstance(string, bytes):
+                return string
+            return string.encode('utf-8', 'surrogateescape')
+    else:
+        @staticmethod
+        def b2s(byte_sequence):
+            "transform bytes to string - no-op in python2.7"
+            return byte_sequence
+        @staticmethod
+        def s2b(string):
+            "transform string or unicode to bytes"
+            if isinstance(string, unicode):
+                return string.encode('utf-8', 'surrogateescape')
+            return string
+
+    def pack(self, vdict, cdict, val):
+        "encode value <val> into a byte-string"
+        return b''.join(self.apack(vdict, cdict, val))
+
+    @abc.abstractmethod
+    def apack(self, vdict, cdict, val):
+        "encode value <val> into [bytes1, b2, ..., bN]"
+
+    @abc.abstractmethod
+    def unpack(self, vdict, cdict, bstring, offset, noerror=False):
+        "unpack bytes from <bstring> at <offset>"
+
+
+
+class EncDecSimple(EncDec):
+    r"""
+    Encode/decode a simple (but named) field.  The field is not an
+    array, which requires using EncDecA, nor a typed object
+    like a qid or stat instance -- those require a Sequence and
+    EncDecTyped.
+
+    The format is one of '1'/1, '2'/2, '4'/4, '8'/8, or '_string_'.
+
+    Note: using b2s here is purely a doctest/testmod python2/python3
+    compat hack.  The output of e.pack is <type 'bytes'>; b2s
+    converts it to a string, purely for display purposes.  (It might
+    be better to map py2 output to bytes but they just print as a
+    string anyway.)  In normal use, you should not call b2s here.
+
+    >>> e = EncDecSimple('eggs', 2)
+    >>> e.b2s(e.pack({}, {}, 0))
+    '\x00\x00'
+    >>> e.b2s(e.pack({}, {}, 256))
+    '\x00\x01'
+
+    Values that cannot be packed produce a SequenceError:
+
+    >>> e.pack({}, {}, None)
+    Traceback (most recent call last):
+        ...
+    SequenceError: failed while packing 'eggs'=None
+    >>> e.pack({}, {}, -1)
+    Traceback (most recent call last):
+        ...
+    SequenceError: failed while packing 'eggs'=-1
+
+    Unpacking both returns a value, and tells how many bytes it
+    used out of the bytestring or byte-array argument.  If there
+    are not enough bytes remaining at the starting offset, it
+    raises a SequenceError, unless noerror=True (then unset
+    values are None)
+
+    >>> e.unpack({}, {}, b'\x00\x01', 0)
+    (256, 2)
+    >>> e.unpack({}, {}, b'', 0)
+    Traceback (most recent call last):
+        ...
+    SequenceError: out of data while unpacking 'eggs'
+    >>> e.unpack({}, {}, b'', 0, noerror=True)
+    (None, 2)
+
+    Note that strings can be provided as regular strings, byte
+    strings (same as regular strings in py2k), or Unicode strings
+    (same as regular strings in py3k).  Unicode strings will be
+    converted to UTF-8 before being packed.  Since this leaves
+    7-bit characters alone, these examples work in both py2k and
+    py3k.  (Note: the UTF-8 encoding of u'\u1234' is
+    '\xe1\x88\xb4' or 225, 136, 180. The b2i trick below is
+    another py2k vs py3k special case just for doctests: py2k
+    tries to display the utf-8 encoded data as a string.)
+
+    >>> e = EncDecSimple('spam', '_string_')
+    >>> e.b2s(e.pack({}, {}, 'p3=unicode,p2=bytes'))
+    '\x13\x00p3=unicode,p2=bytes'
+
+    >>> e.b2s(e.pack({}, {}, b'bytes'))
+    '\x05\x00bytes'
+
+    >>> import sys
+    >>> ispy3k = sys.version_info[0] >= 3
+
+    >>> b2i = lambda x: x if ispy3k else ord(x)
+    >>> [b2i(x) for x in e.pack({}, {}, u'\u1234')]
+    [3, 0, 225, 136, 180]
+
+    The byte length of the utf-8 data cannot exceed 65535 since
+    the encoding has the length as a 2-byte field (a la the
+    encoding for 'eggs' here).  A too-long string produces
+    a SequenceError as well.
+
+    >>> e.pack({}, {}, 16384 * 'spam')
+    Traceback (most recent call last):
+        ...
+    SequenceError: string too long (len=65536) while packing 'spam'
+
+    Unpacking strings produces byte arrays.  (Of course,
+    in py2k these are also known as <type 'str'>.)
+
+    >>> unpacked = e.unpack({}, {}, b'\x04\x00data', 0)
+    >>> etype = bytes if ispy3k else str
+    >>> print(isinstance(unpacked[0], etype))
+    True
+    >>> e.b2s(unpacked[0])
+    'data'
+    >>> unpacked[1]
+    6
+
+    You may use e.b2s() to convert them to unicode strings in py3k,
+    or you may set e.autob2s.  This still only really does
+    anything in py3k, since py2k strings *are* bytes, so it's
+    really just intended for doctest purposes (see EncDecA):
+
+    >>> e.autob2s = True
+    >>> e.unpack({}, {}, b'\x07\x00stringy', 0)
+    ('stringy', 9)
+    """
+    def __init__(self, name, fmt, aux=None):
+        super(EncDecSimple, self).__init__(name, aux)
+        self.fmt = fmt
+        self.struct = _ProtoStruct[fmt]
+        self.autob2s = False
+
+    def __repr__(self):
+        if self.aux is None:
+            return '{0}({1!r}, {2!r})'.format(self.__class__.__name__,
+                self.name, self.fmt)
+        return '{0}({1!r}, {2!r}, {3!r})'.format(self.__class__.__name__,
+            self.name, self.fmt, self.aux)
+
+    __str__ = __repr__
+
+    def apack(self, vdict, cdict, val):
+        "encode a value"
+        try:
+            if self.struct:
+                return [self.struct.pack(val)]
+            sval = self.s2b(val)
+            if len(sval) > 65535:
+                raise SequenceError('string too long (len={0:d}) '
+                    'while packing {1!r}'.format(len(sval), self.name))
+            return [EncDecSimple.string_len.pack(len(sval)), sval]
+        # Include AttributeError in case someone tries to, e.g.,
+        # pack name=None and self.s2b() tries to use .encode on it.
+        except (struct.error, AttributeError):
+            raise SequenceError('failed '
+                'while packing {0!r}={1!r}'.format(self.name, val))
+
+    def _unpack1(self, via, bstring, offset, noerror):
+        "internal function to unpack single item"
+        # Check length here: struct.error text varies by Python version.
+        if len(bstring) - offset < via.size:
+            if noerror:
+                return None, offset + via.size
+            raise SequenceError('out of data '
+                'while unpacking {0!r}'.format(self.name))
+        try:
+            tup = via.unpack_from(bstring, offset)
+        except struct.error: # not clear what to do here if noerror
+            raise SequenceError('failed '
+                'while unpacking {0!r}'.format(self.name))
+        assert len(tup) == 1
+        return tup[0], offset + via.size
+
+    def unpack(self, vdict, cdict, bstring, offset, noerror=False):
+        "decode a value; return the value and the new offset"
+        if self.struct:
+            return self._unpack1(self.struct, bstring, offset, noerror)
+        slen, offset = self._unpack1(EncDecSimple.string_len, bstring, offset,
+            noerror)
+        if slen is None:
+            return None, offset
+        nexto = offset + slen
+        if len(bstring) < nexto:
+            if noerror:
+                val = None
+            else:
+                raise SequenceError('out of data '
+                    'while unpacking {0!r}'.format(self.name))
+        else:
+            val = bstring[offset:nexto]
+            if self.autob2s:
+                val = self.b2s(val)
+        return val, nexto
+
+# string length: 2 byte unsigned field
+EncDecSimple.string_len = _ProtoStruct[2]
+
+class EncDecTyped(EncDec):
+    r"""
+    EncDec for typed objects (which are built from PFODs, which are
+    a sneaky class variant of OrderedDict similar to namedtuple).
+
+    Calling the klass() function with no arguments must create an
+    instance with all-None members.
+
+    We also require a Sequencer to pack and unpack the members of
+    the underlying pfod.
+
+    >>> qid_s = Sequencer('qid')
+    >>> qid_s.append_encdec(None, EncDecSimple('type', 1))
+    >>> qid_s.append_encdec(None, EncDecSimple('version', 4))
+    >>> qid_s.append_encdec(None, EncDecSimple('path', 8))
+    >>> len(qid_s)
+    3
+
+    >>> from pfod import pfod
+    >>> qid = pfod('qid', ['type', 'version', 'path'])
+    >>> len(qid._fields)
+    3
+    >>> qid_inst = qid(1, 2, 3)
+    >>> qid_inst
+    qid(type=1, version=2, path=3)
+
+    >>> e = EncDecTyped(qid, 'aqid', qid_s)
+    >>> e.b2s(e.pack({}, {}, qid_inst))
+    '\x01\x02\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00'
+    >>> e.unpack({}, {},
+    ... b'\x01\x02\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00', 0)
+    (qid(type=1, version=2, path=3), 13)
+
+    If an EncDecTyped instance has a conditional sequencer, note
+    that unpacking will leave un-selected items set to None (see
+    the Sequencer example below):
+
+    >>> breakfast = pfod('breakfast', 'eggs spam ham')
+    >>> breakfast()
+    breakfast(eggs=None, spam=None, ham=None)
+    >>> bfseq = Sequencer('breakfast')
+    >>> bfseq.append_encdec(None, EncDecSimple('eggs', 1))
+    >>> bfseq.append_encdec('yuck', EncDecSimple('spam', 1))
+    >>> bfseq.append_encdec(None, EncDecSimple('ham', 1))
+    >>> e = EncDecTyped(breakfast, 'bfname', bfseq)
+    >>> e.unpack({}, {'yuck': False}, b'\x02\x01\x04', 0)
+    (breakfast(eggs=2, spam=None, ham=1), 2)
+
+    This used just two of the three bytes: eggs=2, ham=1.
+
+    >>> e.unpack({}, {'yuck': True}, b'\x02\x01\x04', 0)
+    (breakfast(eggs=2, spam=1, ham=4), 3)
+
+    This used the third byte, so ham=4.
+    """
+    def __init__(self, klass, name, sequence, aux=None):
+        assert len(sequence) == len(klass()._fields) # temporary
+        super(EncDecTyped, self).__init__(name, aux)
+        self.klass = klass
+        self.name = name
+        self.sequence = sequence
+
+    def __repr__(self):
+        if self.aux is None:
+            return '{0}({1!r}, {2!r}, {3!r})'.format(self.__class__.__name__,
+                self.klass, self.name, self.sequence)
+        return '{0}({1!r}, {2!r}, {3!r}, {4!r})'.format(self.__class__.__name__,
+            self.klass, self.name, self.sequence, self.aux)
+
+    __str__ = __repr__
+
+    def apack(self, vdict, cdict, val):
+        """
+        Pack each of our instance variables.
+
+        Note that some packing may be conditional.
+        """
+        return self.sequence.apack(val, cdict)
+
+    def unpack(self, vdict, cdict, bstring, offset, noerror=False):
+        """
+        Unpack each instance variable, into a new object of
+        self.klass.  Return the new instance and new offset.
+
+        Note that some unpacking may be conditional.
+        """
+        obj = self.klass()
+        offset = self.sequence.unpack_from(obj, cdict, bstring, offset, noerror)
+        return obj, offset
+
+class EncDecA(EncDec):
+    r"""
+    EncDec for arrays (repeated objects).
+
+    We take the name of repeat count variable, and a sub-coder
+    (Sequencer instance).  For instance, we can en/de-code
+    repeat='nwname' copies of name='wname', or nwname of
+    name='wqid', in a Twalk en/de-code.
+
+    Note that we don't pack or unpack the repeat count itself --
+    that must be done by higher level code.  We just get its value
+    from vdict.
+
+    >>> subcode = EncDecSimple('wname', '_string_')
+    >>> e = EncDecA('nwname', 'wname', subcode)
+    >>> e.b2s(e.pack({'nwname': 2}, {}, ['A', 'BC']))
+    '\x01\x00A\x02\x00BC'
+
+    >>> subcode.autob2s = True # so that A and BC decode to py3k str
+    >>> e.unpack({'nwname': 2}, {}, b'\x01\x00A\x02\x00BC', 0)
+    (['A', 'BC'], 7)
+
+    When using noerror, the first sub-item that fails to decode
+    completely starts the None-s.  Strings whose length fails to
+    decode are assumed to be zero bytes long as well, for the
+    purpose of showing the expected packet length:
+
+    >>> e.unpack({'nwname': 2}, {}, b'\x01\x00A\x02\x00', 0, noerror=True)
+    (['A', None], 7)
+    >>> e.unpack({'nwname': 2}, {}, b'\x01\x00A\x02', 0, noerror=True)
+    (['A', None], 5)
+    >>> e.unpack({'nwname': 3}, {}, b'\x01\x00A\x02', 0, noerror=True)
+    (['A', None, None], 7)
+
+    As a special case, supplying None for the sub-coder
+    makes the repeated item pack or unpack a simple byte
+    string.  (Note that autob2s is not supported here.)
+    A too-short byte string is simply truncated!
+
+    >>> e = EncDecA('count', 'data', None)
+    >>> e.b2s(e.pack({'count': 5}, {}, b'12345'))
+    '12345'
+    >>> x = list(e.unpack({'count': 3}, {}, b'123', 0))
+    >>> x[0] = e.b2s(x[0])
+    >>> x
+    ['123', 3]
+    >>> x = list(e.unpack({'count': 3}, {}, b'12', 0, noerror=True))
+    >>> x[0] = e.b2s(x[0])
+    >>> x
+    ['12', 3]
+    """
+    def __init__(self, repeat, name, sub, aux=None):
+        super(EncDecA, self).__init__(name, aux)
+        self.repeat = repeat
+        self.name = name
+        self.sub = sub
+
+    def __repr__(self):
+        if self.aux is None:
+            return '{0}({1!r}, {2!r}, {3!r})'.format(self.__class__.__name__,
+                self.repeat, self.name, self.sub)
+        return '{0}({1!r}, {2!r}, {3!r}, {4!r})'.format(self.__class__.__name__,
+            self.repeat, self.name, self.sub, self.aux)
+
+    __str__ = __repr__
+
+    def apack(self, vdict, cdict, val):
+        "pack each val[i], for i in range(vdict[self.repeat])"
+        num = vdict[self.repeat]
+        assert num == len(val)
+        if self.sub is None:
+            assert isinstance(val, bytes)
+            return [val]
+        parts = []
+        for i in val:
+            parts.extend(self.sub.apack(vdict, cdict, i))
+        return parts
+
+    def unpack(self, vdict, cdict, bstring, offset, noerror=False):
+        "unpack repeatedly, per self.repeat, into new array."
+        num = vdict[self.repeat]
+        if num is None and noerror:
+            num = 0
+        else:
+            assert num >= 0
+        if self.sub is None:
+            nexto = offset + num
+            if len(bstring) < nexto and not noerror:
+                raise SequenceError('out of data '
+                    'while unpacking {0!r}'.format(self.name))
+            return bstring[offset:nexto], nexto
+        array = []
+        for i in range(num):
+            obj, offset = self.sub.unpack(vdict, cdict, bstring, offset,
+                noerror)
+            array.append(obj)
+        return array, offset
+
+class SequenceError(Exception):
+    "sequence error: item too big, or ran out of data"
+    pass
+
+class Sequencer(object):
+    r"""
+    A sequencer is an object that packs (marshals) or unpacks
+    (unmarshals) a series of objects, according to their EncDec
+    instances.
+
+    The objects themselves (and their values) come from, or
+    go into, a dictionary: <vdict>, the first argument to
+    pack/unpack.
+
+    Some fields may be conditional.  The conditions are in a
+    separate dictionary (the second or <cdict> argument).
+
+    Some objects may be dictionaries or PFODs, e.g., they may
+    be a Plan9 qid or stat structure.  These have their own
+    sub-encoding.
+
+    As with each encoder, we have both an apack() function
+    (returns a list of parts) and a plain pack().  Users should
+    mostly stick with plain pack().
+
+    >>> s = Sequencer('monty')
+    >>> s
+    Sequencer('monty')
+    >>> e = EncDecSimple('eggs', 2)
+    >>> s.append_encdec(None, e)
+    >>> s.append_encdec(None, EncDecSimple('spam', 1))
+    >>> s[0]
+    (None, EncDecSimple('eggs', 2))
+    >>> e.b2s(s.pack({'eggs': 513, 'spam': 65}, {}))
+    '\x01\x02A'
+
+    When particular fields are conditional, they appear in
+    packed output, or are taken from the byte-string during
+    unpacking, only if their condition is true.
+
+    As with struct, use unpack_from to start at an arbitrary
+    offset and/or omit verification that the entire byte-string
+    is consumed.
+
+    >>> s = Sequencer('python')
+    >>> s.append_encdec(None, e)
+    >>> s.append_encdec('.u', EncDecSimple('spam', 1))
+    >>> s[1]
+    ('.u', EncDecSimple('spam', 1))
+    >>> e.b2s(s.pack({'eggs': 513, 'spam': 65}, {'.u': True}))
+    '\x01\x02A'
+    >>> e.b2s(s.pack({'eggs': 513, 'spam': 65}, {'.u': False}))
+    '\x01\x02'
+
+    >>> d = {}
+    >>> s.unpack(d, {'.u': True}, b'\x01\x02A')
+    >>> print(d['eggs'], d['spam'])
+    513 65
+    >>> d = {}
+    >>> s.unpack(d, {'.u': False}, b'\x01\x02A', 0)
+    Traceback (most recent call last):
+        ...
+    SequenceError: 1 byte(s) unconsumed
+    >>> s.unpack_from(d, {'.u': False}, b'\x01\x02A', 0)
+    2
+    >>> print(d)
+    {'eggs': 513}
+
+    The incoming dictionary-like object may be pre-initialized
+    if you like; only sequences that decode are filled-in:
+
+    >>> d = {'eggs': None, 'spam': None}
+    >>> s.unpack_from(d, {'.u': False}, b'\x01\x02A', 0)
+    2
+    >>> print(d['eggs'], d['spam'])
+    513 None
+
+    Some objects may be arrays; if so their EncDec is actually
+    an EncDecA, the repeat count must be in the dictionary, and
+    the object itself must have a len() and be index-able:
+
+    >>> s = Sequencer('arr')
+    >>> s.append_encdec(None, EncDecSimple('n', 1))
+    >>> ae = EncDecSimple('array', 2)
+    >>> s.append_encdec(None, EncDecA('n', 'array', ae))
+    >>> ae.b2s(s.pack({'n': 2, 'array': [257, 514]}, {}))
+    '\x02\x01\x01\x02\x02'
+
+    Unpacking an array creates a list of the number of items.
+    The EncDec encoder that decodes the number of items needs to
+    occur first in the sequencer, so that the dictionary will have
+    acquired the repeat-count variable's value by the time we hit
+    the array's encdec:
+
+    >>> d = {}
+    >>> s.unpack(d, {}, b'\x01\x04\x00')
+    >>> d['n'], d['array']
+    (1, [4])
+    """
+    def __init__(self, name):
+        self.name = name
+        self._codes = []
+        self.debug = False # or sys.stderr
+
+    def __repr__(self):
+        return '{0}({1!r})'.format(self.__class__.__name__, self.name)
+
+    __str__ = __repr__
+
+    def __len__(self):
+        return len(self._codes)
+
+    def __iter__(self):
+        return iter(self._codes)
+
+    def __getitem__(self, index):
+        return self._codes[index]
+
+    def dprint(self, *args, **kwargs):
+        if not self.debug:
+            return
+        if isinstance(self.debug, bool):
+            dest = sys.stdout
+        else:
+            dest = self.debug
+        print(*args, file=dest, **kwargs)
+
+    def append_encdec(self, cond, code):
+        "add EncDec en/de-coder, conditional on cond"
+        self._codes.append((cond, code))
+
+    def apack(self, vdict, cdict):
+        """
+        Produce packed representation of each field.
+        """
+        packed_data = []
+        for cond, code in self._codes:
+            # Skip this item if it's conditional on a false thing.
+            if cond is not None and not cdict[cond]:
+                self.dprint('skip %r - %r is False' % (code, cond))
+                continue
+
+            # Pack the item.
+            self.dprint('pack %r - no cond or %r is True' % (code, cond))
+            packed_data.extend(code.apack(vdict, cdict, vdict[code.name]))
+
+        return packed_data
+
+    def pack(self, vdict, cdict):
+        """
+        Flatten packed data.
+        """
+        return b''.join(self.apack(vdict, cdict))
+
+    def unpack_from(self, vdict, cdict, bstring, offset=0, noerror=False):
+        """
+        Unpack from byte string.
+
+        The values are unpacked into a dictionary vdict;
+        some of its entries may themselves be ordered
+        dictionaries created by typedefed codes.
+
+        Raises SequenceError if the string is too short,
+        unless you set noerror, in which case we assume
+        you want to see what you can get out of the data.
+        """
+        for cond, code in self._codes:
+            # Skip this item if it's conditional on a false thing.
+            if cond is not None and not cdict[cond]:
+                self.dprint('skip %r - %r is False' % (code, cond))
+                continue
+
+            # Unpack the item.
+            self.dprint('unpack %r - no cond or %r is True' % (code, cond))
+            obj, offset = code.unpack(vdict, cdict, bstring, offset, noerror)
+            vdict[code.name] = obj
+
+        return offset
+
+    def unpack(self, vdict, cdict, bstring, noerror=False):
+        """
+        Like unpack_from but unless noerror=True, requires that
+        we completely use up the given byte string.
+        """
+        offset = self.unpack_from(vdict, cdict, bstring, 0, noerror)
+        if not noerror and offset != len(bstring):
+            raise SequenceError('{0} byte(s) unconsumed'.format(
+                len(bstring) - offset))
+
+if __name__ == '__main__':
+    import doctest
+    doctest.testmod()
Index: lib/lib9p/pytest/testconf.ini.sample
===================================================================
--- /dev/null
+++ lib/lib9p/pytest/testconf.ini.sample
@@ -0,0 +1,16 @@
+# test configuration
+
+[client]
+server = localhost
+port = 12345
+# timeout is in seconds
+timeout = 0.1
+loglevel = INFO
+logfile = ./ctest.log
+# logfmt = ...
+# protocol = 9p2000, 9p2000.u, or 9p2000.L
+protocol = 9p2000.L
+only_dotl = true
+may_downgrade = False
+uname = anonymous
+n_uname = 1001
Index: lib/lib9p/request.c
===================================================================
--- /dev/null
+++ lib/lib9p/request.c
@@ -0,0 +1,1438 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+#include <sys/param.h>
+#include <sys/uio.h>
+#if defined(__FreeBSD__)
+#include <sys/sbuf.h>
+#else
+#include "sbuf/sbuf.h"
+#endif
+#include "lib9p.h"
+#include "lib9p_impl.h"
+#include "fcall.h"
+#include "fid.h"
+#include "hashtable.h"
+#include "log.h"
+#include "linux_errno.h"
+#include "backend/backend.h"
+#include "threadpool.h"
+
+#define N(x)    (sizeof(x) / sizeof(x[0]))
+
+static int l9p_dispatch_tversion(struct l9p_request *req);
+static int l9p_dispatch_tattach(struct l9p_request *req);
+static int l9p_dispatch_tclunk(struct l9p_request *req);
+static int l9p_dispatch_tcreate(struct l9p_request *req);
+static int l9p_dispatch_topen(struct l9p_request *req);
+static int l9p_dispatch_tread(struct l9p_request *req);
+static int l9p_dispatch_tremove(struct l9p_request *req);
+static int l9p_dispatch_tstat(struct l9p_request *req);
+static int l9p_dispatch_twalk(struct l9p_request *req);
+static int l9p_dispatch_twrite(struct l9p_request *req);
+static int l9p_dispatch_twstat(struct l9p_request *req);
+static int l9p_dispatch_tstatfs(struct l9p_request *req);
+static int l9p_dispatch_tlopen(struct l9p_request *req);
+static int l9p_dispatch_tlcreate(struct l9p_request *req);
+static int l9p_dispatch_tsymlink(struct l9p_request *req);
+static int l9p_dispatch_tmknod(struct l9p_request *req);
+static int l9p_dispatch_trename(struct l9p_request *req);
+static int l9p_dispatch_treadlink(struct l9p_request *req);
+static int l9p_dispatch_tgetattr(struct l9p_request *req);
+static int l9p_dispatch_tsetattr(struct l9p_request *req);
+static int l9p_dispatch_txattrwalk(struct l9p_request *req);
+static int l9p_dispatch_txattrcreate(struct l9p_request *req);
+static int l9p_dispatch_treaddir(struct l9p_request *req);
+static int l9p_dispatch_tfsync(struct l9p_request *req);
+static int l9p_dispatch_tlock(struct l9p_request *req);
+static int l9p_dispatch_tgetlock(struct l9p_request *req);
+static int l9p_dispatch_tlink(struct l9p_request *req);
+static int l9p_dispatch_tmkdir(struct l9p_request *req);
+static int l9p_dispatch_trenameat(struct l9p_request *req);
+static int l9p_dispatch_tunlinkat(struct l9p_request *req);
+
+/*
+ * Each Txxx handler has a "must run" flag.  If it is false,
+ * we check for a flush request before calling the handler.
+ * If a flush is already requested we can instantly fail the
+ * request with EINTR.
+ *
+ * Tclunk and Tremove must run because they make their fids
+ * become invalid.  Tversion and Tattach should never get
+ * a flush request applied (it makes no sense as the connection
+ * is not really running yet), so it should be harmless to
+ * set them either way, but for now we have them as must-run.
+ * Flushing a Tflush is not really allowed either, so we keep
+ * Tflush as must-run too (although Tflush requests run without
+ * being done threaded anyway).
+ */
+struct l9p_handler {
+	enum l9p_ftype type;
+	int (*handler)(struct l9p_request *);
+	bool must_run;
+};
+
+static const struct l9p_handler l9p_handlers_no_version[] = {
+	{L9P_TVERSION, l9p_dispatch_tversion, true},
+};
+
+static const struct l9p_handler l9p_handlers_base[] = {
+	{L9P_TVERSION, l9p_dispatch_tversion, true},
+	{L9P_TATTACH, l9p_dispatch_tattach, true},
+	{L9P_TCLUNK, l9p_dispatch_tclunk, true},
+	{L9P_TFLUSH, l9p_threadpool_tflush, true},
+	{L9P_TCREATE, l9p_dispatch_tcreate, false},
+	{L9P_TOPEN, l9p_dispatch_topen, false},
+	{L9P_TREAD, l9p_dispatch_tread, false},
+	{L9P_TWRITE, l9p_dispatch_twrite, false},
+	{L9P_TREMOVE, l9p_dispatch_tremove, true},
+	{L9P_TSTAT, l9p_dispatch_tstat, false},
+	{L9P_TWALK, l9p_dispatch_twalk, false},
+	{L9P_TWSTAT, l9p_dispatch_twstat, false}
+};
+static const struct l9p_handler l9p_handlers_dotu[] = {
+	{L9P_TVERSION, l9p_dispatch_tversion, true},
+	{L9P_TATTACH, l9p_dispatch_tattach, true},
+	{L9P_TCLUNK, l9p_dispatch_tclunk, true},
+	{L9P_TFLUSH, l9p_threadpool_tflush, true},
+	{L9P_TCREATE, l9p_dispatch_tcreate, false},
+	{L9P_TOPEN, l9p_dispatch_topen, false},
+	{L9P_TREAD, l9p_dispatch_tread, false},
+	{L9P_TWRITE, l9p_dispatch_twrite, false},
+	{L9P_TREMOVE, l9p_dispatch_tremove, true},
+	{L9P_TSTAT, l9p_dispatch_tstat, false},
+	{L9P_TWALK, l9p_dispatch_twalk, false},
+	{L9P_TWSTAT, l9p_dispatch_twstat, false}
+};
+static const struct l9p_handler l9p_handlers_dotL[] = {
+	{L9P_TVERSION, l9p_dispatch_tversion, true},
+	{L9P_TATTACH, l9p_dispatch_tattach, true},
+	{L9P_TCLUNK, l9p_dispatch_tclunk, true},
+	{L9P_TFLUSH, l9p_threadpool_tflush, true},
+	{L9P_TCREATE, l9p_dispatch_tcreate, false},
+	{L9P_TOPEN, l9p_dispatch_topen, false},
+	{L9P_TREAD, l9p_dispatch_tread, false},
+	{L9P_TWRITE, l9p_dispatch_twrite, false},
+	{L9P_TREMOVE, l9p_dispatch_tremove, true},
+	{L9P_TSTAT, l9p_dispatch_tstat, false},
+	{L9P_TWALK, l9p_dispatch_twalk, false},
+	{L9P_TWSTAT, l9p_dispatch_twstat, false},
+	{L9P_TSTATFS, l9p_dispatch_tstatfs, false},
+	{L9P_TLOPEN, l9p_dispatch_tlopen, false},
+	{L9P_TLCREATE, l9p_dispatch_tlcreate, false},
+	{L9P_TSYMLINK, l9p_dispatch_tsymlink, false},
+	{L9P_TMKNOD, l9p_dispatch_tmknod, false},
+	{L9P_TRENAME, l9p_dispatch_trename, false},
+	{L9P_TREADLINK, l9p_dispatch_treadlink, false},
+	{L9P_TGETATTR, l9p_dispatch_tgetattr, false},
+	{L9P_TSETATTR, l9p_dispatch_tsetattr, false},
+	{L9P_TXATTRWALK, l9p_dispatch_txattrwalk, false},
+	{L9P_TXATTRCREATE, l9p_dispatch_txattrcreate, false},
+	{L9P_TREADDIR, l9p_dispatch_treaddir, false},
+	{L9P_TFSYNC, l9p_dispatch_tfsync, false},
+	{L9P_TLOCK, l9p_dispatch_tlock, true},
+	{L9P_TGETLOCK, l9p_dispatch_tgetlock, true},
+	{L9P_TLINK, l9p_dispatch_tlink, false},
+	{L9P_TMKDIR, l9p_dispatch_tmkdir, false},
+	{L9P_TRENAMEAT, l9p_dispatch_trenameat, false},
+	{L9P_TUNLINKAT, l9p_dispatch_tunlinkat, false},
+};
+
+/*
+ * NB: version index 0 is reserved for new connections, and
+ * is a protocol that handles only L9P_TVERSION.  Once we get a
+ * valid version, we start a new session using its dispatch table.
+ */
+static const struct {
+	const char *name;
+	const struct l9p_handler *handlers;
+	int n_handlers;
+} l9p_versions[] = {
+	{ "<none>", l9p_handlers_no_version, N(l9p_handlers_no_version) },
+	{ "9P2000", l9p_handlers_base, N(l9p_handlers_base) },
+	{ "9P2000.u", l9p_handlers_dotu, N(l9p_handlers_dotu), },
+	{ "9P2000.L", l9p_handlers_dotL, N(l9p_handlers_dotL), },
+};
+
+/*
+ * Run the appropriate handler for this request.
+ * It's our caller's responsibility to respond.
+ */
+int
+l9p_dispatch_request(struct l9p_request *req)
+{
+	struct l9p_connection *conn;
+#if defined(L9P_DEBUG)
+	struct sbuf *sb;
+#endif
+	size_t i, n;
+	const struct l9p_handler *handlers, *hp;
+	bool flush_requested;
+
+	conn = req->lr_conn;
+	flush_requested = req->lr_flushstate == L9P_FLUSH_REQUESTED_PRE_START;
+
+	handlers = l9p_versions[conn->lc_version].handlers;
+	n = (size_t)l9p_versions[conn->lc_version].n_handlers;
+	for (hp = handlers, i = 0; i < n; hp++, i++)
+		if (req->lr_req.hdr.type == hp->type)
+			goto found;
+	hp = NULL;
+found:
+
+#if defined(L9P_DEBUG)
+	sb = sbuf_new_auto();
+	if (flush_requested) {
+		sbuf_cat(sb, "FLUSH requested pre-dispatch");
+		if (hp != NULL && hp->must_run)
+			sbuf_cat(sb, ", but must run");
+		sbuf_cat(sb, ": ");
+	}
+	l9p_describe_fcall(&req->lr_req, conn->lc_version, sb);
+	sbuf_finish(sb);
+
+	L9P_LOG(L9P_DEBUG, "%s", sbuf_data(sb));
+	sbuf_delete(sb);
+#endif
+
+	if (hp != NULL) {
+		if (!flush_requested || hp->must_run)
+			return (hp->handler(req));
+		return (EINTR);
+	}
+
+	L9P_LOG(L9P_WARNING, "unknown request of type %d",
+	    req->lr_req.hdr.type);
+	return (ENOSYS);
+}
+
+/*
+ * Translate BSD errno to 9P2000/9P2000.u errno.
+ */
+static inline int
+e29p(int errnum)
+{
+	static int const table[] = {
+		[ENOTEMPTY] = EPERM,
+		[EDQUOT] = EPERM,
+		[ENOSYS] = EPERM,	/* ??? */
+	};
+
+	if ((size_t)errnum < N(table) && table[errnum] != 0)
+		return (table[errnum]);
+	if (errnum <= ERANGE)
+		return (errnum);
+	return (EIO);			/* ??? */
+}
+
+/*
+ * Translate BSD errno to Linux errno.
+ */
+static inline int
+e2linux(int errnum)
+{
+	static int const table[] = {
+		[EDEADLK] = LINUX_EDEADLK,
+		[EAGAIN] = LINUX_EAGAIN,
+		[EINPROGRESS] = LINUX_EINPROGRESS,
+		[EALREADY] = LINUX_EALREADY,
+		[ENOTSOCK] = LINUX_ENOTSOCK,
+		[EDESTADDRREQ] = LINUX_EDESTADDRREQ,
+		[EMSGSIZE] = LINUX_EMSGSIZE,
+		[EPROTOTYPE] = LINUX_EPROTOTYPE,
+		[ENOPROTOOPT] = LINUX_ENOPROTOOPT,
+		[EPROTONOSUPPORT] = LINUX_EPROTONOSUPPORT,
+		[ESOCKTNOSUPPORT] = LINUX_ESOCKTNOSUPPORT,
+		[EOPNOTSUPP] = LINUX_EOPNOTSUPP,
+		[EPFNOSUPPORT] = LINUX_EPFNOSUPPORT,
+		[EAFNOSUPPORT] = LINUX_EAFNOSUPPORT,
+		[EADDRINUSE] = LINUX_EADDRINUSE,
+		[EADDRNOTAVAIL] = LINUX_EADDRNOTAVAIL,
+		[ENETDOWN] = LINUX_ENETDOWN,
+		[ENETUNREACH] = LINUX_ENETUNREACH,
+		[ENETRESET] = LINUX_ENETRESET,
+		[ECONNABORTED] = LINUX_ECONNABORTED,
+		[ECONNRESET] = LINUX_ECONNRESET,
+		[ENOBUFS] = LINUX_ENOBUFS,
+		[EISCONN] = LINUX_EISCONN,
+		[ENOTCONN] = LINUX_ENOTCONN,
+		[ESHUTDOWN] = LINUX_ESHUTDOWN,
+		[ETOOMANYREFS] = LINUX_ETOOMANYREFS,
+		[ETIMEDOUT] = LINUX_ETIMEDOUT,
+		[ECONNREFUSED] = LINUX_ECONNREFUSED,
+		[ELOOP] = LINUX_ELOOP,
+		[ENAMETOOLONG] = LINUX_ENAMETOOLONG,
+		[EHOSTDOWN] = LINUX_EHOSTDOWN,
+		[EHOSTUNREACH] = LINUX_EHOSTUNREACH,
+		[ENOTEMPTY] = LINUX_ENOTEMPTY,
+		[EPROCLIM] = LINUX_EAGAIN,
+		[EUSERS] = LINUX_EUSERS,
+		[EDQUOT] = LINUX_EDQUOT,
+		[ESTALE] = LINUX_ESTALE,
+		[EREMOTE] = LINUX_EREMOTE,
+		/* EBADRPC = unmappable? */
+		/* ERPCMISMATCH = unmappable? */
+		/* EPROGUNAVAIL = unmappable? */
+		/* EPROGMISMATCH = unmappable? */
+		/* EPROCUNAVAIL = unmappable? */
+		[ENOLCK] = LINUX_ENOLCK,
+		[ENOSYS] = LINUX_ENOSYS,
+		/* EFTYPE = unmappable? */
+		/* EAUTH = unmappable? */
+		/* ENEEDAUTH = unmappable? */
+		[EIDRM] = LINUX_EIDRM,
+		[ENOMSG] = LINUX_ENOMSG,
+		[EOVERFLOW] = LINUX_EOVERFLOW,
+		[ECANCELED] = LINUX_ECANCELED,
+		[EILSEQ] = LINUX_EILSEQ,
+		/* EDOOFUS = unmappable? */
+		[EBADMSG] = LINUX_EBADMSG,
+		[EMULTIHOP] = LINUX_EMULTIHOP,
+		[ENOLINK] = LINUX_ENOLINK,
+		[EPROTO] = LINUX_EPROTO,
+		/* ENOTCAPABLE = unmappable? */
+		[ECAPMODE] = EPERM,
+#ifdef ENOTRECOVERABLE
+		[ENOTRECOVERABLE] = LINUX_ENOTRECOVERABLE,
+#endif
+#ifdef EOWNERDEAD
+		[EOWNERDEAD] = LINUX_EOWNERDEAD,
+#endif
+	};
+
+	/*
+	 * In case we want to return a raw Linux errno, allow negative
+	 * values a la Linux kernel internals.
+	 *
+	 * Values up to ERANGE are shared across systems (see
+	 * linux_errno.h), except for EAGAIN.
+	 */
+	if (errnum < 0)
+		return (-errnum);
+
+	if ((size_t)errnum < N(table) && table[errnum] != 0)
+		return (table[errnum]);
+
+	if (errnum <= ERANGE)
+		return (errnum);
+
+	L9P_LOG(L9P_WARNING, "cannot map errno %d to anything reasonable",
+	    errnum);
+
+	return (LINUX_ENOTRECOVERABLE);	/* ??? */
+}
+
+/*
+ * Send response to request, or possibly just drop request.
+ * We also need to know whether to remove the request from
+ * the tag hash table.
+ */
+void
+l9p_respond(struct l9p_request *req, bool drop, bool rmtag)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	size_t iosize;
+#if defined(L9P_DEBUG)
+	struct sbuf *sb;
+	const char *ftype = "";	/* stays "" for an unknown flush state */
+#endif
+	int error;
+
+	req->lr_resp.hdr.tag = req->lr_req.hdr.tag;
+
+	error = req->lr_error;
+	if (error == 0)
+		req->lr_resp.hdr.type = req->lr_req.hdr.type + 1;
+	else {
+		if (conn->lc_version == L9P_2000L) {
+			req->lr_resp.hdr.type = L9P_RLERROR;
+			req->lr_resp.error.errnum = (uint32_t)e2linux(error);
+		} else {
+			req->lr_resp.hdr.type = L9P_RERROR;
+			req->lr_resp.error.ename = strerror(error);
+			req->lr_resp.error.errnum = (uint32_t)e29p(error);
+		}
+	}
+
+#if defined(L9P_DEBUG)
+	sb = sbuf_new_auto();
+	l9p_describe_fcall(&req->lr_resp, conn->lc_version, sb);
+	sbuf_finish(sb);
+
+	switch (req->lr_flushstate) {
+	case L9P_FLUSH_NONE:
+		ftype = "";
+		break;
+	case L9P_FLUSH_REQUESTED_PRE_START:
+		ftype = "FLUSH requested pre-dispatch: ";
+		break;
+	case L9P_FLUSH_REQUESTED_POST_START:
+		ftype = "FLUSH requested while running: ";
+		break;
+	case L9P_FLUSH_TOOLATE:
+		ftype = "FLUSH requested too late: ";
+		break;
+	}
+	L9P_LOG(L9P_DEBUG, "%s%s%s",
+	    drop ? "DROP: " : "", ftype, sbuf_data(sb));
+	sbuf_delete(sb);
+#endif
+
+	error = drop ? 0 :
+	    l9p_pufcall(&req->lr_resp_msg, &req->lr_resp, conn->lc_version);
+	if (rmtag)
+		ht_remove(&conn->lc_requests, req->lr_req.hdr.tag);
+	if (error != 0) {
+		L9P_LOG(L9P_ERROR, "cannot pack response");
+		drop = true;
+	}
+
+	if (drop) {
+		conn->lc_lt.lt_drop_response(req,
+		    req->lr_resp_msg.lm_iov, req->lr_resp_msg.lm_niov,
+		    conn->lc_lt.lt_aux);
+	} else {
+		iosize = req->lr_resp_msg.lm_size;
+
+		/*
+		 * Include I/O size in calculation for Rread and
+		 * Rreaddir responses.
+		 */
+		if (req->lr_resp.hdr.type == L9P_RREAD ||
+		    req->lr_resp.hdr.type == L9P_RREADDIR)
+			iosize += req->lr_resp.io.count;
+
+		conn->lc_lt.lt_send_response(req,
+		    req->lr_resp_msg.lm_iov, req->lr_resp_msg.lm_niov,
+		    iosize, conn->lc_lt.lt_aux);
+	}
+
+	l9p_freefcall(&req->lr_req);
+	l9p_freefcall(&req->lr_resp);
+
+	free(req);
+}
+
+/*
+ * This allows a caller to iterate through the data in a
+ * read or write request (creating the data if packing,
+ * scanning through it if unpacking).  This is used for
+ * writing readdir entries, so mode should be L9P_PACK
+ * (but we allow L9P_UNPACK so that debug code can also scan
+ * through the data later, if desired).
+ *
+ * This relies on the Tread op having positioned the request's
+ * iov to the beginning of the data buffer (note the l9p_seek_iov
+ * in l9p_dispatch_tread).
+ */
+void
+l9p_init_msg(struct l9p_message *msg, struct l9p_request *req,
+    enum l9p_pack_mode mode)
+{
+
+	msg->lm_size = 0;
+	msg->lm_mode = mode;
+	msg->lm_cursor_iov = 0;
+	msg->lm_cursor_offset = 0;
+	msg->lm_niov = req->lr_data_niov;
+	memcpy(msg->lm_iov, req->lr_data_iov,
+	    sizeof (struct iovec) * req->lr_data_niov);
+}
+
+enum fid_lookup_flags {
+	F_REQUIRE_OPEN = 0x01,	/* require that the file be marked OPEN */
+	F_REQUIRE_DIR = 0x02,	/* require that the file be marked ISDIR */
+	F_REQUIRE_XATTR = 0x04,	/* require that the file be marked XATTR */
+	F_REQUIRE_AUTH = 0x08,	/* require that the fid be marked AUTH */
+	F_FORBID_OPEN = 0x10,	/* forbid that the file be marked OPEN */
+	F_FORBID_DIR = 0x20,	/* forbid that the file be marked ISDIR */
+	F_FORBID_XATTR = 0x40,	/* forbid that the file be marked XATTR */
+	F_ALLOW_AUTH = 0x80,	/* allow that the fid be marked AUTH */
+};
+
+/*
+ * Look up a fid.  It must correspond to a valid file, else we return
+ * the given errno (some "not a valid fid" calls must return EIO and
+ * some must return EINVAL and qemu returns ENOENT in other cases and
+ * so on, so we just provide a general "return this error number").
+ *
+ * Callers may also set constraints: fid must be (or not be) open,
+ * must be (or not be) a directory, must be (or not be) an xattr.
+ *
+ * Only one op has a fid that *must* be an auth fid.  Most ops forbid
+ * auth fids.  So instead of FORBID we have ALLOW here and the default
+ * is FORBID.
+ */
+static inline int
+fid_lookup(struct l9p_connection *conn, uint32_t fid, int err, int flags,
+    struct l9p_fid **afile)
+{
+	struct l9p_fid *file;
+
+	file = ht_find(&conn->lc_files, fid);
+	if (file == NULL)
+		return (err);
+
+	/*
+	 * As soon as we go multithreaded / async, this
+	 * assert has to become "return EINVAL" or "return err".
+	 *
+	 * We may also need a way to mark a fid as
+	 * "in async op" (valid for some purposes, but cannot be
+	 * used elsewhere until async op is completed or aborted).
+	 *
+	 * For now, this serves for bug-detecting.
+	 */
+	assert(l9p_fid_isvalid(file));
+
+	/*
+	 * Note that we're inline expanded and flags is constant,
+	 * so unnecessary tests just drop out entirely.
+	 */
+	if ((flags & F_REQUIRE_OPEN) && !l9p_fid_isopen(file))
+		return (EINVAL);
+	if ((flags & F_FORBID_OPEN) && l9p_fid_isopen(file))
+		return (EINVAL);
+	if ((flags & F_REQUIRE_DIR) && !l9p_fid_isdir(file))
+		return (ENOTDIR);
+	if ((flags & F_FORBID_DIR) && l9p_fid_isdir(file))
+		return (EISDIR);
+	if ((flags & F_REQUIRE_XATTR) && !l9p_fid_isxattr(file))
+		return (EINVAL);
+	if ((flags & F_FORBID_XATTR) && l9p_fid_isxattr(file))
+		return (EINVAL);
+	if (l9p_fid_isauth(file)) {
+		if ((flags & (F_REQUIRE_AUTH | F_ALLOW_AUTH)) == 0)
+			return (EINVAL);
+	} else if (flags & F_REQUIRE_AUTH)
+		return (EINVAL);
+	*afile = file;
+	return (0);
+}
+
+/*
+ * Append variable-size stat object and adjust io count.
+ * Returns 0 if the entire stat object was packed, -1 if not.
+ * A fully packed object updates the request's io count.
+ *
+ * Caller must use their own private l9p_message object since
+ * a partially packed object will leave the message object in
+ * a useless state.
+ *
+ * Frees the stat object.
+ */
+int
+l9p_pack_stat(struct l9p_message *msg, struct l9p_request *req,
+    struct l9p_stat *st)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	uint16_t size = l9p_sizeof_stat(st, conn->lc_version);
+	int ret = 0;
+
+	assert(msg->lm_mode == L9P_PACK);
+
+	if (req->lr_resp.io.count + size > req->lr_req.io.count ||
+	    l9p_pustat(msg, st, conn->lc_version) < 0)
+		ret = -1;
+	else
+		req->lr_resp.io.count += size;
+	l9p_freestat(st);
+	return (ret);
+}
+
+static int
+l9p_dispatch_tversion(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_server *server = conn->lc_server;
+	enum l9p_version remote_version = L9P_INVALID_VERSION;
+	size_t i;
+
+	for (i = 0; i < N(l9p_versions); i++) {
+		if (strcmp(req->lr_req.version.version,
+		    l9p_versions[i].name) == 0) {
+			remote_version = (enum l9p_version)i;
+			break;
+		}
+	}
+
+	if (remote_version == L9P_INVALID_VERSION) {
+		L9P_LOG(L9P_ERROR, "unsupported remote version: %s",
+		    req->lr_req.version.version);
+		return (ENOSYS);
+	}
+
+	L9P_LOG(L9P_INFO, "remote version: %s", req->lr_req.version.version);
+	L9P_LOG(L9P_INFO, "local version: %s",
+	    l9p_versions[server->ls_max_version].name);
+
+	/* Rversion must name the negotiated version, not the requested one. */
+	conn->lc_version = MIN(remote_version, server->ls_max_version);
+	conn->lc_msize = MIN(req->lr_req.version.msize, conn->lc_msize);
+	conn->lc_max_io_size = conn->lc_msize - 24;
+	req->lr_resp.version.version =
+	    strdup(l9p_versions[conn->lc_version].name);
+	req->lr_resp.version.msize = conn->lc_msize;
+	return (0);
+}
+
+static int
+l9p_dispatch_tattach(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be;
+	struct l9p_fid *fid;
+	int error;
+
+	/*
+	 * We still don't have Tauth yet, but let's code this part
+	 * anyway.
+	 *
+	 * Look up the auth fid first since if it fails we can just
+	 * return immediately.
+	 */
+	if (req->lr_req.tattach.afid != L9P_NOFID) {
+		error = fid_lookup(conn, req->lr_req.tattach.afid, EINVAL,
+		    F_REQUIRE_AUTH, &req->lr_fid2);
+		if (error)
+			return (error);
+	} else
+		req->lr_fid2 = NULL;
+
+	fid = l9p_connection_alloc_fid(conn, req->lr_req.hdr.fid);
+	if (fid == NULL)
+		return (EINVAL);
+
+	be = conn->lc_server->ls_backend;
+
+	req->lr_fid = fid;
+
+	/* For backend convenience, set NONUNAME on 9P2000. */
+	if (conn->lc_version == L9P_2000)
+		req->lr_req.tattach.n_uname = L9P_NONUNAME;
+	error = be->attach(be->softc, req);
+
+	/*
+	 * On success, fid becomes valid; on failure, remove the new fid.
+	 * It certainly *should* be a directory here...
+	 */
+	if (error == 0) {
+		l9p_fid_setvalid(fid);
+		if (req->lr_resp.rattach.qid.type & L9P_QTDIR)
+			l9p_fid_setdir(fid);
+	} else
+		l9p_connection_remove_fid(conn, fid);
+	return (error);
+}
+
+static int
+l9p_dispatch_tclunk(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be = conn->lc_server->ls_backend;
+	struct l9p_fid *fid;
+	int rv;
+
+	/* Auth fids are allowed: clunk is the only way to get rid of one. */
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_ALLOW_AUTH, &fid);
+	if (rv != 0)
+		return (rv);
+
+	/* The fid stops being valid before the backend is called. */
+	l9p_fid_unsetvalid(fid);
+
+	/*
+	 * An xattr fid implies that the backend provides xattrclunk:
+	 * xattrclunk may only be NULL when xattrwalk and xattrcreate
+	 * are NULL too (or always return an error), and in that case
+	 * no xattr fid can exist.  Hence there is no NULL check.
+	 * Every other fid goes to the ordinary clunk.
+	 *
+	 * Q: should xattrclunk be allowed to run asynchronously in
+	 * case of a very large xattr create?  That would make
+	 * things difficult, so probably not.
+	 */
+	rv = l9p_fid_isxattr(fid) ?
+	    be->xattrclunk(be->softc, fid) : be->clunk(be->softc, fid);
+
+	/* Whatever the backend returned, the fid is gone now. */
+	l9p_connection_remove_fid(conn, fid);
+	return (rv);
+}
+
+static int
+l9p_dispatch_tcreate(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be = conn->lc_server->ls_backend;
+	uint32_t perm;
+	int rv;
+
+	/* The fid must name a directory, and must not be open yet. */
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (rv != 0)
+		return (rv);
+
+	/* Perm bits for which the fid is left as an unopened directory. */
+#define MKDIR_OR_SIMILAR \
+    (L9P_DMDIR | L9P_DMSYMLINK | L9P_DMNAMEDPIPE | L9P_DMSOCKET | L9P_DMDEVICE)
+
+	perm = req->lr_req.tcreate.perm;	/* read before the backend runs */
+
+	/*
+	 * TODO:
+	 *  - validate the new file name
+	 *  - split up the different kinds of create (file, mkdir, ...)
+	 *  - async file-create (would leave req->lr_fid in limbo)
+	 */
+	rv = be->create(be->softc, req);
+	if (rv != 0 || (perm & MKDIR_OR_SIMILAR) != 0)
+		return (rv);
+
+	/* A successful plain-file create turns the fid into an open file. */
+	l9p_fid_unsetdir(req->lr_fid);
+	l9p_fid_setopen(req->lr_fid);
+	return (0);
+}
+
+static int
+l9p_dispatch_topen(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be = conn->lc_server->ls_backend;
+	int rv;
+
+	/* Neither an already-open fid nor an xattr fid may be opened. */
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_OPEN | F_FORBID_XATTR, &req->lr_fid);
+	if (rv != 0)
+		return (rv);
+
+	/*
+	 * TODO:
+	 *  - async open (would leave req->lr_fid in limbo)
+	 */
+	rv = be->open(be->softc, req);
+	if (rv != 0)
+		return (rv);
+	l9p_fid_setopen(req->lr_fid);
+	return (0);
+}
+
+static int
+l9p_dispatch_tread(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be = conn->lc_server->ls_backend;
+	struct l9p_fid *fid;
+	int rv;
+
+	/*
+	 * No lookup flags: xattr fids are not open, so the open
+	 * and xattr states are tested by hand further down.
+	 */
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL, 0, &req->lr_fid);
+	if (rv != 0)
+		return (rv);
+	fid = req->lr_fid;
+
+	/*
+	 * Position the data iov so that packing starts right
+	 * after the count field of the response:
+	 *
+	 * size[4] + Rread[1] + tag[2] + count[4] = 11
+	 */
+	l9p_seek_iov(req->lr_resp_msg.lm_iov, req->lr_resp_msg.lm_niov,
+	    req->lr_data_iov, &req->lr_data_niov, 11);
+
+	/*
+	 * An xattr fid implies an xattrread: it can only be NULL
+	 * when xattrwalk and xattrcreate are NULL or always fail,
+	 * and then no xattr fid exists.
+	 *
+	 * TODO:
+	 *   separate out directory-read
+	 *   allow async read
+	 */
+	if (l9p_fid_isxattr(fid))
+		return (be->xattrread(be->softc, req));
+	if (l9p_fid_isopen(fid))
+		return (be->read(be->softc, req));
+
+	/* Neither an xattr fid nor an open file. */
+	return (EINVAL);
+}
+
+static int
+l9p_dispatch_tremove(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be = conn->lc_server->ls_backend;
+	struct l9p_fid *fid;
+	int rv;
+
+	/*
+	 * ?? Should Tremove be allowed on auth fids?  If so,
+	 * should it just behave like a Tclunk?
+	 */
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL, 0, &fid);
+	if (rv != 0)
+		return (rv);
+
+	/* Invalidate the fid, then let the backend do the remove. */
+	l9p_fid_unsetvalid(fid);
+	rv = be->remove(be->softc, fid);
+
+	/* The fid is destroyed whether or not the remove worked. */
+	l9p_connection_remove_fid(conn, fid);
+	return (rv);
+}
+
+static int
+l9p_dispatch_tstat(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be;
+	struct l9p_fid *fid;
+	int error;
+
+	/* Allow Tstat on auth fid?  Seems harmless enough... */
+	error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_ALLOW_AUTH, &fid);
+	if (error)
+		return (error);
+
+	be = conn->lc_server->ls_backend;
+	req->lr_fid = fid;
+	error = be->stat(be->softc, req);
+	if (error == 0) {
+		if (l9p_fid_isauth(fid))
+			req->lr_resp.rstat.stat.qid.type |= L9P_QTAUTH;
+		/*
+		 * Mirror the dir bit into the fid.  Test with "&", not "&=",
+		 * so the qid type sent in the reply is left intact.
+		 */
+		if (req->lr_resp.rstat.stat.qid.type & L9P_QTDIR)
+			l9p_fid_setdir(fid);
+		else
+			l9p_fid_unsetdir(fid);
+	}
+	return (error);
+}
+
+static int
+l9p_dispatch_twalk(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be;
+	struct l9p_fid *fid, *newfid;
+	uint16_t n;
+	int error;
+
+	/* Can forbid XATTR, but cannot require DIR. */
+	error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_XATTR, &fid);
+	if (error)
+		return (error);
+
+	if (req->lr_req.twalk.hdr.fid != req->lr_req.twalk.newfid) {
+		newfid = l9p_connection_alloc_fid(conn,
+		    req->lr_req.twalk.newfid);
+		if (newfid == NULL)
+			return (EINVAL);
+	} else
+		newfid = fid;
+
+	be = conn->lc_server->ls_backend;
+	req->lr_fid = fid;
+	req->lr_newfid = newfid;
+	error = be->walk(be->softc, req);
+
+	/*
+	 * If newfid == fid, then fid itself has (potentially) changed,
+	 * but is still valid.  Otherwise set newfid valid on
+	 * success, and destroy it on error.
+	 */
+	if (newfid != fid) {
+		if (error == 0)
+			l9p_fid_setvalid(newfid);
+		else
+			l9p_connection_remove_fid(conn, newfid);
+	}
+
+	/*
+	 * If any names were walked, the last (n-1'th) qid has the
+	 * type of newfid; the dir bit is cleared as well as set, since
+	 * with fid == newfid a directory fid may now name a file.
+	 * With n == 0, newfid simply has the same type as fid.
+	 */
+	if (error == 0) {
+		n = req->lr_resp.rwalk.nwqid;
+		if (n > 0) {
+			if (req->lr_resp.rwalk.wqid[n - 1].type & L9P_QTDIR)
+				l9p_fid_setdir(newfid);
+			else
+				l9p_fid_unsetdir(newfid);
+		} else {
+			if (l9p_fid_isdir(fid))
+				l9p_fid_setdir(newfid);
+		}
+	}
+	return (error);
+}
+
+static int
+l9p_dispatch_twrite(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be = conn->lc_server->ls_backend;
+	struct l9p_fid *fid;
+	int rv;
+
+	/* Directories are refused; xattr writes rule out requiring open. */
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL,
+	    F_FORBID_DIR, &req->lr_fid);
+	if (rv != 0)
+		return (rv);
+	fid = req->lr_fid;
+
+	/*
+	 * Position the data iov on the bytes to be written.  This
+	 * mirrors l9p_dispatch_tread, except that it points into
+	 * the request buffer instead of the response:
+	 *
+	 * size[4] + Twrite[1] + tag[2] + fid[4] + offset[8] + count[4] = 23
+	 */
+	l9p_seek_iov(req->lr_req_msg.lm_iov, req->lr_req_msg.lm_niov,
+	    req->lr_data_iov, &req->lr_data_niov, 23);
+
+	/*
+	 * Unlike read, write and xattrwrite are optional, so that
+	 * a backend can be a read-only file system.
+	 *
+	 * TODO:
+	 *   allow async write
+	 */
+	if (l9p_fid_isxattr(fid)) {
+		if (be->xattrwrite == NULL)
+			return (ENOSYS);
+		return (be->xattrwrite(be->softc, req));
+	}
+
+	if (!l9p_fid_isopen(fid))
+		return (EINVAL);	/* ordinary fid that was never opened */
+	if (be->write == NULL)
+		return (ENOSYS);
+	return (be->write(be->softc, req));
+}
+
+static int
+l9p_dispatch_twstat(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be = conn->lc_server->ls_backend;
+	int rv;
+
+	/* Xattr fids cannot be wstat'ed. */
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL,
+	    F_FORBID_XATTR, &req->lr_fid);
+	if (rv != 0)
+		return (rv);
+
+	/* A backend without wstat answers ENOSYS. */
+	return (be->wstat != NULL ? be->wstat(be->softc, req) : ENOSYS);
+}
+
+static int
+l9p_dispatch_tstatfs(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be;
+	int error;
+
+	/* Should we allow statfs on auth fids? */
+	error = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL, 0, &req->lr_fid);
+	if (error)
+		return (error);
+
+	/* As with the other optional ops here, no statfs means ENOSYS. */
+	be = conn->lc_server->ls_backend;
+	return (be->statfs != NULL ? be->statfs(be->softc, req) : ENOSYS);
+}
+
+static int
+l9p_dispatch_tlopen(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be = conn->lc_server->ls_backend;
+	int rv;
+
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_OPEN | F_FORBID_XATTR, &req->lr_fid);
+	if (rv != 0)
+		return (rv);
+	if (be->lopen == NULL)
+		return (ENOSYS);	/* backend has no lopen */
+
+	/*
+	 * TODO:
+	 *  - async open (would leave req->lr_fid in limbo)
+	 */
+	rv = be->lopen(be->softc, req);
+	if (rv == 0)
+		l9p_fid_setopen(req->lr_fid);	/* fid is an open file now */
+	return (rv);
+}
+
+static int
+l9p_dispatch_tlcreate(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be = conn->lc_server->ls_backend;
+	int rv;
+
+	/* The fid must name a directory that has not been opened. */
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (rv != 0)
+		return (rv);
+
+	/*
+	 * TODO:
+	 *  - validate the new file name
+	 *  - async create (would leave req->lr_fid in limbo)
+	 */
+	rv = be->lcreate != NULL ? be->lcreate(be->softc, req) : ENOSYS;
+	if (rv != 0)
+		return (rv);
+	/* On success the fid is no longer a dir: it is an open file. */
+	l9p_fid_unsetdir(req->lr_fid);
+	l9p_fid_setopen(req->lr_fid);
+	return (0);
+}
+
+static int
+l9p_dispatch_tsymlink(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be = conn->lc_server->ls_backend;
+	int rv;
+
+	/* The containing dir is left untouched; maybe allow OPEN? */
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (rv != 0)
+		return (rv);
+
+	/*
+	 * TODO:
+	 *  - validate the new file name before handing it
+	 *    to the backend
+	 */
+	if (be->symlink == NULL)
+		return (ENOSYS);
+	return (be->symlink(be->softc, req));
+}
+
+static int
+l9p_dispatch_tmknod(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be = conn->lc_server->ls_backend;
+	int rv;
+
+	/* The containing dir is left untouched; maybe allow OPEN? */
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (rv != 0)
+		return (rv);
+
+	/*
+	 * TODO:
+	 *  - validate the new file name before handing it
+	 *    to the backend
+	 */
+	if (be->mknod == NULL)
+		return (ENOSYS);
+	return (be->mknod(be->softc, req));
+}
+
+static int
+l9p_dispatch_trename(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be = conn->lc_server->ls_backend;
+	int rv;
+
+	/* The object renamed may be a dir or a file (symlink etc. too). */
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_XATTR, &req->lr_fid);
+	if (rv != 0)
+		return (rv);
+
+	/* The new dir fid is not modified; maybe allow OPEN? */
+	rv = fid_lookup(conn, req->lr_req.trename.dfid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid2);
+	if (rv != 0)
+		return (rv);
+
+	/*
+	 * TODO:
+	 *  - validate the new file name (trename.name) before
+	 *    handing it to the backend
+	 */
+	if (be->rename == NULL)
+		return (ENOSYS);
+	return (be->rename(be->softc, req));
+}
+
+static int
+l9p_dispatch_treadlink(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be = conn->lc_server->ls_backend;
+	int rv;
+
+	/*
+	 * Only a symlink can be read with readlink, and verifying
+	 * that is the backend's job (the underlying readlink fails
+	 * otherwise).  Refusing directories and open files here is
+	 * cheap, though, so do that much up front.
+	 */
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (rv != 0)
+		return (rv);
+
+	if (be->readlink == NULL)
+		return (ENOSYS);
+	return (be->readlink(be->softc, req));
+}
+
+static int
+l9p_dispatch_tgetattr(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be = conn->lc_server->ls_backend;
+	int rv;
+
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_XATTR, &req->lr_fid);
+	if (rv != 0)
+		return (rv);
+
+	/* getattr is optional: a backend without it answers ENOSYS. */
+	if (be->getattr == NULL)
+		return (ENOSYS);
+	return (be->getattr(be->softc, req));
+}
+
+static int
+l9p_dispatch_tsetattr(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be = conn->lc_server->ls_backend;
+	int rv;
+
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_XATTR, &req->lr_fid);
+	if (rv != 0)
+		return (rv);
+
+	/* setattr is optional: a backend without it answers ENOSYS. */
+	if (be->setattr == NULL)
+		return (ENOSYS);
+	return (be->setattr(be->softc, req));
+}
+
+static int
+l9p_dispatch_txattrwalk(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be = conn->lc_server->ls_backend;
+	struct l9p_fid *fid, *newfid;
+	int rv;
+
+	/*
+	 * It is unclear whether it matters if the file or dir is
+	 * open.  What is certain: fid must be a file or dir, and
+	 * newfid must be supplied, must differ from fid, and always
+	 * becomes a brand new xattr fid.  That makes this rather
+	 * unlike Twalk.
+	 */
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_XATTR, &fid);
+	if (rv != 0)
+		return (rv);
+
+	newfid = l9p_connection_alloc_fid(conn, req->lr_req.txattrwalk.newfid);
+	if (newfid == NULL)
+		return (EINVAL);
+	req->lr_fid = fid;
+	req->lr_newfid = newfid;
+	if (be->xattrwalk != NULL)
+		rv = be->xattrwalk(be->softc, req);
+	else
+		rv = ENOSYS;
+
+	/*
+	 * The outcome is handled much as in Twalk, but simpler,
+	 * because newfid is always a freshly allocated fid; on
+	 * success it additionally gets its xattr type bit.
+	 */
+	if (rv != 0) {
+		l9p_connection_remove_fid(conn, newfid);
+		return (rv);
+	}
+	l9p_fid_setvalid(newfid);
+	l9p_fid_setxattr(newfid);
+	return (0);
+}
+
+static int
+l9p_dispatch_txattrcreate(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be = conn->lc_server->ls_backend;
+	struct l9p_fid *fid;
+	int rv;
+
+	/*
+	 * An open fid is refused because the fid is about to turn
+	 * into an xattr fid.  Should open fids ever have to be
+	 * accepted, the fs code will need to cope with that.
+	 *
+	 * Curiously, qemu 9pfs reports a bad txattrwalk fid as
+	 * ENOENT but a bad txattrcreate fid as EINVAL; so do we.
+	 */
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL,
+	    F_FORBID_XATTR | F_FORBID_OPEN, &fid);
+	if (rv != 0)
+		return (rv);
+
+	req->lr_fid = fid;
+	/* xattrcreate is optional: a backend without it answers ENOSYS. */
+	if (be->xattrcreate == NULL)
+		return (ENOSYS);
+	rv = be->xattrcreate(be->softc, req);
+	if (rv != 0)
+		return (rv);
+
+	/*
+	 * Success: what used to be a regular (file or dir) fid
+	 * is an xattr fid from here on.
+	 */
+	l9p_fid_unsetdir(fid);
+	l9p_fid_setxattr(fid);
+	return (0);
+}
+
+static int
+l9p_dispatch_treaddir(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be = conn->lc_server->ls_backend;
+	int rv;
+
+	/* Only an open directory can be read with Treaddir. */
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_DIR | F_REQUIRE_OPEN, &req->lr_fid);
+	if (rv != 0)
+		return (rv);
+
+	/*
+	 * Position the data iov so that packing starts right
+	 * after the count field of the response:
+	 *
+	 * size[4] + Rreaddir[1] + tag[2] + count[4] = 11
+	 */
+	l9p_seek_iov(req->lr_resp_msg.lm_iov, req->lr_resp_msg.lm_niov,
+	    req->lr_data_iov, &req->lr_data_niov, 11);
+
+	if (be->readdir == NULL)
+		return (ENOSYS);
+	return (be->readdir(be->softc, req));
+}
+
+static int
+l9p_dispatch_tfsync(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be = conn->lc_server->ls_backend;
+	int rv;
+
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_OPEN, &req->lr_fid);
+	if (rv != 0)
+		return (rv);
+
+	/* fsync is optional: a backend without it answers ENOSYS. */
+	if (be->fsync == NULL)
+		return (ENOSYS);
+	return (be->fsync(be->softc, req));
+}
+
+static int
+l9p_dispatch_tlock(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be = conn->lc_server->ls_backend;
+	int rv;
+
+	/* Forbid directories? */
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_OPEN, &req->lr_fid);
+	if (rv != 0)
+		return (rv);
+
+	/*
+	 * TODO: handling of multiple clients; locking could
+	 * perhaps be made async.
+	 */
+	if (be->lock == NULL)
+		return (ENOSYS);
+	return (be->lock(be->softc, req));
+}
+
+static int
+l9p_dispatch_tgetlock(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be = conn->lc_server->ls_backend;
+	int rv;
+
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_OPEN, &req->lr_fid);
+	if (rv != 0)
+		return (rv);
+
+	/*
+	 * TODO: handling of multiple clients; locking could
+	 * perhaps be made async.
+	 */
+	if (be->getlock == NULL)
+		return (ENOSYS);
+	return (be->getlock(be->softc, req));
+}
+
+static int
+l9p_dispatch_tlink(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be = conn->lc_server->ls_backend;
+	int rv;
+
+	/*
+	 * In the current scheme dfid is stored in lr_fid2.
+	 *
+	 * Allow an open dir?  The target dir fid is not modified...
+	 */
+	rv = fid_lookup(conn, req->lr_req.tlink.dfid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid2);
+	if (rv != 0)
+		return (rv);
+
+	/* What gets linked must be neither a directory nor an xattr. */
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_DIR | F_FORBID_XATTR, &req->lr_fid);
+	if (rv != 0)
+		return (rv);
+
+	if (be->link == NULL)
+		return (ENOSYS);
+	return (be->link(be->softc, req));
+}
+
+static int
+l9p_dispatch_tmkdir(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be;
+	int error;
+
+	error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (error)
+		return (error);
+
+	/* No embedded slashes; read the name via the Tmkdir member. */
+	if (strchr(req->lr_req.tmkdir.name, '/') != NULL)
+		return (EINVAL);
+
+	/* mkdir is optional: a backend without it answers ENOSYS. */
+	be = conn->lc_server->ls_backend;
+	error = be->mkdir != NULL ? be->mkdir(be->softc, req) : ENOSYS;
+	return (error);
+}
+
+static int
+l9p_dispatch_trenameat(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be = conn->lc_server->ls_backend;
+	int rv;
+
+	/* Old and new directory fids alike must be unopened directories. */
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (rv != 0)
+		return (rv);
+
+	rv = fid_lookup(conn, req->lr_req.trenameat.newdirfid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid2);
+	if (rv != 0)
+		return (rv);
+
+	/* TODO: check old and new names */
+	if (be->renameat == NULL)
+		return (ENOSYS);
+	return (be->renameat(be->softc, req));
+}
+
+static int
+l9p_dispatch_tunlinkat(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be = conn->lc_server->ls_backend;
+	int rv;
+
+	/* The fid must be a directory that has not been opened. */
+	rv = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (rv != 0)
+		return (rv);
+
+	/* TODO: check dir-or-file name */
+	if (be->unlinkat == NULL)
+		return (ENOSYS);
+	return (be->unlinkat(be->softc, req));
+}
Index: lib/lib9p/rfuncs.h
===================================================================
--- /dev/null
+++ lib/lib9p/rfuncs.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2016 Chris Torek <chris.torek@gmail.com>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_RFUNCS_H
+#define LIB9P_RFUNCS_H
+
+#include <grp.h>
+#include <pwd.h>
+#include <string.h>
+
+#if defined(WITH_CASPER)
+#include <libcasper.h>
+#endif
+
+/*
+ * Reentrant, optionally-malloc-ing versions of
+ * basename() and dirname().
+ */
+char	*r_basename(const char *, char *, size_t);
+char	*r_dirname(const char *, char *, size_t);
+
+/*
+ * Yuck: getpwuid, getgrgid are not thread-safe, and the
+ * POSIX replacements (getpwuid_r, getgrgid_r) are horrible.
+ * This is to allow us to loop over the get.*_r calls with ever
+ * increasing buffers until they succeed or get unreasonable
+ * (same idea as the libc code for the non-reentrant versions,
+ * although prettier).
+ *
+ * The getpwuid/getgrgid functions auto-init one of these,
+ * but the caller must call r_pgfree() when done with the
+ * return values.
+ *
+ * If we need more later, we may have to expose the init function.
+ */
+struct r_pgdata {
+	char	*r_pgbuf;	/* scratch buffer handed to the get*_r calls */
+	size_t	r_pgbufsize;	/* size passed along with r_pgbuf */
+	union {
+		struct passwd un_pw;	/* filled in by the getpwuid variants */
+		struct group un_gr;	/* filled in by the getgrgid variants */
+	} r_pgun;
+};
+
+/* void r_pginit(struct r_pgdata *); */
+void r_pgfree(struct r_pgdata *);
+struct passwd *r_getpwuid(uid_t, struct r_pgdata *);
+struct group *r_getgrgid(gid_t, struct r_pgdata *);
+
+#if defined(WITH_CASPER)
+struct passwd *r_cap_getpwuid(cap_channel_t *, uid_t, struct r_pgdata *);
+struct group *r_cap_getgrgid(cap_channel_t *, gid_t, struct r_pgdata *);
+#endif
+
+#endif	/* LIB9P_RFUNCS_H */
Index: lib/lib9p/rfuncs.c
===================================================================
--- /dev/null
+++ lib/lib9p/rfuncs.c
@@ -0,0 +1,320 @@
+/*
+ * Copyright 2016 Chris Torek <chris.torek@gmail.com>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#if defined(WITH_CASPER)
+#include <libcasper.h>
+#include <casper/cap_pwd.h>
+#include <casper/cap_grp.h>
+#endif
+
+#include "rfuncs.h"
+
+/*
+ * This is essentially a clone of the BSD basename_r function,
+ * which is like POSIX basename() but puts the result in a user
+ * supplied buffer.
+ *
+ * In BSD basename_r, the buffer must be at least MAXPATHLEN bytes
+ * long.  In our case we take the size of the buffer as an argument.
+ *
+ * Note that it's impossible in general to do this without
+ * a temporary buffer since basename("foo/bar") is "bar",
+ * but basename("foo/bar/") is still "bar" -- no trailing
+ * slash is allowed.
+ *
+ * The return value is your supplied buffer <buf>, or NULL if
+ * the length of the basename of the supplied <path> equals or
+ * exceeds your indicated <bufsize>.
+ *
+ * As a special but useful case, if you supply NULL for the <buf>
+ * argument, we allocate the buffer dynamically to match the
+ * basename, i.e., the result is basically strdup()ed for you.
+ * In this case <bufsize> is ignored (recommended: pass 0 here).
+ */
+char *
+r_basename(const char *path, char *buf, size_t bufsize)
+{
+	const char *first, *last;
+	size_t n;
+
+	/*
+	 * Treat a NULL or empty path as ".".  That may be more
+	 * lenient than necessary, but it agrees with libc
+	 * basename_r() and protects the pointer walk below.
+	 */
+	if (path == NULL || *path == '\0') {
+		first = ".";
+		n = 1;
+	} else {
+		/*
+		 * Step backwards over trailing slashes.  Running
+		 * into the start of the path while still on a
+		 * slash means that slash is a leading one too, i.e.
+		 * the whole path is "/" (or "//", "///", ...).
+		 */
+		last = path + strlen(path) - 1;
+		for (; last > path && *last == '/'; last--)
+			continue;
+		/* Here: last == path, or *last is not a slash. */
+		if (*last != '/') {
+			/*
+			 * Empty paths were dealt with above and
+			 * *last was just shown not to be a slash,
+			 * so there is a non-empty basename whose
+			 * final byte is *last.
+			 *
+			 * Walk "first" backwards across that one
+			 * path name component.  The walk stops
+			 * only when first == path or first[-1]
+			 * is a slash.
+			 *
+			 * If path[0] is a slash: *last is not one,
+			 * so the walk began beyond path and ended
+			 * at a slash (path[0] or a later one).
+			 *
+			 * If path[0] is not a slash: either the
+			 * path has no slash at all and the walk
+			 * ends with first == path, or first[-1]
+			 * is a slash.
+			 *
+			 * Either way the basename is the bytes
+			 * *first through *last, inclusive.
+			 */
+			for (first = last; first > path; first--) {
+				if (first[-1] == '/')
+					break;
+			}
+			n = (size_t)(last - first + 1);
+		} else {
+			/* last == path, so the path is nothing but slashes */
+			first = "/";
+			n = 1;
+		}
+	}
+	if (buf != NULL) {
+		if (n >= bufsize) {
+			errno = ENAMETOOLONG;
+			return (NULL);
+		}
+	} else {
+		buf = malloc(n + 1);
+		if (buf == NULL)
+			return (NULL);
+	}
+	memcpy(buf, first, n);
+	buf[n] = '\0';
+	return (buf);
+}
+
+/*
+ * This is much like POSIX dirname(), but is reentrant.
+ *
+ * We examine a path, find the directory portion, and copy that
+ * to a user supplied buffer <buf> of the given size <bufsize>.
+ *
+ * Note that dirname("/foo/bar/") is "/foo", dirname("/foo") is "/",
+ * and dirname("////") is "/". However, dirname("////foo/bar") is
+ * "////foo" (we do not resolve these leading slashes away -- this
+ * matches the BSD libc behavior).
+ *
+ * The return value is your supplied buffer <buf>, or NULL if
+ * the length of the dirname of the supplied <path> equals or
+ * exceeds your indicated <bufsize>.
+ *
+ * As a special but useful case, if you supply NULL for the <buf>
+ * argument, we allocate the buffer dynamically to match the
+ * dirname, i.e., the result is basically strdup()ed for you.
+ * In this case <bufsize> is ignored (recommended: pass 0 here).
+ */
+char *
+r_dirname(const char *path, char *buf, size_t bufsize)
+{
+	const char *endp, *dirpart;
+	size_t len;
+
+	/*
+	 * NULL or empty path means ".".  This is perhaps overly
+	 * forgiving but matches libc dirname(), and avoids breaking
+	 * the code below.
+	 */
+	if (path == NULL || *path == '\0') {
+		dirpart = ".";
+		len = 1;
+	} else {
+		/*
+		 * Back up over any trailing slashes, then back up
+		 * over the last path name component.  Both loops
+		 * stop as soon as endp==path so that we never back
+		 * out of the buffer.
+		 *
+		 * The first loop takes care of trailing slashes in
+		 * names like "/foo/bar//" (where the dirname part
+		 * is to be "/foo"); the second strips out the
+		 * non-dir-name part.
+		 *
+		 * If the second loop did not stop on a '/', the
+		 * name holds no directory part at all ("foo",
+		 * "foo/") and the dirname is ".".  Deciding this
+		 * here, rather than after the third loop, is what
+		 * keeps "a/b" from being mistaken for that case.
+		 *
+		 * Otherwise a third loop backs up over the slashes
+		 * before the last component, leaving endp on the
+		 * final byte of the dirname: path..endp, inclusive.
+		 */
+		endp = path + strlen(path) - 1;
+		while (endp > path && *endp == '/')
+			endp--;
+		while (endp > path && *endp != '/')
+			endp--;
+		if (*endp != '/') {
+			dirpart = ".";
+			len = 1;
+		} else {
+			while (endp > path && *endp == '/')
+				endp--;
+			dirpart = path;
+			len = (size_t)(endp - path + 1);
+		}
+	}
+	if (buf == NULL) {
+		buf = malloc(len + 1);
+		if (buf == NULL)
+			return (NULL);
+	} else {
+		if (len >= bufsize) {
+			errno = ENAMETOOLONG;
+			return (NULL);
+		}
+	}
+	memcpy(buf, dirpart, len);
+	buf[len] = '\0';
+	return (buf);
+}
+
+static void
+r_pginit(struct r_pgdata *pg)
+{
+
+	pg->r_pgbufsize = 1 << 9;	/* half size: r_pgexpand doubles first */
+	pg->r_pgbuf = NULL;		/* note that realloc(NULL) == malloc */
+}
+
+static int
+r_pgexpand(struct r_pgdata *pg)
+{
+	size_t nsize = pg->r_pgbufsize << 1;	/* double on every call */
+	char *nbuf;	/* temporary, so a failed realloc cannot leak r_pgbuf */
+
+	if (nsize >= (1 << 20) || (nbuf = realloc(pg->r_pgbuf, nsize)) == NULL)
+		return (ENOMEM);
+	pg->r_pgbuf = nbuf;
+	pg->r_pgbufsize = nsize;	/* else callers retry at one size forever */
+	return (0);
+}
+
+void
+r_pgfree(struct r_pgdata *pg)
+{
+
+	free(pg->r_pgbuf);	/* release the lookup buffer; r_pgbuf may be NULL */
+}
+
+struct passwd *
+r_getpwuid(uid_t uid, struct r_pgdata *pg)
+{
+	struct passwd *pw = NULL;
+	int rv;
+
+	/* Call r_pgexpand() before each try; retry only on ERANGE. */
+	r_pginit(pg);
+	while ((rv = r_pgexpand(pg)) == 0) {
+		rv = getpwuid_r(uid, &pg->r_pgun.un_pw,
+		    pg->r_pgbuf, pg->r_pgbufsize, &pw);
+		if (rv != ERANGE)
+			break;
+	}
+	return (rv != 0 ? NULL : pw);
+}
+
+struct group *
+r_getgrgid(gid_t gid, struct r_pgdata *pg)
+{
+	struct group *gr = NULL;
+	int rv;
+
+	/* Call r_pgexpand() before each try; retry only on ERANGE. */
+	r_pginit(pg);
+	while ((rv = r_pgexpand(pg)) == 0) {
+		rv = getgrgid_r(gid, &pg->r_pgun.un_gr,
+		    pg->r_pgbuf, pg->r_pgbufsize, &gr);
+		if (rv != ERANGE)
+			break;
+	}
+	return (rv != 0 ? NULL : gr);
+}
+
+#if defined(WITH_CASPER)
+struct passwd *
+r_cap_getpwuid(cap_channel_t *cap, uid_t uid, struct r_pgdata *pg)
+{
+	struct passwd *pw = NULL;
+	int rv;
+
+	/* Call r_pgexpand() before each try; retry only on ERANGE. */
+	r_pginit(pg);
+	while ((rv = r_pgexpand(pg)) == 0) {
+		rv = cap_getpwuid_r(cap, uid, &pg->r_pgun.un_pw,
+		    pg->r_pgbuf, pg->r_pgbufsize, &pw);
+		if (rv != ERANGE)
+			break;
+	}
+	return (rv != 0 ? NULL : pw);
+}
+
+struct group *
+r_cap_getgrgid(cap_channel_t *cap, gid_t gid, struct r_pgdata *pg)
+{
+	struct group *gr = NULL;
+	int rv;
+
+	/* Call r_pgexpand() before each try; retry only on ERANGE. */
+	r_pginit(pg);
+	while ((rv = r_pgexpand(pg)) == 0) {
+		rv = cap_getgrgid_r(cap, gid, &pg->r_pgun.un_gr,
+		    pg->r_pgbuf, pg->r_pgbufsize, &gr);
+		if (rv != ERANGE)
+			break;
+	}
+	return (rv != 0 ? NULL : gr);
+}
+#endif
Index: lib/lib9p/sbuf/sbuf.h
===================================================================
--- /dev/null
+++ lib/lib9p/sbuf/sbuf.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * Minimal libsbuf reimplementation for Mac OS X.
+ */
+
+#ifndef LIB9P_SBUF_H
+#define LIB9P_SBUF_H
+
+#include <stdarg.h>
+
+/*
+ * Growable string buffer state.
+ */
+struct sbuf
+{
+	char *s_buf;	/* heap storage for the accumulated text */
+	int s_size;	/* number of characters currently stored */
+	int s_capacity;	/* appends that would reach this size reallocate */
+	int s_position;	/* NOTE(review): not referenced by sbuf.c */
+};
+
+/* Allocate a new, empty, growable buffer. */
+struct sbuf *sbuf_new_auto(void);
+/* Append a NUL-terminated string; 0 on success, -1 on failure. */
+int sbuf_cat(struct sbuf *s, const char *str);
+/* Append printf-style formatted text; 0 on success, -1 on failure. */
+int sbuf_printf(struct sbuf *s, const char *fmt, ...);
+int sbuf_vprintf(struct sbuf *s, const char *fmt, va_list args);
+/* NOTE(review): sbuf.c provides no definition of sbuf_done() -- confirm. */
+int sbuf_done(struct sbuf *s);
+/* Free the buffer and the sbuf itself. */
+void sbuf_delete(struct sbuf *s);
+/* NUL-terminate the stored text; always returns 0. */
+int sbuf_finish(struct sbuf *s);
+/* Return the underlying character storage (still owned by the sbuf). */
+char *sbuf_data(struct sbuf *s);
+
+#endif /* LIB9P_SBUF_H */
+
Index: lib/lib9p/sbuf/sbuf.c
===================================================================
--- /dev/null
+++ lib/lib9p/sbuf/sbuf.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * Minimal libsbuf reimplementation for Mac OS X.
+ */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include "sbuf.h"
+
+#define	SBUF_INITIAL_SIZE	128
+
+/*
+ * Allocate an empty, automatically growing sbuf.
+ *
+ * Returns NULL if the sbuf header itself cannot be allocated.  If only
+ * the initial text buffer cannot be allocated, the sbuf is still
+ * returned with s_buf == NULL and s_capacity == 0; the append routines
+ * then allocate on first use.
+ */
+struct sbuf *
+sbuf_new_auto(void)
+{
+	struct sbuf *s;
+
+	/* calloc so every field, including s_position, starts out zero */
+	s = calloc(1, sizeof(*s));
+	if (s == NULL)
+		return (NULL);
+
+	s->s_buf = calloc(1, SBUF_INITIAL_SIZE + 1);
+	s->s_capacity = s->s_buf != NULL ? SBUF_INITIAL_SIZE : 0;
+	s->s_size = 0;
+
+	return (s);
+}
+
+/*
+ * Append the NUL-terminated string str to s.
+ *
+ * Returns 0 on success, or -1 if the buffer could not be grown.  On
+ * failure the text already accumulated in s is left intact.
+ */
+int
+sbuf_cat(struct sbuf *s, const char *str)
+{
+	int req = (int)strlen(str);
+	int ncap;
+	char *nbuf;
+
+	if (s->s_size + req >= s->s_capacity) {
+		/*
+		 * Grow through a temporary: assigning realloc()'s result
+		 * straight to s_buf would leak the old buffer (and lose
+		 * its contents) when the allocation fails.
+		 */
+		ncap = s->s_size + req + 1;
+		nbuf = realloc(s->s_buf, (size_t)ncap);
+		if (nbuf == NULL)
+			return (-1);
+		s->s_buf = nbuf;
+		s->s_capacity = ncap;
+	}
+	if (s->s_buf == NULL)
+		return (-1);
+
+	/* strcpy also copies the terminating NUL, which fits in capacity */
+	strcpy(s->s_buf + s->s_size, str);
+	s->s_size += req;
+
+	return (0);
+}
+
+/*
+ * Variadic front end for sbuf_vprintf(): format according to fmt and
+ * append the result to s.  Returns whatever sbuf_vprintf() returns.
+ */
+int
+sbuf_printf(struct sbuf *s, const char *fmt, ...)
+{
+	va_list args;
+	int rc;
+
+	va_start(args, fmt);
+	rc = sbuf_vprintf(s, fmt, args);
+	va_end(args);
+	return (rc);
+}
+
+/*
+ * Format according to fmt/args and append the result to s.
+ *
+ * The output is measured first with a NULL buffer, the storage is grown
+ * if needed, and then the text is formatted in place.
+ *
+ * Returns 0 on success, or -1 if formatting fails or the buffer could
+ * not be grown.  On failure the recorded size of s is unchanged.
+ */
+int
+sbuf_vprintf(struct sbuf *s, const char *fmt, va_list args)
+{
+	va_list copy;
+	char *nbuf;
+	int ncap, req;
+
+	/* args is consumed by the second pass, so measure with a copy */
+	va_copy(copy, args);
+	req = vsnprintf(NULL, 0, fmt, copy);
+	va_end(copy);
+
+	/* vsnprintf reports errors as a negative value; never size with it */
+	if (req < 0)
+		return (-1);
+
+	if (s->s_size + req >= s->s_capacity) {
+		/* temporary pointer: do not lose s_buf if realloc fails */
+		ncap = s->s_size + req + 1;
+		nbuf = realloc(s->s_buf, (size_t)ncap);
+		if (nbuf == NULL)
+			return (-1);
+		s->s_buf = nbuf;
+		s->s_capacity = ncap;
+	}
+	if (s->s_buf == NULL)
+		return (-1);
+
+	req = vsnprintf(s->s_buf + s->s_size, (size_t)req + 1, fmt, args);
+	if (req < 0)
+		return (-1);
+	s->s_size += req;
+
+	return (0);
+}
+
+/*
+ * Return the character storage of s; it remains owned by the sbuf.
+ */
+char *
+sbuf_data(struct sbuf *s)
+{
+	char *data;
+
+	data = s->s_buf;
+	return (data);
+}
+
+/*
+ * NUL-terminate the text held in s, if it has any storage.
+ * Always returns 0.
+ */
+int
+sbuf_finish(struct sbuf *s)
+{
+	if (s->s_buf == NULL)
+		return (0);
+
+	s->s_buf[s->s_size] = '\0';
+	return (0);
+}
+
+/*
+ * Free the text storage of s and then s itself.
+ */
+void
+sbuf_delete(struct sbuf *s)
+{
+	char *data = s->s_buf;
+
+	free(data);
+	free(s);
+}
Index: lib/lib9p/threadpool.h
===================================================================
--- /dev/null
+++ lib/lib9p/threadpool.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef	LIB9P_THREADPOOL_H
+#define	LIB9P_THREADPOOL_H
+
+#include <stdbool.h>
+#include <pthread.h>
+#include <sys/queue.h>
+#include "lib9p.h"
+
+STAILQ_HEAD(l9p_request_queue, l9p_request);
+
+/*
+ * Most of the workers in the threadpool run requests.
+ *
+ * One distinguished worker delivers responses from the
+ * response queue.  The reason this worker exists is to
+ * guarantee response order, so that flush responses go
+ * after their flushed requests.
+ */
+struct l9p_threadpool {
+    /*
+     * NOTE(review): l9p_threadpool_init() does not assign ltp_conn;
+     * presumably the connection code fills it in -- confirm.
+     */
+    struct l9p_connection *	ltp_conn;	/* the connection */
+    struct l9p_request_queue	ltp_workq;	/* requests awaiting a worker */
+    struct l9p_request_queue	ltp_replyq;	/* requests that are done */
+    pthread_mutex_t		ltp_mtx;	/* locks queues and cond vars */
+    pthread_cond_t		ltp_work_cv;	/* to signal regular workers */
+    pthread_cond_t		ltp_reply_cv;	/* to signal reply-worker */
+    LIST_HEAD(, l9p_worker)	ltp_workers;	/* list of all workers */
+};
+
+/*
+ * All workers, including the responder, use this as their
+ * control structure.  (The only thing that distinguishes the
+ * responder is that it runs different code and waits on the
+ * reply_cv.)
+ */
+struct l9p_worker {
+    struct l9p_threadpool *	ltw_tp;		/* pool this worker belongs to */
+    pthread_t			ltw_thread;	/* joined by l9p_threadpool_shutdown */
+    bool			ltw_exiting;	/* set under ltp_mtx to request exit */
+    bool			ltw_responder;	/* true for the reply-delivering thread */
+    LIST_ENTRY(l9p_worker)	ltw_link;	/* linkage on ltp_workers */
+};
+
+/*
+ * Each request has a "work state" telling where the request is,
+ * in terms of workers working on it.  That is, this tells us
+ * which threadpool queue, if any, the request is in now or would
+ * go in, or what's happening with it.
+ */
+/*
+ * In threadpool.c: l9p_threadpool_run() assigns NOTSTARTED or IMMEDIATE,
+ * a worker thread assigns INPROGRESS and then RESPQUEUED, and the
+ * responder thread assigns REPLYING just before sending the reply.
+ */
+enum l9p_workstate {
+	L9P_WS_NOTSTARTED,		/* not yet started */
+	L9P_WS_IMMEDIATE,		/* Tflush being done sans worker */
+	L9P_WS_INPROGRESS,		/* worker is working on it */
+	L9P_WS_RESPQUEUED,		/* worker is done, response queued */
+	L9P_WS_REPLYING,		/* responder is in final reply path */
+};
+
+/*
+ * Each request has a "flush state", initially NONE meaning no
+ * Tflush affected the request.
+ *
+ * If a Tflush comes in before we ever assign a work thread,
+ * the flush state goes to FLUSH_REQUESTED_PRE_START.
+ *
+ * If a Tflush comes in after we assign a work thread, the
+ * flush state goes to FLUSH_REQUESTED_POST_START.  The flush
+ * request may be too late: the request might finish anyway.
+ * Or it might be soon enough to abort.  In all cases, though, the
+ * operation requesting the flush (the "flusher") must wait for
+ * the other request (the "flushee") to go through the respond
+ * path.  The respond routine gets to decide whether to send a
+ * normal response, send an error, or drop the request
+ * entirely.
+ *
+ * There's one especially annoying case: what if a Tflush comes in
+ * *while* we're sending a response?  In this case it's too late:
+ * the flush just waits for the fully-composed response.
+ */
+enum l9p_flushstate {
+	L9P_FLUSH_NONE = 0,		/* must be zero */
+	L9P_FLUSH_REQUESTED_PRE_START,	/* not even started before flush */
+	L9P_FLUSH_REQUESTED_POST_START,	/* started, then someone said flush */
+	L9P_FLUSH_TOOLATE		/* too late, already responding */
+};
+
+/* NOTE(review): threadpool.c defines no l9p_threadpool_flushee_done(). */
+void	l9p_threadpool_flushee_done(struct l9p_request *);
+/* Start one responder plus up to the given number of worker threads. */
+int	l9p_threadpool_init(struct l9p_threadpool *, int);
+/* Queue a request for a worker; Tflush is dispatched immediately. */
+void	l9p_threadpool_run(struct l9p_threadpool *, struct l9p_request *);
+/* Stop and join every thread, then destroy the mutex and cond vars. */
+int	l9p_threadpool_shutdown(struct l9p_threadpool *);
+/* Handle a Tflush request; always returns 0. */
+int	l9p_threadpool_tflush(struct l9p_request *);
+
+#endif	/* LIB9P_THREADPOOL_H  */
Index: lib/lib9p/threadpool.c
===================================================================
--- /dev/null
+++ lib/lib9p/threadpool.c
@@ -0,0 +1,422 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <pthread.h>
+#if defined(__FreeBSD__)
+#include <pthread_np.h>
+#endif
+#include <sys/queue.h>
+#include "lib9p.h"
+#include "threadpool.h"
+
+static void l9p_threadpool_rflush(struct l9p_threadpool *tp,
+    struct l9p_request *req);
+
+/*
+ * Thread body for the single responder: pull finished requests off
+ * ltp_replyq one at a time and send their responses.
+ *
+ * arg is the struct l9p_worker for this thread.  The loop ends once
+ * ltw_exiting is set, even if replies are still queued.
+ */
+static void *
+l9p_responder(void *arg)
+{
+	struct l9p_threadpool *tp;
+	struct l9p_worker *worker = arg;
+	struct l9p_request *req;
+
+	tp = worker->ltw_tp;
+	for (;;) {
+		/* get next reply to send */
+		pthread_mutex_lock(&tp->ltp_mtx);
+		while (STAILQ_EMPTY(&tp->ltp_replyq) && !worker->ltw_exiting)
+			pthread_cond_wait(&tp->ltp_reply_cv, &tp->ltp_mtx);
+		if (worker->ltw_exiting) {
+			pthread_mutex_unlock(&tp->ltp_mtx);
+			break;
+		}
+
+		/* off reply queue */
+		req = STAILQ_FIRST(&tp->ltp_replyq);
+		STAILQ_REMOVE_HEAD(&tp->ltp_replyq, lr_worklink);
+
+		/* request is now in final glide path, can't be Tflush-ed */
+		req->lr_workstate = L9P_WS_REPLYING;
+
+		/*
+		 * any flushers waiting for this request can go now;
+		 * they are appended to ltp_replyq while we still hold
+		 * the mutex, so the next iteration picks them up
+		 */
+		if (req->lr_flushstate != L9P_FLUSH_NONE)
+			l9p_threadpool_rflush(tp, req);
+
+		pthread_mutex_unlock(&tp->ltp_mtx);
+
+		/* send response (outside the lock) */
+		l9p_respond(req, false, true);
+	}
+	return (NULL);
+}
+
+/*
+ * Thread body for a regular worker: take requests off ltp_workq,
+ * dispatch them, and move them to ltp_replyq for the responder.
+ *
+ * arg is the struct l9p_worker for this thread.  ltp_mtx is held at
+ * the top of every loop iteration and dropped only around the call to
+ * l9p_dispatch_request().
+ */
+static void *
+l9p_worker(void *arg)
+{
+	struct l9p_threadpool *tp;
+	struct l9p_worker *worker = arg;
+	struct l9p_request *req;
+
+	tp = worker->ltw_tp;
+	pthread_mutex_lock(&tp->ltp_mtx);
+	for (;;) {
+		while (STAILQ_EMPTY(&tp->ltp_workq) && !worker->ltw_exiting)
+			pthread_cond_wait(&tp->ltp_work_cv, &tp->ltp_mtx);
+		if (worker->ltw_exiting)
+			break;
+
+		/* off work queue; now work-in-progress, by us */
+		req = STAILQ_FIRST(&tp->ltp_workq);
+		STAILQ_REMOVE_HEAD(&tp->ltp_workq, lr_worklink);
+		req->lr_workstate = L9P_WS_INPROGRESS;
+		req->lr_worker = worker;
+		pthread_mutex_unlock(&tp->ltp_mtx);
+
+		/* actually try the request */
+		req->lr_error = l9p_dispatch_request(req);
+
+		/* move to responder queue, updating work-state */
+		pthread_mutex_lock(&tp->ltp_mtx);
+		req->lr_workstate = L9P_WS_RESPQUEUED;
+		req->lr_worker = NULL;
+		STAILQ_INSERT_TAIL(&tp->ltp_replyq, req, lr_worklink);
+
+		/* signal the responder */
+		pthread_cond_signal(&tp->ltp_reply_cv);
+	}
+	/* the loop is only left via break, with the mutex held */
+	pthread_mutex_unlock(&tp->ltp_mtx);
+	return (NULL);
+}
+
+/*
+ * Just before finally replying to a request that got touched by
+ * a Tflush request, we enqueue its flushers (requests of type
+ * Tflush, which are now on the flushee's lr_flushq) onto the
+ * response queue.
+ */
+/*
+ * Called by the responder with ltp_mtx held.  No condition variable
+ * is signalled here; the responder itself re-checks ltp_replyq on its
+ * next loop iteration.
+ */
+static void
+l9p_threadpool_rflush(struct l9p_threadpool *tp, struct l9p_request *req)
+{
+	struct l9p_request *flusher;
+
+	/*
+	 * https://swtch.com/plan9port/man/man9/flush.html says:
+	 *
+	 * "Should multiple Tflushes be received for a pending
+	 * request, they must be answered in order.  A Rflush for
+	 * any of the multiple Tflushes implies an answer for all
+	 * previous ones.  Therefore, should a server receive a
+	 * request and then multiple flushes for that request, it
+	 * need respond only to the last flush."  This means
+	 * we could march through the queue of flushers here,
+	 * marking all but the last one as "to be dropped" rather
+	 * than "to be replied-to".
+	 *
+	 * However, we'll leave that for later, if ever -- it
+	 * should be harmless to respond to each, in order.
+	 */
+	/*
+	 * The walk uses lr_flushlink while the insertion uses
+	 * lr_worklink, so queueing a flusher does not disturb the
+	 * list being traversed.
+	 */
+	STAILQ_FOREACH(flusher, &req->lr_flushq, lr_flushlink) {
+		flusher->lr_workstate = L9P_WS_RESPQUEUED;
+#ifdef notdef
+		/*
+		 * Sketch only: L9P_FLUSH_NOT_RUN is not a member of
+		 * enum l9p_flushstate as declared in threadpool.h.
+		 */
+		if (not the last) {
+			flusher->lr_flushstate = L9P_FLUSH_NOT_RUN;
+			/* or, flusher->lr_drop = true ? */
+		}
+#endif
+		STAILQ_INSERT_TAIL(&tp->ltp_replyq, flusher, lr_worklink);
+	}
+}
+
+/*
+ * Initialize a thread pool with one responder thread and up to `size'
+ * worker threads.
+ *
+ * Returns 0 if the responder and at least one worker were started
+ * (ending up with fewer than `size' workers is not treated as an
+ * error), EINVAL if size <= 0, or the error code of the step that
+ * failed.  On failure everything set up so far is torn down again.
+ */
+int
+l9p_threadpool_init(struct l9p_threadpool *tp, int size)
+{
+	struct l9p_worker *worker;
+#if defined(__FreeBSD__)
+	char threadname[16];
+#endif
+	int error;
+	int i, nworkers, nresponders;
+
+	if (size <= 0)
+		return (EINVAL);
+	error = pthread_mutex_init(&tp->ltp_mtx, NULL);
+	if (error)
+		return (error);
+	error = pthread_cond_init(&tp->ltp_work_cv, NULL);
+	if (error)
+		goto fail_work_cv;
+	error = pthread_cond_init(&tp->ltp_reply_cv, NULL);
+	if (error)
+		goto fail_reply_cv;
+
+	STAILQ_INIT(&tp->ltp_workq);
+	STAILQ_INIT(&tp->ltp_replyq);
+	LIST_INIT(&tp->ltp_workers);
+
+	nresponders = 0;
+	nworkers = 0;
+	/* iteration 0 creates the responder, 1..size create the workers */
+	for (i = 0; i <= size; i++) {
+		worker = calloc(1, sizeof(struct l9p_worker));
+		if (worker == NULL) {
+			/* treat like a failed pthread_create below */
+			error = ENOMEM;
+			break;
+		}
+		worker->ltw_tp = tp;
+		worker->ltw_responder = i == 0;
+		error = pthread_create(&worker->ltw_thread, NULL,
+		    worker->ltw_responder ? l9p_responder : l9p_worker,
+		    (void *)worker);
+		if (error) {
+			free(worker);
+			break;
+		}
+		if (worker->ltw_responder)
+			nresponders++;
+		else
+			nworkers++;
+
+#if defined(__FreeBSD__)
+		if (worker->ltw_responder) {
+			pthread_set_name_np(worker->ltw_thread, "9p-responder");
+		} else {
+			/*
+			 * Bounded: with enough workers the index no
+			 * longer fits in threadname and is truncated
+			 * instead of overrunning the array.
+			 */
+			snprintf(threadname, sizeof(threadname),
+			    "9p-worker:%d", i - 1);
+			pthread_set_name_np(worker->ltw_thread, threadname);
+		}
+#endif
+
+		LIST_INSERT_HEAD(&tp->ltp_workers, worker, ltw_link);
+	}
+	if (nresponders == 0 || nworkers == 0) {
+		/* need the one responder, and at least one worker */
+		l9p_threadpool_shutdown(tp);
+		return (error);
+	}
+	return (0);
+
+	/*
+	 * We could avoid these labels by having multiple destroy
+	 * paths (one for each error case), or by having booleans
+	 * for which variables were initialized.  Neither is very
+	 * appealing...
+	 */
+fail_reply_cv:
+	pthread_cond_destroy(&tp->ltp_work_cv);
+fail_work_cv:
+	pthread_mutex_destroy(&tp->ltp_mtx);
+
+	return (error);
+}
+
+/*
+ * Run a request, usually by queueing it.
+ */
+void
+l9p_threadpool_run(struct l9p_threadpool *tp, struct l9p_request *req)
+{
+
+	if (req->lr_req.hdr.type != L9P_TFLUSH) {
+		/* Ordinary request: queue it and wake one worker. */
+		pthread_mutex_lock(&tp->ltp_mtx);
+		req->lr_workstate = L9P_WS_NOTSTARTED;
+		STAILQ_INSERT_TAIL(&tp->ltp_workq, req, lr_worklink);
+		pthread_cond_signal(&tp->ltp_work_cv);
+		pthread_mutex_unlock(&tp->ltp_mtx);
+		return;
+	}
+
+	/*
+	 * Tflush can cancel / kill off regular requests, so it is
+	 * not handed to a worker; it goes through the regular
+	 * dispatch mechanism right here instead.  The request is
+	 * not on any queue yet, so its state can be set without
+	 * taking the lock.
+	 */
+	req->lr_workstate = L9P_WS_IMMEDIATE;
+	(void) l9p_dispatch_request(req);
+}
+
+/*
+ * Run a Tflush request.  Called via l9p_dispatch_request() since
+ * it has some debug code in it, but not called from worker thread.
+ */
+int
+l9p_threadpool_tflush(struct l9p_request *req)
+{
+	struct l9p_connection *conn;
+	struct l9p_threadpool *tp;
+	struct l9p_request *flushee;
+	uint16_t oldtag;
+	enum l9p_flushstate nstate;
+
+	/*
+	 * Find what we're supposed to flush (the flushee, as it were).
+	 */
+	req->lr_error = 0;	/* Tflush always succeeds */
+	conn = req->lr_conn;
+	tp = &conn->lc_tp;
+	oldtag = req->lr_req.tflush.oldtag;
+	ht_wrlock(&conn->lc_requests);
+	flushee = ht_find_locked(&conn->lc_requests, oldtag);
+	if (flushee == NULL) {
+		/*
+		 * Nothing to flush!  The old request must have
+		 * been done and gone already.  Just queue this
+		 * Tflush for a success reply.
+		 */
+		ht_unlock(&conn->lc_requests);
+		pthread_mutex_lock(&tp->ltp_mtx);
+		goto done;
+	}
+
+	/*
+	 * Found the original request.  We'll need to inspect its
+	 * work-state to figure out what to do.  The pool mutex is
+	 * taken before the request table lock is released.
+	 */
+	pthread_mutex_lock(&tp->ltp_mtx);
+	ht_unlock(&conn->lc_requests);
+
+	switch (flushee->lr_workstate) {
+
+	case L9P_WS_NOTSTARTED:
+		/*
+		 * Flushee is on work queue, but not yet being
+		 * handled by a worker.
+		 *
+		 * The documentation -- see
+		 * http://ericvh.github.io/9p-rfc/rfc9p2000.html
+		 * https://swtch.com/plan9port/man/man9/flush.html
+		 * -- says that "the server should answer the
+		 * flush message immediately".  However, Linux
+		 * sends flush requests for operations that
+		 * must finish, such as Tclunk, and it's not
+		 * possible to *answer* the flush request until
+		 * it has been handled (if necessary) or aborted
+		 * (if allowed).
+		 *
+		 * We therefore now just mark the original request
+		 * as flush-requested and let the request-handler
+		 * do whatever is appropriate.  NOTE: we could have
+		 * a table of "requests that can be aborted without
+		 * being run" vs "requests that must be run to be
+		 * aborted", but for now that seems like an
+		 * unnecessary complication.
+		 */
+		nstate = L9P_FLUSH_REQUESTED_PRE_START;
+		break;
+
+	case L9P_WS_IMMEDIATE:
+		/*
+		 * This state only applies to Tflush requests, and
+		 * flushing a Tflush is illegal.  But we'll do nothing
+		 * special here, which will make us act like a flush
+		 * request for the flushee that arrived too late to
+		 * do anything about the flushee.
+		 */
+		nstate = L9P_FLUSH_REQUESTED_POST_START;
+		break;
+
+	case L9P_WS_INPROGRESS:
+		/*
+		 * Worker thread flushee->lr_worker is working on it.
+		 * Kick it to get it out of blocking system calls.
+		 * (This requires that it carefully set up some
+		 * signal handlers, and may be FreeBSD-dependent,
+		 * it probably cannot be handled this way on MacOS.)
+		 */
+#ifdef notyet
+		pthread_kill(...);
+#endif
+		nstate = L9P_FLUSH_REQUESTED_POST_START;
+		break;
+
+	case L9P_WS_RESPQUEUED:
+		/*
+		 * The flushee is already in the response queue.
+		 * We'll just mark it as having had some flush
+		 * action applied.
+		 */
+		nstate = L9P_FLUSH_TOOLATE;
+		break;
+
+	case L9P_WS_REPLYING:
+		/*
+		 * Although we found the flushee, it's too late to
+		 * make us depend on it: it's already heading out
+		 * the door as a reply.
+		 *
+		 * We don't want to do anything to the flushee.
+		 * Instead, we want to work the same way as if
+		 * we had never found the tag.
+		 */
+		goto done;
+	}
+
+	/*
+	 * Now add us to the list of Tflush-es that are waiting
+	 * for the flushee (creating the list if needed, i.e., if
+	 * this is the first Tflush for the flushee).  We (req)
+	 * will get queued for reply later, when the responder
+	 * processes the flushee and calls l9p_threadpool_rflush().
+	 */
+	if (flushee->lr_flushstate == L9P_FLUSH_NONE)
+		STAILQ_INIT(&flushee->lr_flushq);
+	flushee->lr_flushstate = nstate;
+	STAILQ_INSERT_TAIL(&flushee->lr_flushq, req, lr_flushlink);
+
+	pthread_mutex_unlock(&tp->ltp_mtx);
+
+	return (0);
+
+done:
+	/*
+	 * This immediate op is ready to be replied-to now, so just
+	 * stick it onto the reply queue.  Both paths that jump here
+	 * arrive with ltp_mtx held.
+	 */
+	req->lr_workstate = L9P_WS_RESPQUEUED;
+	STAILQ_INSERT_TAIL(&tp->ltp_replyq, req, lr_worklink);
+	pthread_mutex_unlock(&tp->ltp_mtx);
+	pthread_cond_signal(&tp->ltp_reply_cv);
+	return (0);
+}
+
+/*
+ * Stop the pool: ask each thread in turn to exit, wait for it, and
+ * release its control structure; then destroy the condition
+ * variables and the mutex.  Always returns 0.
+ */
+int
+l9p_threadpool_shutdown(struct l9p_threadpool *tp)
+{
+	struct l9p_worker *w;
+
+	/* Every pass unlinks the head, so just keep taking the head. */
+	while ((w = LIST_FIRST(&tp->ltp_workers)) != NULL) {
+		pthread_mutex_lock(&tp->ltp_mtx);
+		w->ltw_exiting = true;
+		if (!w->ltw_responder)
+			pthread_cond_broadcast(&tp->ltp_work_cv);
+		else
+			pthread_cond_signal(&tp->ltp_reply_cv);
+		pthread_mutex_unlock(&tp->ltp_mtx);
+
+		pthread_join(w->ltw_thread, NULL);
+		LIST_REMOVE(w, ltw_link);
+		free(w);
+	}
+
+	pthread_cond_destroy(&tp->ltp_reply_cv);
+	pthread_cond_destroy(&tp->ltp_work_cv);
+	pthread_mutex_destroy(&tp->ltp_mtx);
+
+	return (0);
+}
Index: lib/lib9p/transport/socket.h
===================================================================
--- /dev/null
+++ lib/lib9p/transport/socket.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_SOCKET_H
+#define LIB9P_SOCKET_H
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include "../lib9p.h"
+
+/*
+ * Bind and listen on host:port, then accept connections in a loop,
+ * passing each one to l9p_socket_accept().  Returns -1 on failure;
+ * the accept loop has no successful return path.
+ */
+int l9p_start_server(struct l9p_server *server, const char *host,
+    const char *port);
+/*
+ * Create a connection for the already-accepted socket conn_fd and
+ * start a thread that reads messages from it.
+ */
+void l9p_socket_accept(struct l9p_server *server, int conn_fd,
+    struct sockaddr *client_addr, socklen_t client_addr_len);
+
+#endif /* LIB9P_SOCKET_H */
Index: lib/lib9p/transport/socket.c
===================================================================
--- /dev/null
+++ lib/lib9p/transport/socket.c
@@ -0,0 +1,363 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <assert.h>
+#include <sys/types.h>
+#ifdef __APPLE__
+# include "../apple_endian.h"
+#else
+# include <sys/endian.h>
+#endif
+#include <sys/socket.h>
+#include <sys/event.h>
+#include <sys/uio.h>
+#include <netdb.h>
+#include "../lib9p.h"
+#include "../lib9p_impl.h"
+#include "../log.h"
+#include "socket.h"
+
+/*
+ * Per-connection state for the socket transport; handed to the
+ * transport callbacks as their aux argument.
+ */
+struct l9p_socket_softc
+{
+	struct l9p_connection *ls_conn;	/* connection served by this socket */
+	/* NOTE(review): ls_sockaddr and ls_socklen are never assigned in socket.c */
+	struct sockaddr ls_sockaddr;
+	socklen_t ls_socklen;
+	pthread_t ls_thread;		/* reader thread, see l9p_socket_thread */
+	int ls_fd;			/* connected socket descriptor */
+};
+
+static int l9p_socket_readmsg(struct l9p_socket_softc *, void **, size_t *);
+static int l9p_socket_get_response_buffer(struct l9p_request *,
+    struct iovec *, size_t *, void *);
+static int l9p_socket_send_response(struct l9p_request *, const struct iovec *,
+    const size_t, const size_t, void *);
+static void l9p_socket_drop_response(struct l9p_request *, const struct iovec *,
+    size_t, void *);
+static void *l9p_socket_thread(void *);
+static ssize_t xread(int, void *, size_t);
+static ssize_t xwrite(int, void *, size_t);
+
+/*
+ * Resolve host:port, bind and listen on up to two of the resulting
+ * addresses, and then accept connections forever, handing each one to
+ * l9p_socket_accept().
+ *
+ * Returns -1 if name resolution fails, if no address could be bound,
+ * or if kqueue/kevent fails; listening sockets and the kqueue are
+ * closed before returning.  There is no successful return: once the
+ * accept loop is entered it only ends on error.
+ */
+int
+l9p_start_server(struct l9p_server *server, const char *host, const char *port)
+{
+	struct addrinfo *res, *res0, hints;
+	struct kevent kev[2];
+	struct kevent event[2];
+	int err, kq = -1, i, val, evs, nsockets = 0;
+	int sockets[2];
+	int saved_errno = 0;
+	const int maxsockets = (int)(sizeof(sockets) / sizeof(sockets[0]));
+
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_family = PF_UNSPEC;
+	hints.ai_socktype = SOCK_STREAM;
+	err = getaddrinfo(host, port, &hints, &res0);
+
+	if (err)
+		return (-1);
+
+	/*
+	 * kev[] and sockets[] have room for maxsockets entries only, so
+	 * stop once they are full no matter how many addresses the
+	 * resolver returned.
+	 */
+	for (res = res0; res != NULL && nsockets < maxsockets;
+	    res = res->ai_next) {
+		int s = socket(res->ai_family, res->ai_socktype,
+		    res->ai_protocol);
+
+		if (s < 0) {
+			saved_errno = errno;
+			continue;
+		}
+
+		/* best effort; a failure here is not fatal */
+		val = 1;
+		(void)setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &val,
+		    sizeof(val));
+
+		if (bind(s, res->ai_addr, res->ai_addrlen) < 0 ||
+		    listen(s, 10) < 0) {
+			/* remember why before close() can touch errno */
+			saved_errno = errno;
+			close(s);
+			continue;
+		}
+
+		sockets[nsockets] = s;
+		EV_SET(&kev[nsockets++], s, EVFILT_READ, EV_ADD | EV_ENABLE, 0,
+		    0, 0);
+	}
+	freeaddrinfo(res0);
+
+	if (nsockets < 1) {
+		L9P_LOG(L9P_ERROR, "bind(): %s", strerror(saved_errno));
+		return(-1);
+	}
+
+	kq = kqueue();
+	if (kq < 0) {
+		L9P_LOG(L9P_ERROR, "kqueue(): %s", strerror(errno));
+		goto fail;
+	}
+
+	if (kevent(kq, kev, nsockets, NULL, 0, NULL) < 0) {
+		L9P_LOG(L9P_ERROR, "kevent(): %s", strerror(errno));
+		goto fail;
+	}
+
+	for (;;) {
+		evs = kevent(kq, NULL, 0, event, nsockets, NULL);
+		if (evs < 0) {
+			if (errno == EINTR)
+				continue;
+
+			L9P_LOG(L9P_ERROR, "kevent(): %s", strerror(errno));
+			goto fail;
+		}
+
+		for (i = 0; i < evs; i++) {
+			/*
+			 * sockaddr_storage is large enough for any
+			 * address family; a plain struct sockaddr is
+			 * too small for an IPv6 peer address.
+			 */
+			struct sockaddr_storage client_addr;
+			socklen_t client_addr_len = sizeof(client_addr);
+			int news = accept((int)event[i].ident,
+			    (struct sockaddr *)&client_addr,
+			    &client_addr_len);
+
+			if (news < 0) {
+				L9P_LOG(L9P_WARNING, "accept(): %s",
+				    strerror(errno));
+				continue;
+			}
+
+			l9p_socket_accept(server, news,
+			    (struct sockaddr *)&client_addr,
+			    client_addr_len);
+		}
+	}
+
+fail:
+	for (i = 0; i < nsockets; i++)
+		close(sockets[i]);
+	if (kq >= 0)
+		close(kq);
+	return (-1);
+}
+
+/*
+ * Set up a 9P connection on the already-accepted socket conn_fd and
+ * start a thread (l9p_socket_thread) to service it.
+ *
+ * client_addr/client_addr_len are used only to log the peer's numeric
+ * address.  This function returns nothing, so the caller cannot tell
+ * whether setup worked; on every failure path conn_fd is therefore
+ * closed here rather than leaked.
+ */
+void
+l9p_socket_accept(struct l9p_server *server, int conn_fd,
+    struct sockaddr *client_addr, socklen_t client_addr_len)
+{
+	struct l9p_socket_softc *sc;
+	struct l9p_connection *conn;
+	/* pre-filled: both are logged below even if getnameinfo() fails */
+	char host[NI_MAXHOST + 1] = "?";
+	char serv[NI_MAXSERV + 1] = "?";
+	int err;
+
+	err = getnameinfo(client_addr, client_addr_len, host, NI_MAXHOST, serv,
+	    NI_MAXSERV, NI_NUMERICHOST | NI_NUMERICSERV);
+
+	if (err != 0) {
+		L9P_LOG(L9P_WARNING, "cannot look up client name: %s",
+		    gai_strerror(err));
+	} else {
+		L9P_LOG(L9P_INFO, "new connection from %s:%s", host, serv);
+	}
+
+	if (l9p_connection_init(server, &conn) != 0) {
+		L9P_LOG(L9P_ERROR, "cannot create new connection");
+		close(conn_fd);
+		return;
+	}
+
+	sc = l9p_calloc(1, sizeof(*sc));
+	sc->ls_conn = conn;
+	sc->ls_fd = conn_fd;
+
+	/*
+	 * Fill in transport handler functions and aux argument.
+	 */
+	conn->lc_lt.lt_aux = sc;
+	conn->lc_lt.lt_get_response_buffer = l9p_socket_get_response_buffer;
+	conn->lc_lt.lt_send_response = l9p_socket_send_response;
+	conn->lc_lt.lt_drop_response = l9p_socket_drop_response;
+
+	err = pthread_create(&sc->ls_thread, NULL, l9p_socket_thread, sc);
+	if (err) {
+		L9P_LOG(L9P_ERROR,
+		    "pthread_create (for connection from %s:%s): error %s",
+		    host, serv, strerror(err));
+		l9p_connection_close(sc->ls_conn);
+		free(sc);
+		close(conn_fd);
+	}
+}
+
+/*
+ * Per-connection reader thread: read one message at a time from the
+ * socket and feed it to l9p_connection_recv() until reading fails or
+ * the peer closes the connection, then tear the connection down.
+ *
+ * arg is the struct l9p_socket_softc created by l9p_socket_accept();
+ * this thread owns it and frees it on exit.
+ */
+static void *
+l9p_socket_thread(void *arg)
+{
+	struct l9p_socket_softc *sc = (struct l9p_socket_softc *)arg;
+	struct iovec iov;
+	void *buf;
+	size_t length;
+
+	/*
+	 * Nothing can join this thread: its only handle lives in sc,
+	 * which is freed below.  Detach so its resources are reclaimed
+	 * when it exits.
+	 */
+	(void)pthread_detach(pthread_self());
+
+	for (;;) {
+		if (l9p_socket_readmsg(sc, &buf, &length) != 0)
+			break;
+
+		iov.iov_base = buf;
+		iov.iov_len = length;
+		l9p_connection_recv(sc->ls_conn, &iov, 1, NULL);
+		free(buf);
+	}
+
+	L9P_LOG(L9P_INFO, "connection closed");
+	l9p_connection_close(sc->ls_conn);
+	/*
+	 * sc is freed right here, so no transport callback may run past
+	 * this point; the descriptor can be released along with it.
+	 */
+	close(sc->ls_fd);
+	free(sc);
+	return (NULL);
+}
+
+/*
+ * Read one complete 9P message from the connection's socket.
+ *
+ * A message starts with a 32-bit little-endian size that counts the
+ * size field itself.  On success returns 0 and stores a newly
+ * allocated buffer holding the whole message (size field included) in
+ * *buf and its length in *size; the caller frees it.  On EOF, read
+ * error, short read or an impossible size, returns -1 with nothing
+ * left allocated.
+ */
+static int
+l9p_socket_readmsg(struct l9p_socket_softc *sc, void **buf, size_t *size)
+{
+	uint32_t msize;
+	size_t toread;
+	ssize_t ret;
+	void *buffer;
+	int fd = sc->ls_fd;
+
+	/* 0 is a valid descriptor; matches l9p_socket_send_response() */
+	assert(fd >= 0);
+
+	buffer = l9p_malloc(sizeof(uint32_t));
+
+	ret = xread(fd, buffer, sizeof(uint32_t));
+	if (ret < 0) {
+		L9P_LOG(L9P_ERROR, "read(): %s", strerror(errno));
+		goto fail;
+	}
+
+	if (ret != sizeof(uint32_t)) {
+		if (ret == 0)
+			L9P_LOG(L9P_DEBUG, "%p: EOF", (void *)sc->ls_conn);
+		else
+			L9P_LOG(L9P_ERROR,
+			    "short read: %zd bytes of %zu expected",
+			    ret, sizeof(uint32_t));
+		goto fail;
+	}
+
+	msize = le32toh(*(uint32_t *)buffer);
+
+	/*
+	 * msize comes straight from the peer.  Anything smaller than
+	 * the size field would make toread wrap around to a huge value
+	 * and let the read below run far past the end of the buffer.
+	 * NOTE(review): no upper bound is enforced here; consider
+	 * capping at the connection's maximum message size.
+	 */
+	if (msize < sizeof(uint32_t)) {
+		L9P_LOG(L9P_ERROR, "bad message size %u", (unsigned)msize);
+		goto fail;
+	}
+
+	toread = msize - sizeof(uint32_t);
+	buffer = l9p_realloc(buffer, msize);
+
+	ret = xread(fd, (char *)buffer + sizeof(uint32_t), toread);
+	if (ret < 0) {
+		L9P_LOG(L9P_ERROR, "read(): %s", strerror(errno));
+		goto fail;
+	}
+
+	if (ret != (ssize_t)toread) {
+		L9P_LOG(L9P_ERROR, "short read: %zd bytes of %zu expected",
+		    ret, toread);
+		goto fail;
+	}
+
+	*size = msize;
+	*buf = buffer;
+	L9P_LOG(L9P_INFO, "%p: read complete message, buf=%p size=%u",
+	    (void *)sc->ls_conn, buffer, (unsigned)msize);
+
+	return (0);
+
+fail:
+	/* every error path gives the partially filled buffer back */
+	free(buffer);
+	return (-1);
+}
+
+/*
+ * Transport callback: supply a single iovec backed by a freshly
+ * allocated buffer of the connection's lc_msize bytes.  Always
+ * returns 0 and sets *niovp to 1.
+ */
+static int
+l9p_socket_get_response_buffer(struct l9p_request *req, struct iovec *iov,
+    size_t *niovp, void *arg __unused)
+{
+	size_t msize;
+
+	msize = req->lr_conn->lc_msize;
+	iov[0].iov_len = msize;
+	iov[0].iov_base = l9p_malloc(msize);
+	*niovp = 1;
+
+	return (0);
+}
+
+/*
+ * Transport callback: write the first iolen bytes of iov[0] to the
+ * connection's socket and free the buffer.
+ *
+ * Returns 0 on success, -1 if the write failed or was short.
+ * NOTE(review): on failure the buffer is not freed here; presumably
+ * the caller releases it -- confirm.
+ */
+static int
+l9p_socket_send_response(struct l9p_request *req __unused,
+    const struct iovec *iov, const size_t niov __unused, const size_t iolen,
+    void *arg)
+{
+	struct l9p_socket_softc *sc = (struct l9p_socket_softc *)arg;
+
+	assert(sc->ls_fd >= 0);
+
+	/* iolen is a size_t, so it needs %zu rather than %d */
+	L9P_LOG(L9P_DEBUG, "%p: sending reply, buf=%p, size=%zu", arg,
+	    iov[0].iov_base, iolen);
+
+	/* compare as ssize_t so large lengths are not truncated to int */
+	if (xwrite(sc->ls_fd, iov[0].iov_base, iolen) != (ssize_t)iolen) {
+		L9P_LOG(L9P_ERROR, "short write: %s", strerror(errno));
+		return (-1);
+	}
+
+	free(iov[0].iov_base);
+	return (0);
+}
+
+/*
+ * Transport callback: discard a response buffer without sending it.
+ */
+static void
+l9p_socket_drop_response(struct l9p_request *req __unused,
+    const struct iovec *iov, size_t niov __unused, void *arg)
+{
+	void *buf = iov[0].iov_base;
+
+	L9P_LOG(L9P_DEBUG, "%p: drop buf=%p", arg, buf);
+	free(buf);
+}
+
+/*
+ * read() until count bytes have arrived, restarting after EINTR.
+ *
+ * Returns the number of bytes read, which is less than count only if
+ * end-of-file was reached first, or -1 on any other read error.
+ */
+static ssize_t
+xread(int fd, void *buf, size_t count)
+{
+	char *p = buf;
+	size_t left = count;
+	ssize_t n;
+
+	while (left > 0) {
+		n = read(fd, p, left);
+		if (n == 0)
+			break;
+		if (n < 0) {
+			if (errno != EINTR)
+				return (-1);
+			continue;
+		}
+		p += n;
+		left -= (size_t)n;
+	}
+
+	return ((ssize_t)(count - left));
+}
+
+/*
+ * write() until count bytes have been written, restarting after
+ * EINTR.
+ *
+ * Returns the number of bytes written, which is less than count only
+ * if write() returned 0 first, or -1 on any other write error.
+ */
+static ssize_t
+xwrite(int fd, void *buf, size_t count)
+{
+	char *p = buf;
+	size_t left = count;
+	ssize_t n;
+
+	while (left > 0) {
+		n = write(fd, p, left);
+		if (n == 0)
+			break;
+		if (n < 0) {
+			if (errno != EINTR)
+				return (-1);
+			continue;
+		}
+		p += n;
+		left -= (size_t)n;
+	}
+
+	return ((ssize_t)(count - left));
+}
Index: lib/lib9p/utils.c
===================================================================
--- /dev/null
+++ lib/lib9p/utils.c
@@ -0,0 +1,1268 @@
+/*
+ * Copyright 2016 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <inttypes.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <sys/uio.h>
+#if defined(__FreeBSD__)
+#include <sys/sbuf.h>
+#else
+#include "sbuf/sbuf.h"
+#endif
+#include "lib9p.h"
+#include "fcall.h"
+#include "linux_errno.h"
+
+#ifdef __APPLE__
+  #define GETGROUPS_GROUP_TYPE_IS_INT
+#endif
+
+#define N(ary)          (sizeof(ary) / sizeof(*ary))
+
+/* One row of a bit-description table; see l9p_describe_bits() below. */
+struct descbits {
+	uint64_t	db_mask;	/* bits of the value to examine */
+	uint64_t	db_match;	/* what (value & db_mask) must equal */
+	const char	*db_name;	/* name printed on match; NULL ends table */
+};
+
+
+static bool l9p_describe_bits(const char *, uint64_t, const char *,
+    const struct descbits *, struct sbuf *);
+static void l9p_describe_fid(const char *, uint32_t, struct sbuf *);
+static void l9p_describe_mode(const char *, uint32_t, struct sbuf *);
+static void l9p_describe_name(const char *, char *, struct sbuf *);
+static void l9p_describe_perm(const char *, uint32_t, struct sbuf *);
+static void l9p_describe_lperm(const char *, uint32_t, struct sbuf *);
+static void l9p_describe_qid(const char *, struct l9p_qid *, struct sbuf *);
+static void l9p_describe_l9stat(const char *, struct l9p_stat *,
+    enum l9p_version, struct sbuf *);
+static void l9p_describe_statfs(const char *, struct l9p_statfs *,
+    struct sbuf *);
+static void l9p_describe_time(struct sbuf *, const char *, uint64_t, uint64_t);
+static void l9p_describe_readdir(struct sbuf *, struct l9p_f_io *);
+static void l9p_describe_size(const char *, uint64_t, struct sbuf *);
+static void l9p_describe_ugid(const char *, uint32_t, struct sbuf *);
+static void l9p_describe_getattr_mask(uint64_t, struct sbuf *);
+static void l9p_describe_unlinkat_flags(const char *, uint32_t, struct sbuf *);
+static const char *lookup_linux_errno(uint32_t);
+
+/*
+ * Using indexed initializers, we can have these occur in any order.
+ * Using adjacent-string concatenation ("T" #name, "R" #name), we
+ * get both Tfoo and Rfoo strings with one copy of the name.
+ * Alas, there is no stupid cpp trick to lowercase-ify, so we
+ * have to write each name twice.  In which case we might as well
+ * make the second one a string in the first place and not bother
+ * with the stringizing.
+ *
+ * This table should have entries for each enum value in fcall.h.
+ */
+#define X(NAME, name)	[L9P_T##NAME - L9P__FIRST] = "T" name, \
+			[L9P_R##NAME - L9P__FIRST] = "R" name
+static const char *ftype_names[] = {	/* slots not listed here are NULL */
+	X(VERSION,	"version"),
+	X(AUTH,		"auth"),
+	X(ATTACH,	"attach"),
+	X(ERROR,	"error"),
+	X(LERROR,	"lerror"),
+	X(FLUSH,	"flush"),
+	X(WALK,		"walk"),
+	X(OPEN,		"open"),
+	X(CREATE,	"create"),
+	X(READ,		"read"),
+	X(WRITE,	"write"),
+	X(CLUNK,	"clunk"),
+	X(REMOVE,	"remove"),
+	X(STAT,		"stat"),
+	X(WSTAT,	"wstat"),
+	X(STATFS,	"statfs"),
+	X(LOPEN,	"lopen"),
+	X(LCREATE,	"lcreate"),
+	X(SYMLINK,	"symlink"),
+	X(MKNOD,	"mknod"),
+	X(RENAME,	"rename"),
+	X(READLINK,	"readlink"),
+	X(GETATTR,	"getattr"),
+	X(SETATTR,	"setattr"),
+	X(XATTRWALK,	"xattrwalk"),
+	X(XATTRCREATE,	"xattrcreate"),
+	X(READDIR,	"readdir"),
+	X(FSYNC,	"fsync"),
+	X(LOCK,		"lock"),
+	X(GETLOCK,	"getlock"),
+	X(LINK,		"link"),
+	X(MKDIR,	"mkdir"),
+	X(RENAMEAT,	"renameat"),
+	X(UNLINKAT,	"unlinkat"),
+};
+#undef X	/* the helper is only meant for the table above */
+
+/*
+ * Fill iov2 with a view of iov1 that begins <seek> bytes in.  Segments
+ * lying wholly before that point are left out; the first one kept is
+ * shortened from the front.  *niov2 receives the number of segments.
+ */
+void
+l9p_seek_iov(struct iovec *iov1, size_t niov1, struct iovec *iov2,
+    size_t *niov2, size_t seek)
+{
+	size_t first, kept;
+	size_t skip = seek;
+
+	/* Step over every segment that <seek> swallows whole. */
+	for (first = 0; first < niov1; first++) {
+		if (skip < iov1[first].iov_len)
+			break;
+		skip -= iov1[first].iov_len;
+	}
+
+	/* Copy what is left; <skip> trims only the first kept segment. */
+	for (kept = 0; first + kept < niov1; kept++) {
+		iov2[kept].iov_base =
+		    (char *)iov1[first + kept].iov_base + skip;
+		iov2[kept].iov_len = iov1[first + kept].iov_len - skip;
+		skip = 0;
+	}
+
+	*niov2 = kept;
+}
+
+/* Clip iov in place to <length> bytes in all; returns segments still used. */
+size_t
+l9p_truncate_iov(struct iovec *iov, size_t niov, size_t length)
+{
+	size_t budget = length;
+	size_t idx;
+
+	for (idx = 0; idx < niov; idx++) {
+		if (budget < iov[idx].iov_len) {
+			iov[idx].iov_len = budget;	/* may become 0 */
+			return (idx + 1);
+		}
+		budget -= iov[idx].iov_len;
+	}
+
+	return (niov);
+}
+
+/*
+ * This is a wrapper for getgrouplist() that returns malloc'ed
+ * memory, and papers over FreeBSD vs Mac differences in the
+ * getgrouplist() argument types.
+ *
+ * Note that this function guarantees that *either*:
+ *     return value != NULL and *angroups has been set
+ * or: return value == NULL and *angroups is 0
+ */
+gid_t *
+l9p_getgrlist(const char *name, gid_t basegid, int *angroups)
+{
+#ifdef GETGROUPS_GROUP_TYPE_IS_INT
+	int i, *int_groups;
+#endif
+	gid_t *groups;
+	int ngroups;
+
+	/* Todo, perhaps: grow while getgrouplist() returns -1. */
+	ngroups = NGROUPS_MAX;
+	groups = malloc((size_t)ngroups * sizeof(*groups));
+#ifdef GETGROUPS_GROUP_TYPE_IS_INT
+	/* Scratch array of the int type this platform's call wants. */
+	int_groups = groups ? malloc((size_t)ngroups * sizeof(*int_groups)) :
+	    NULL;
+	if (int_groups == NULL) {
+		free(groups);
+		groups = NULL;
+	}
+#endif
+	if (groups == NULL) {
+		*angroups = 0;
+		return (NULL);
+	}
+	/* The call's return value is ignored; ngroups is used as it left it. */
+#ifdef GETGROUPS_GROUP_TYPE_IS_INT
+	(void) getgrouplist(name, (int)basegid, int_groups, &ngroups);
+	for (i = 0; i < ngroups; i++)
+		groups[i] = (gid_t)int_groups[i];
+	free(int_groups);	/* only groups is handed back to the caller */
+#else
+	(void) getgrouplist(name, basegid, groups, &ngroups);
+#endif
+	*angroups = ngroups;
+	return (groups);
+}
+
+/*
+ * For the various debug describe ops: decode bits in a bit-field-y
+ * value.  For example, we might produce:
+ *     value=0x3c[FOO,BAR,QUUX,?0x20]
+ * when FOO is bit 0x10, BAR is 0x08, and QUUX is 0x04 (as defined
+ * by the table).  This leaves 0x20 (bit 5) as a mystery, while bits
+ * 4, 3, and 2 were decoded.  (Bits 0 and 1 were 0 on input hence
+ * were not attempted here.)
+ *
+ * For general use we take a uint64_t <value>.  The bit description
+ * table <db> is an array of {mask, match, str} values ending with
+ * {0, 0, NULL}.
+ *
+ * If <str> is non-NULL we'll print it and the value as well (if
+ * str is NULL we'll print neither).  The value is always printed in
+ * hex at the moment.
+ *
+ * For convenience, you can use a mask-and-match value, e.g., to
+ * decode a 2-bit field in bits 0 and 1 you can mask against 3 and
+ * match the values 0, 1, 2, and 3.  To handle this, make sure that
+ * all entries that share the same mask are sequential.
+ *
+ * If there are any nonzero undecoded bits, print them after
+ * all the decode-able bits have been handled.
+ *
+ * The <oc> argument defines the open and close bracket characters,
+ * typically "[]", that surround the entire string.  If NULL, no
+ * brackets are added, else oc[0] goes in the front and oc[1] at
+ * the end, after printing any <str><value> part.
+ *
+ * Returns true if it printed anything (other than the implied
+ * str-and-value, that is).
+ */
+static bool
+l9p_describe_bits(const char *str, uint64_t value, const char *oc,
+    const struct descbits *db, struct sbuf *sb)
+{
+	const char *sep;
+	char bracketbuf[2] = "";	/* one-char string: open, later close */
+	bool printed = false;
+
+	if (str != NULL)
+		sbuf_printf(sb, "%s0x%" PRIx64, str, value);
+
+	if (oc != NULL)
+		bracketbuf[0] = oc[0];
+	sep = bracketbuf;	/* the first item is preceded by the opener */
+	for (; db->db_name != NULL; db++) {
+		if ((value & db->db_mask) == db->db_match) {
+			sbuf_printf(sb, "%s%s", sep, db->db_name);
+			sep = ",";
+			printed = true;
+
+			/*
+			 * Clear the field, and make sure we
+			 * won't match a zero-valued field with
+			 * this same mask.
+			 */
+			value &= ~db->db_mask;
+			while (db[1].db_mask == db->db_mask &&
+			    db[1].db_name != NULL)
+				db++;	/* skip adjacent rows sharing this mask */
+		}
+	}
+	if (value != 0) {	/* bits that no table row accounted for */
+		sbuf_printf(sb, "%s?0x%" PRIx64, sep, value);
+		printed = true;
+	}
+	if (printed && oc != NULL) {	/* opener went out with first item */
+		bracketbuf[0] = oc[1];
+		sbuf_cat(sb, bracketbuf);
+	}
+	return (printed);
+}
+
+/*
+ * Show file ID: appends <str> followed by the fid in decimal.
+ */
+static void
+l9p_describe_fid(const char *str, uint32_t fid, struct sbuf *sb)
+{
+
+	sbuf_printf(sb, "%s%" PRIu32, str, fid);
+}
+
+/*
+ * Show user or group ID: appends <str> followed by the ID in decimal.
+ */
+static void
+l9p_describe_ugid(const char *str, uint32_t ugid, struct sbuf *sb)
+{
+
+	sbuf_printf(sb, "%s%" PRIu32, str, ugid);
+}
+
+/*
+ * Show file mode (O_RDWR, O_RDONLY, etc).  The argument is
+ * an l9p_omode, not a Linux flags mode (l9p_describe_lflags
+ * decodes those).  The access mode is a mask-and-match field.
+ */
+static void
+l9p_describe_mode(const char *str, uint32_t mode, struct sbuf *sb)
+{
+	static const struct descbits bits[] = {
+		{ L9P_OACCMODE,	L9P_OREAD,	"OREAD" },
+		{ L9P_OACCMODE,	L9P_OWRITE,	"OWRITE" },
+		{ L9P_OACCMODE,	L9P_ORDWR,	"ORDWR" },
+		{ L9P_OACCMODE,	L9P_OEXEC,	"OEXEC" },
+		/* The remaining rows each use a mask equal to the match. */
+		{ L9P_OCEXEC,	L9P_OCEXEC,	"OCEXEC" },
+		{ L9P_ODIRECT,	L9P_ODIRECT,	"ODIRECT" },
+		{ L9P_ORCLOSE,	L9P_ORCLOSE,	"ORCLOSE" },
+		{ L9P_OTRUNC,	L9P_OTRUNC,	"OTRUNC" },
+		{ 0, 0, NULL }
+	};
+
+	(void) l9p_describe_bits(str, mode, "[]", bits, sb);
+}
+
+/*
+ * Show Linux mode/flags: the access mode is decoded as a mask-and-match
+ * field, every other flag as a row whose mask equals its match value.
+ */
+static void
+l9p_describe_lflags(const char *str, uint32_t flags, struct sbuf *sb)
+{
+	static const struct descbits bits[] = {
+	    { L9P_OACCMODE,	L9P_OREAD,		"O_READ" },
+	    { L9P_OACCMODE,	L9P_OWRITE,		"O_WRITE" },
+	    { L9P_OACCMODE,	L9P_ORDWR,		"O_RDWR" },
+	    { L9P_OACCMODE,	L9P_OEXEC,		"O_EXEC" },
+
+	    { L9P_L_O_APPEND,	L9P_L_O_APPEND,		"O_APPEND" },
+	    { L9P_L_O_CLOEXEC,	L9P_L_O_CLOEXEC,	"O_CLOEXEC" },
+	    { L9P_L_O_CREAT,	L9P_L_O_CREAT,		"O_CREAT" },
+	    { L9P_L_O_DIRECT,	L9P_L_O_DIRECT,		"O_DIRECT" },
+	    { L9P_L_O_DIRECTORY, L9P_L_O_DIRECTORY,	"O_DIRECTORY" },
+	    { L9P_L_O_DSYNC,	L9P_L_O_DSYNC,		"O_DSYNC" },
+	    { L9P_L_O_EXCL,	L9P_L_O_EXCL,		"O_EXCL" },
+	    { L9P_L_O_FASYNC,	L9P_L_O_FASYNC,		"O_FASYNC" },
+	    { L9P_L_O_LARGEFILE, L9P_L_O_LARGEFILE,	"O_LARGEFILE" },
+	    { L9P_L_O_NOATIME,	L9P_L_O_NOATIME,	"O_NOATIME" },
+	    { L9P_L_O_NOCTTY,	L9P_L_O_NOCTTY,		"O_NOCTTY" },
+	    { L9P_L_O_NOFOLLOW,	L9P_L_O_NOFOLLOW,	"O_NOFOLLOW" },
+	    { L9P_L_O_NONBLOCK,	L9P_L_O_NONBLOCK,	"O_NONBLOCK" },
+	    { L9P_L_O_PATH,	L9P_L_O_PATH,		"O_PATH" },
+	    { L9P_L_O_SYNC,	L9P_L_O_SYNC,		"O_SYNC" },
+	    { L9P_L_O_TMPFILE,	L9P_L_O_TMPFILE,	"O_TMPFILE" },
+	    { L9P_L_O_TRUNC,	L9P_L_O_TRUNC,		"O_TRUNC" },
+	    { 0, 0, NULL }
+	};
+
+	(void) l9p_describe_bits(str, flags, "[]", bits, sb);
+}
+
+/*
+ * Show a file name or similar string that might be very long.
+ * Real strings are printed in double quotes, cut to 29 characters
+ * plus "..." when longer than 32; a NULL name prints as <null>
+ * without quotes, so the two cases can be told apart.
+ */
+static void
+l9p_describe_name(const char *str, char *name, struct sbuf *sb)
+{
+	const int maxshown = 32;	/* longest name printed in full */
+
+	if (name == NULL) {
+		sbuf_printf(sb, "%s<null>", str);
+		return;
+	}
+
+	/* Over-long names keep their head and gain a "..." marker. */
+	if (strlen(name) <= (size_t)maxshown)
+		sbuf_printf(sb, "%s\"%.*s\"", str, (int)strlen(name), name);
+	else
+		sbuf_printf(sb, "%s\"%.*s...\"", str, maxshown - 3, name);
+}
+
+/*
+ * Show permissions (rwx etc); hex too when non-rwx bits are present.
+ */
+static void
+l9p_describe_perm(const char *str, uint32_t mode, struct sbuf *sb)
+{
+	const uint32_t extra = mode & ~(uint32_t)0777;
+	char text[12];		/* strmode() output; text[0] is not shown */
+
+	strmode(mode & 0777, text);
+	if (extra == 0)
+		sbuf_printf(sb, "%s<%.9s>", str, text + 1);
+	else
+		sbuf_printf(sb, "%s0x%" PRIx32 "<%.9s>", str, mode, text + 1);
+}
+
+/*
+ * Show "extended" permissions: the various DM* extension bits from
+ * 9P2000.u first, then the regular permissions, all inside [ ].
+ */
+static void
+l9p_describe_ext_perm(const char *str, uint32_t mode, struct sbuf *sb)
+{
+	static const struct descbits bits[] = {
+		{ L9P_DMDIR,	L9P_DMDIR,	"DMDIR" },
+		{ L9P_DMAPPEND,	L9P_DMAPPEND,	"DMAPPEND" },
+		{ L9P_DMEXCL,	L9P_DMEXCL,	"DMEXCL" },
+		{ L9P_DMMOUNT,	L9P_DMMOUNT,	"DMMOUNT" },
+		{ L9P_DMAUTH,	L9P_DMAUTH,	"DMAUTH" },
+		{ L9P_DMTMP,	L9P_DMTMP,	"DMTMP" },
+		{ L9P_DMSYMLINK, L9P_DMSYMLINK,	"DMSYMLINK" },
+		{ L9P_DMDEVICE,	L9P_DMDEVICE,	"DMDEVICE" },
+		{ L9P_DMNAMEDPIPE, L9P_DMNAMEDPIPE, "DMNAMEDPIPE" },
+		{ L9P_DMSOCKET,	L9P_DMSOCKET,	"DMSOCKET" },
+		{ L9P_DMSETUID,	L9P_DMSETUID,	"DMSETUID" },
+		{ L9P_DMSETGID,	L9P_DMSETGID,	"DMSETGID" },
+		{ 0, 0, NULL }
+	};
+	bool need_sep;	/* true if any name was printed before the rwx part */
+
+	sbuf_printf(sb, "%s[", str);
+	need_sep = l9p_describe_bits(NULL, mode & ~(uint32_t)0777, NULL,
+	    bits, sb);		/* no prefix, no brackets: names only */
+	l9p_describe_perm(need_sep ? "," : "", mode & 0777, sb);
+	sbuf_cat(sb, "]");
+}
+
+/*
+ * Show Linux-specific permissions: the S_IFMT field, then the rwx bits.
+ * NOTE(review): uses the host's S_IF* values -- confirm they match Linux.
+ */
+static void
+l9p_describe_lperm(const char *str, uint32_t mode, struct sbuf *sb)
+{
+	static const struct descbits bits[] = {
+		{ S_IFMT,	S_IFIFO,	"S_IFIFO" },
+		{ S_IFMT,	S_IFCHR,	"S_IFCHR" },
+		{ S_IFMT,	S_IFDIR,	"S_IFDIR" },
+		{ S_IFMT,	S_IFBLK,	"S_IFBLK" },
+		{ S_IFMT,	S_IFREG,	"S_IFREG" },
+		{ S_IFMT,	S_IFLNK,	"S_IFLNK" },
+		{ S_IFMT,	S_IFSOCK,	"S_IFSOCK" },
+		{ 0, 0, NULL }
+	};
+	bool need_sep;	/* true if any name was printed before the rwx part */
+
+	sbuf_printf(sb, "%s[", str);
+	need_sep = l9p_describe_bits(NULL, mode & ~(uint32_t)0777, NULL,
+	    bits, sb);		/* no prefix, no brackets: names only */
+	l9p_describe_perm(need_sep ? "," : "", mode & 0777, sb);
+	sbuf_cat(sb, "]");
+}
+
+/*
+ * Show qid (<type, version, path> tuple).
+ */
+static void
+l9p_describe_qid(const char *str, struct l9p_qid *qid, struct sbuf *sb)
+{
+	static const struct descbits bits[] = {
+		/*
+		 * NB: L9P_QTFILE is 0, i.e., is implied by no
+		 * other bits being set.  We produce it by
+		 * masking against 0xff and comparing for
+		 * L9P_QTFILE, and that entry must come first
+		 * so it tests the original value, before any
+		 * matched bits have been cleared.
+		 */
+		{ 0xff,		L9P_QTFILE,	"FILE" },
+		{ L9P_QTDIR,	L9P_QTDIR,	"DIR" },
+		{ L9P_QTAPPEND,	L9P_QTAPPEND,	"APPEND" },
+		{ L9P_QTEXCL,	L9P_QTEXCL,	"EXCL" },
+		{ L9P_QTMOUNT,	L9P_QTMOUNT,	"MOUNT" },
+		{ L9P_QTAUTH,	L9P_QTAUTH,	"AUTH" },
+		{ L9P_QTTMP,	L9P_QTTMP,	"TMP" },
+		{ L9P_QTSYMLINK, L9P_QTSYMLINK,	"SYMLINK" },
+		{ 0, 0, NULL }
+	};
+
+	assert(qid != NULL);
+
+	sbuf_cat(sb, str);
+	(void) l9p_describe_bits("<", qid->type, "[]", bits, sb);
+	sbuf_printf(sb, ",%" PRIu32 ",0x%016" PRIx64 ">",
+	    qid->version, qid->path);	/* this '>' closes the "<" above */
+}
+
+/*
+ * Show size: appends <str> followed by the size in decimal.
+ */
+static void
+l9p_describe_size(const char *str, uint64_t size, struct sbuf *sb)
+{
+
+	sbuf_printf(sb, "%s%" PRIu64, str, size);
+}
+
+/*
+ * Show l9stat (including 9P2000.u extensions if appropriate).
+ */
+static void
+l9p_describe_l9stat(const char *str, struct l9p_stat *st,
+    enum l9p_version version, struct sbuf *sb)
+{
+	bool dotu = version >= L9P_2000U;
+
+	assert(st != NULL);
+
+	sbuf_printf(sb, "%stype=0x%04" PRIx32 " dev=0x%08" PRIx32, str,
+	    st->type, st->dev);
+	l9p_describe_qid(" qid=", &st->qid, sb);
+	l9p_describe_ext_perm(" mode=", st->mode, sb);
+	if (st->atime != (uint32_t)-1)	/* all-ones values are not shown */
+		sbuf_printf(sb, " atime=%" PRIu32, st->atime);
+	if (st->mtime != (uint32_t)-1)
+		sbuf_printf(sb, " mtime=%" PRIu32, st->mtime);
+	if (st->length != (uint64_t)-1)
+		sbuf_printf(sb, " length=%" PRIu64, st->length);
+	l9p_describe_name(" name=", st->name, sb);
+	/*
+	 * It's pretty common to have NULL uid+gid+muid.  They're
+	 * just noise if NULL *and* dot-u; decode only if non-null
+	 * or not-dot-u.
+	 */
+	if (st->uid != NULL || !dotu)
+		l9p_describe_name(" uid=", st->uid, sb);
+	if (st->gid != NULL || !dotu)
+		l9p_describe_name(" gid=", st->gid, sb);
+	if (st->muid != NULL || !dotu)
+		l9p_describe_name(" muid=", st->muid, sb);
+	if (dotu) {	/* numeric IDs are always shown for dot-u and later */
+		if (st->extension != NULL)
+			l9p_describe_name(" extension=", st->extension, sb);
+		sbuf_printf(sb,
+		    " n_uid=%" PRIu32 " n_gid=%" PRIu32 " n_muid=%" PRIu32,
+		    st->n_uid, st->n_gid, st->n_muid);
+	}
+}
+
+/* Show the fields of a statfs reply, prefixed by <str>. */
+static void
+l9p_describe_statfs(const char *str, struct l9p_statfs *st, struct sbuf *sb)
+{
+	assert(st != NULL);
+	/* Nothing in this output opens a '<', so none is closed either. */
+	sbuf_printf(sb, "%stype=0x%04lx bsize=%lu blocks=%" PRIu64
+	    " bfree=%" PRIu64 " bavail=%" PRIu64 " files=%" PRIu64
+	    " ffree=%" PRIu64 " fsid=0x%" PRIx64 " namelen=%" PRIu32,
+	    str, (u_long)st->type, (u_long)st->bsize, st->blocks,
+	    st->bfree, st->bavail, st->files,
+	    st->ffree, st->fsid, st->namelen);
+}
+
+/*
+ * Decode a <seconds,nsec> timestamp: <s>, then sec.nsec with nsec
+ * zero-padded to nine digits, or a marker if nsec is out of range.
+ * Perhaps should use asctime_r.  For now, raw values.
+ */
+static void
+l9p_describe_time(struct sbuf *sb, const char *s, uint64_t sec, uint64_t nsec)
+{
+
+	sbuf_cat(sb, s);
+	if (nsec > 999999999)	/* marker no longer ends in a stray ')' */
+		sbuf_printf(sb, "%" PRIu64 ".<invalid nsec %" PRIu64 ">",
+		    sec, nsec);
+	else
+		sbuf_printf(sb, "%" PRIu64 ".%09" PRIu64, sec, nsec);
+}
+
+/*
+ * Decode readdir data (.L format, variable length names).
+ */
+static void
+l9p_describe_readdir(struct sbuf *sb, struct l9p_f_io *io)
+{
+	uint32_t count;
+#ifdef notyet
+	int i;
+	struct l9p_message msg;
+	struct l9p_dirent de;
+#endif
+
+	if ((count = io->count) == 0) {
+		sbuf_printf(sb, " EOF (count=0)");
+		return;
+	}
+
+	/*
+	 * Can't decode entries yet: we do not have the original req,
+	 * so for now only the count is shown (the #else branch).
+	 */
+#ifdef notyet
+	sbuf_printf(sb, " count=%" PRIu32 " [", count);
+
+	l9p_init_msg(&msg, req, L9P_UNPACK);
+	for (i = 0; msg.lm_size < count; i++) {
+		if (l9p_pudirent(&msg, &de) < 0) {
+			sbuf_printf(sb, " bad count");
+			break;
+		}
+
+		sbuf_printf(sb, i ? ", " : " ");
+		l9p_describe_qid(" qid=", &de.qid, sb);
+		sbuf_printf(sb, " offset=%" PRIu64 " type=%d",
+		    de.offset, de.type);
+		l9p_describe_name(" name=", de.name);
+		free(de.name);
+	}
+	sbuf_printf(sb, "]=%d dir entries", i);
+#else /* notyet */
+	sbuf_printf(sb, " count=%" PRIu32, count);
+#endif
+}
+
+/*
+ * Decode Tgetattr request_mask field.
+ */
+static void
+l9p_describe_getattr_mask(uint64_t request_mask, struct sbuf *sb)
+{
+	static const struct descbits bits[] = {
+		/*
+		 * Note: ALL and BASIC must occur first and second.
+		 * This is a little dirty: it depends on the way the
+		 * describe_bits code clears the values.  If we
+		 * match ALL, we clear all those bits and do not
+		 * match BASIC; if we match BASIC, we clear all
+		 * those bits and do not match individual bits.  Thus
+		 * if we have BASIC but not all the additional bits,
+		 * we'll see, e.g., [BASIC,BTIME,GEN]; if we have
+		 * all the additional bits too, we'll see [ALL].
+		 *
+		 * describe_bits always prints leftover bits, so we'll
+		 * also spot any bits added since we made this table.
+		 */
+		{ L9PL_GETATTR_ALL,	L9PL_GETATTR_ALL,	"ALL" },
+		{ L9PL_GETATTR_BASIC,	L9PL_GETATTR_BASIC,	"BASIC" },
+
+		/* individual bits in BASIC */
+		{ L9PL_GETATTR_MODE,	L9PL_GETATTR_MODE,	"MODE" },
+		{ L9PL_GETATTR_NLINK,	L9PL_GETATTR_NLINK,	"NLINK" },
+		{ L9PL_GETATTR_UID,	L9PL_GETATTR_UID,	"UID" },
+		{ L9PL_GETATTR_GID,	L9PL_GETATTR_GID,	"GID" },
+		{ L9PL_GETATTR_RDEV,	L9PL_GETATTR_RDEV,	"RDEV" },
+		{ L9PL_GETATTR_ATIME,	L9PL_GETATTR_ATIME,	"ATIME" },
+		{ L9PL_GETATTR_MTIME,	L9PL_GETATTR_MTIME,	"MTIME" },
+		{ L9PL_GETATTR_CTIME,	L9PL_GETATTR_CTIME,	"CTIME" },
+		{ L9PL_GETATTR_INO,	L9PL_GETATTR_INO,	"INO" },
+		{ L9PL_GETATTR_SIZE,	L9PL_GETATTR_SIZE,	"SIZE" },
+		{ L9PL_GETATTR_BLOCKS,	L9PL_GETATTR_BLOCKS,	"BLOCKS" },
+
+		/* additional bits in ALL */
+		{ L9PL_GETATTR_BTIME,	L9PL_GETATTR_BTIME,	"BTIME" },
+		{ L9PL_GETATTR_GEN,	L9PL_GETATTR_GEN,	"GEN" },
+		{ L9PL_GETATTR_DATA_VERSION, L9PL_GETATTR_DATA_VERSION,
+							"DATA_VERSION" },
+		{ 0, 0, NULL }
+	};
+
+	(void) l9p_describe_bits(" request_mask=", request_mask, "[]", bits,
+	    sb);
+}
+
+/*
+ * Decode Tunlinkat flags; any bit not in the table prints as ?0x<hex>.
+ */
+static void
+l9p_describe_unlinkat_flags(const char *str, uint32_t flags, struct sbuf *sb)
+{
+	static const struct descbits bits[] = {
+		{ L9PL_AT_REMOVEDIR, L9PL_AT_REMOVEDIR, "AT_REMOVEDIR" },
+		{ 0, 0, NULL }
+	};
+
+	(void) l9p_describe_bits(str, flags, "[]", bits, sb);
+}
+
+/* Return text for a Linux errno; unknown values use a static buffer. */
+static const char *
+lookup_linux_errno(uint32_t linux_errno)
+{
+	static char unknown[50];
+
+	/*
+	 * Error numbers in the "base" range (1..ERANGE) are common
+	 * across BSD, MacOS, Linux, and Plan 9.
+	 *
+	 * Error numbers outside that range require translation.
+	 */
+	static const char *const table[] = {
+#define	X(name) [name] = name ## _STR
+		X(LINUX_EAGAIN),
+		X(LINUX_EDEADLK),
+		X(LINUX_ENAMETOOLONG),
+		X(LINUX_ENOLCK),
+		X(LINUX_ENOSYS),
+		X(LINUX_ENOTEMPTY),
+		X(LINUX_ELOOP),
+		X(LINUX_ENOMSG),
+		X(LINUX_EIDRM),
+		X(LINUX_ECHRNG),
+		X(LINUX_EL2NSYNC),
+		X(LINUX_EL3HLT),
+		X(LINUX_EL3RST),
+		X(LINUX_ELNRNG),
+		X(LINUX_EUNATCH),
+		X(LINUX_ENOCSI),
+		X(LINUX_EL2HLT),
+		X(LINUX_EBADE),
+		X(LINUX_EBADR),
+		X(LINUX_EXFULL),
+		X(LINUX_ENOANO),
+		X(LINUX_EBADRQC),
+		X(LINUX_EBADSLT),
+		X(LINUX_EBFONT),
+		X(LINUX_ENOSTR),
+		X(LINUX_ENODATA),
+		X(LINUX_ETIME),
+		X(LINUX_ENOSR),
+		X(LINUX_ENONET),
+		X(LINUX_ENOPKG),
+		X(LINUX_EREMOTE),
+		X(LINUX_ENOLINK),
+		X(LINUX_EADV),
+		X(LINUX_ESRMNT),
+		X(LINUX_ECOMM),
+		X(LINUX_EPROTO),
+		X(LINUX_EMULTIHOP),
+		X(LINUX_EDOTDOT),
+		X(LINUX_EBADMSG),
+		X(LINUX_EOVERFLOW),
+		X(LINUX_ENOTUNIQ),
+		X(LINUX_EBADFD),
+		X(LINUX_EREMCHG),
+		X(LINUX_ELIBACC),
+		X(LINUX_ELIBBAD),
+		X(LINUX_ELIBSCN),
+		X(LINUX_ELIBMAX),
+		X(LINUX_ELIBEXEC),
+		X(LINUX_EILSEQ),
+		X(LINUX_ERESTART),
+		X(LINUX_ESTRPIPE),
+		X(LINUX_EUSERS),
+		X(LINUX_ENOTSOCK),
+		X(LINUX_EDESTADDRREQ),
+		X(LINUX_EMSGSIZE),
+		X(LINUX_EPROTOTYPE),
+		X(LINUX_ENOPROTOOPT),
+		X(LINUX_EPROTONOSUPPORT),
+		X(LINUX_ESOCKTNOSUPPORT),
+		X(LINUX_EOPNOTSUPP),
+		X(LINUX_EPFNOSUPPORT),
+		X(LINUX_EAFNOSUPPORT),
+		X(LINUX_EADDRINUSE),
+		X(LINUX_EADDRNOTAVAIL),
+		X(LINUX_ENETDOWN),
+		X(LINUX_ENETUNREACH),
+		X(LINUX_ENETRESET),
+		X(LINUX_ECONNABORTED),
+		X(LINUX_ECONNRESET),
+		X(LINUX_ENOBUFS),
+		X(LINUX_EISCONN),
+		X(LINUX_ENOTCONN),
+		X(LINUX_ESHUTDOWN),
+		X(LINUX_ETOOMANYREFS),
+		X(LINUX_ETIMEDOUT),
+		X(LINUX_ECONNREFUSED),
+		X(LINUX_EHOSTDOWN),
+		X(LINUX_EHOSTUNREACH),
+		X(LINUX_EALREADY),
+		X(LINUX_EINPROGRESS),
+		X(LINUX_ESTALE),
+		X(LINUX_EUCLEAN),
+		X(LINUX_ENOTNAM),
+		X(LINUX_ENAVAIL),
+		X(LINUX_EISNAM),
+		X(LINUX_EREMOTEIO),
+		X(LINUX_EDQUOT),
+		X(LINUX_ENOMEDIUM),
+		X(LINUX_EMEDIUMTYPE),
+		X(LINUX_ECANCELED),
+		X(LINUX_ENOKEY),
+		X(LINUX_EKEYEXPIRED),
+		X(LINUX_EKEYREVOKED),
+		X(LINUX_EKEYREJECTED),
+		X(LINUX_EOWNERDEAD),
+		X(LINUX_ENOTRECOVERABLE),
+		X(LINUX_ERFKILL),
+		X(LINUX_EHWPOISON),
+#undef X
+	};
+	if ((size_t)linux_errno < N(table) && table[linux_errno] != NULL)
+		return (table[linux_errno]);
+	if (linux_errno <= ERANGE)
+		return (strerror((int)linux_errno));
+	/* Neither translated nor a base errno: format the unsigned number. */
+	(void) snprintf(unknown, sizeof(unknown),
+	    "Unknown error %" PRIu32, linux_errno);
+	return (unknown);
+}
+
+void
+l9p_describe_fcall(union l9p_fcall *fcall, enum l9p_version version,
+    struct sbuf *sb)
+{
+	uint64_t mask;
+	uint8_t type;
+	int i;
+
+	assert(fcall != NULL);
+	assert(sb != NULL);
+	assert(version <= L9P_2000L && version >= L9P_INVALID_VERSION);
+
+	type = fcall->hdr.type;
+
+	if (type < L9P__FIRST || type >= L9P__LAST_PLUS_1 ||
+	    ftype_names[type - L9P__FIRST] == NULL) {
+		const char *rr;
+
+		/*
+		 * Can't say for sure that this distinction --
+		 * an even number is a request, an odd one is
+		 * a response -- will be maintained forever,
+		 * but it's good enough for now.
+		 */
+		rr = (type & 1) != 0 ? "response" : "request";
+		sbuf_printf(sb, "<unknown %s %d> tag=%d", rr, type,
+		    fcall->hdr.tag);
+	} else {
+		sbuf_printf(sb, "%s tag=%d", ftype_names[type - L9P__FIRST],
+		    fcall->hdr.tag);
+	}
+
+	switch (type) {
+	case L9P_TVERSION:
+	case L9P_RVERSION:
+		sbuf_printf(sb, " version=\"%s\" msize=%d", fcall->version.version,
+		    fcall->version.msize);
+		return;
+
+	case L9P_TAUTH:
+		l9p_describe_fid(" afid=", fcall->hdr.fid, sb);
+		sbuf_printf(sb, " uname=\"%s\" aname=\"%s\"",
+		    fcall->tauth.uname, fcall->tauth.aname);
+		return;
+
+	case L9P_TATTACH:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_fid(" afid=", fcall->tattach.afid, sb);
+		sbuf_printf(sb, " uname=\"%s\" aname=\"%s\"",
+		    fcall->tattach.uname, fcall->tattach.aname);
+		if (version >= L9P_2000U)
+			sbuf_printf(sb, " n_uname=%d", fcall->tattach.n_uname);
+		return;
+
+	case L9P_RATTACH:
+		l9p_describe_qid(" ", &fcall->rattach.qid, sb);
+		return;
+
+	case L9P_RERROR:
+		sbuf_printf(sb, " ename=\"%s\" errnum=%d", fcall->error.ename,
+		    fcall->error.errnum);
+		return;
+
+	case L9P_RLERROR:
+		sbuf_printf(sb, " errnum=%d (%s)", fcall->error.errnum,
+		    lookup_linux_errno(fcall->error.errnum));
+		return;
+
+	case L9P_TFLUSH:
+		sbuf_printf(sb, " oldtag=%d", fcall->tflush.oldtag);
+		return;
+
+	case L9P_RFLUSH:
+		return;
+
+	case L9P_TWALK:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_fid(" newfid=", fcall->twalk.newfid, sb);
+		if (fcall->twalk.nwname) {
+			sbuf_cat(sb, " wname=\"");
+			for (i = 0; i < fcall->twalk.nwname; i++)
+				sbuf_printf(sb, "%s%s", i == 0 ? "" : "/",
+				    fcall->twalk.wname[i]);
+			sbuf_cat(sb, "\"");
+		}
+		return;
+
+	case L9P_RWALK:
+		sbuf_printf(sb, " wqid=[");
+		for (i = 0; i < fcall->rwalk.nwqid; i++)
+			l9p_describe_qid(i == 0 ? "" : ",",
+			    &fcall->rwalk.wqid[i], sb);
+		sbuf_cat(sb, "]");
+		return;
+
+	case L9P_TOPEN:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_mode(" mode=", fcall->tcreate.mode, sb);
+		return;
+
+	case L9P_ROPEN:
+		l9p_describe_qid(" qid=", &fcall->ropen.qid, sb);
+		sbuf_printf(sb, " iounit=%d", fcall->ropen.iounit);
+		return;
+
+	case L9P_TCREATE:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->tcreate.name, sb);
+		l9p_describe_ext_perm(" perm=", fcall->tcreate.perm, sb);
+		l9p_describe_mode(" mode=", fcall->tcreate.mode, sb);
+		if (version >= L9P_2000U && fcall->tcreate.extension != NULL)
+			l9p_describe_name(" extension=",
+			    fcall->tcreate.extension, sb);
+		return;
+
+	case L9P_RCREATE:
+		l9p_describe_qid(" qid=", &fcall->rcreate.qid, sb);
+		sbuf_printf(sb, " iounit=%d", fcall->rcreate.iounit);
+		return;
+
+	case L9P_TREAD:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		sbuf_printf(sb, " offset=%" PRIu64 " count=%" PRIu32,
+		    fcall->io.offset, fcall->io.count);
+		return;
+
+	case L9P_RREAD:
+	case L9P_RWRITE:
+		sbuf_printf(sb, " count=%" PRIu32, fcall->io.count);
+		return;
+
+	case L9P_TWRITE:
+	case L9P_TREADDIR:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		sbuf_printf(sb, " offset=%" PRIu64 " count=%" PRIu32,
+		    fcall->io.offset, fcall->io.count);
+		return;
+
+	case L9P_TCLUNK:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		return;
+
+	case L9P_RCLUNK:
+		return;
+
+	case L9P_TREMOVE:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		return;
+
+	case L9P_RREMOVE:
+		return;
+
+	case L9P_TSTAT:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		return;
+
+	case L9P_RSTAT:
+		l9p_describe_l9stat(" ", &fcall->rstat.stat, version, sb);
+		return;
+
+	case L9P_TWSTAT:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_l9stat(" ", &fcall->twstat.stat, version, sb);
+		return;
+
+	case L9P_RWSTAT:
+		return;
+
+	case L9P_TSTATFS:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		return;
+
+	case L9P_RSTATFS:
+		l9p_describe_statfs(" ", &fcall->rstatfs.statfs, sb);
+		return;
+
+	case L9P_TLOPEN:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_lflags(" flags=", fcall->tlcreate.flags, sb);
+		return;
+
+	case L9P_RLOPEN:
+		l9p_describe_qid(" qid=", &fcall->rlopen.qid, sb);
+		sbuf_printf(sb, " iounit=%d", fcall->rlopen.iounit);
+		return;
+
+	case L9P_TLCREATE:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->tlcreate.name, sb);
+		/* confusing: "flags" is open-mode, "mode" is permissions */
+		l9p_describe_lflags(" flags=", fcall->tlcreate.flags, sb);
+		/* TLCREATE mode/permissions have S_IFREG (0x8000) set */
+		l9p_describe_lperm(" mode=", fcall->tlcreate.mode, sb);
+		l9p_describe_ugid(" gid=", fcall->tlcreate.gid, sb);
+		return;
+
+	case L9P_RLCREATE:
+		l9p_describe_qid(" qid=", &fcall->rlcreate.qid, sb);
+		sbuf_printf(sb, " iounit=%d", fcall->rlcreate.iounit);
+		return;
+
+	case L9P_TSYMLINK:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->tsymlink.name, sb);
+		l9p_describe_name(" symtgt=", fcall->tsymlink.symtgt, sb);
+		l9p_describe_ugid(" gid=", fcall->tsymlink.gid, sb);
+		return;
+
+	case L9P_RSYMLINK:
+		l9p_describe_qid(" qid=", &fcall->ropen.qid, sb);
+		return;
+
+	case L9P_TMKNOD:
+		l9p_describe_fid(" dfid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->tmknod.name, sb);
+		/*
+		 * TMKNOD mode/permissions have S_IFBLK/S_IFCHR/S_IFIFO
+		 * bits.  The major and minor values are only meaningful
+		 * for S_IFBLK and S_IFCHR, but just decode always here.
+		 */
+		l9p_describe_lperm(" mode=", fcall->tmknod.mode, sb);
+		sbuf_printf(sb, " major=%u minor=%u",
+		    fcall->tmknod.major, fcall->tmknod.minor);
+		l9p_describe_ugid(" gid=", fcall->tmknod.gid, sb);
+		return;
+
+	case L9P_RMKNOD:
+		l9p_describe_qid(" qid=", &fcall->rmknod.qid, sb);
+		return;
+
+	case L9P_TRENAME:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_fid(" dfid=", fcall->trename.dfid, sb);
+		l9p_describe_name(" name=", fcall->trename.name, sb);
+		return;
+
+	case L9P_RRENAME:
+		return;
+
+	case L9P_TREADLINK:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		return;
+
+	case L9P_RREADLINK:
+		l9p_describe_name(" target=", fcall->rreadlink.target, sb);
+		return;
+
+	case L9P_TGETATTR:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_getattr_mask(fcall->tgetattr.request_mask, sb);
+		return;
+
+	case L9P_RGETATTR:
+		/* Don't need to decode bits: they're implied by the output */
+		mask = fcall->rgetattr.valid;
+		sbuf_printf(sb, " valid=0x%016" PRIx64, mask);
+		l9p_describe_qid(" qid=", &fcall->rgetattr.qid, sb);
+		if (mask & L9PL_GETATTR_MODE)
+			l9p_describe_lperm(" mode=", fcall->rgetattr.mode, sb);
+		if (mask & L9PL_GETATTR_UID)
+			l9p_describe_ugid(" uid=", fcall->rgetattr.uid, sb);
+		if (mask & L9PL_GETATTR_GID)
+			l9p_describe_ugid(" gid=", fcall->rgetattr.gid, sb);
+		if (mask & L9PL_GETATTR_NLINK)
+			sbuf_printf(sb, " nlink=%" PRIu64,
+			    fcall->rgetattr.nlink);
+		if (mask & L9PL_GETATTR_RDEV)
+			sbuf_printf(sb, " rdev=0x%" PRIx64,
+			    fcall->rgetattr.rdev);
+		if (mask & L9PL_GETATTR_SIZE)
+			l9p_describe_size(" size=", fcall->rgetattr.size, sb);
+		if (mask & L9PL_GETATTR_BLOCKS)
+			sbuf_printf(sb, " blksize=%" PRIu64 " blocks=%" PRIu64,
+			    fcall->rgetattr.blksize, fcall->rgetattr.blocks);
+		if (mask & L9PL_GETATTR_ATIME)
+			l9p_describe_time(sb, " atime=",
+			    fcall->rgetattr.atime_sec,
+			    fcall->rgetattr.atime_nsec);
+		if (mask & L9PL_GETATTR_MTIME)
+			l9p_describe_time(sb, " mtime=",
+			    fcall->rgetattr.mtime_sec,
+			    fcall->rgetattr.mtime_nsec);
+		if (mask & L9PL_GETATTR_CTIME)
+			l9p_describe_time(sb, " ctime=",
+			    fcall->rgetattr.ctime_sec,
+			    fcall->rgetattr.ctime_nsec);
+		if (mask & L9PL_GETATTR_BTIME)
+			l9p_describe_time(sb, " btime=",
+			    fcall->rgetattr.btime_sec,
+			    fcall->rgetattr.btime_nsec);
+		if (mask & L9PL_GETATTR_GEN)
+			sbuf_printf(sb, " gen=0x%" PRIx64, fcall->rgetattr.gen);
+		if (mask & L9PL_GETATTR_DATA_VERSION)
+			sbuf_printf(sb, " data_version=0x%" PRIx64,
+			    fcall->rgetattr.data_version);
+		return;
+
+	case L9P_TSETATTR:
+		/* As with RGETATTR, we'll imply decode via output. */
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		mask = fcall->tsetattr.valid;
+		/* NB: tsetattr valid mask is only 32 bits, hence %08x */
+		sbuf_printf(sb, " valid=0x%08" PRIx64, mask);
+		if (mask & L9PL_SETATTR_MODE)
+			l9p_describe_lperm(" mode=", fcall->tsetattr.mode, sb);
+		if (mask & L9PL_SETATTR_UID)
+			l9p_describe_ugid(" uid=", fcall->tsetattr.uid, sb);
+		if (mask & L9PL_SETATTR_GID)
+			l9p_describe_ugid(" uid=", fcall->tsetattr.gid, sb);
+		if (mask & L9PL_SETATTR_SIZE)
+			l9p_describe_size(" size=", fcall->tsetattr.size, sb);
+		if (mask & L9PL_SETATTR_ATIME) {
+			if (mask & L9PL_SETATTR_ATIME_SET)
+				l9p_describe_time(sb, " atime=",
+				    fcall->tsetattr.atime_sec,
+				    fcall->tsetattr.atime_nsec);
+			else
+				sbuf_cat(sb, " atime=now");
+		}
+		if (mask & L9PL_SETATTR_MTIME) {
+			if (mask & L9PL_SETATTR_MTIME_SET)
+				l9p_describe_time(sb, " mtime=",
+				    fcall->tsetattr.mtime_sec,
+				    fcall->tsetattr.mtime_nsec);
+			else
+				sbuf_cat(sb, " mtime=now");
+		}
+		if (mask & L9PL_SETATTR_CTIME)
+			sbuf_cat(sb, " ctime=now");
+		return;
+
+	case L9P_RSETATTR:
+		return;
+
+	case L9P_TXATTRWALK:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_fid(" newfid=", fcall->txattrwalk.newfid, sb);
+		l9p_describe_name(" name=", fcall->txattrwalk.name, sb);
+		return;
+
+	case L9P_RXATTRWALK:
+		l9p_describe_size(" size=", fcall->rxattrwalk.size, sb);
+		return;
+
+	case L9P_TXATTRCREATE:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->txattrcreate.name, sb);
+		l9p_describe_size(" size=", fcall->txattrcreate.attr_size, sb);
+		sbuf_printf(sb, " flags=%" PRIu32, fcall->txattrcreate.flags);
+		return;
+
+	case L9P_RXATTRCREATE:
+		return;
+
+	case L9P_RREADDIR:
+		l9p_describe_readdir(sb, &fcall->io);
+		return;
+
+	case L9P_TFSYNC:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		return;
+
+	case L9P_RFSYNC:
+		return;
+
+	case L9P_TLOCK:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		/* decode better later */
+		sbuf_printf(sb, " type=%d flags=0x%" PRIx32
+		    " start=%" PRIu64 " length=%" PRIu64
+		    " proc_id=0x%" PRIx32 " client_id=\"%s\"",
+		    fcall->tlock.type, fcall->tlock.flags,
+		    fcall->tlock.start, fcall->tlock.length,
+		    fcall->tlock.proc_id, fcall->tlock.client_id);
+		return;
+
+	case L9P_RLOCK:
+		sbuf_printf(sb, " status=%d", fcall->rlock.status);
+		return;
+
+	case L9P_TGETLOCK:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		/* FALLTHROUGH */
+
+	case L9P_RGETLOCK:
+		/* decode better later */
+		sbuf_printf(sb, " type=%d "
+		    " start=%" PRIu64 " length=%" PRIu64
+		    " proc_id=0x%" PRIx32 " client_id=\"%s\"",
+		    fcall->getlock.type,
+		    fcall->getlock.start, fcall->getlock.length,
+		    fcall->getlock.proc_id, fcall->getlock.client_id);
+		return;
+
+	case L9P_TLINK:
+		l9p_describe_fid(" dfid=", fcall->tlink.dfid, sb);
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->tlink.name, sb);
+		return;
+
+	case L9P_RLINK:
+		return;
+
+	case L9P_TMKDIR:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->tmkdir.name, sb);
+		/* TMKDIR mode/permissions have S_IFDIR set */
+		l9p_describe_lperm(" mode=", fcall->tmkdir.mode, sb);
+		l9p_describe_ugid(" gid=", fcall->tmkdir.gid, sb);
+		return;
+
+	case L9P_RMKDIR:
+		l9p_describe_qid(" qid=", &fcall->rmkdir.qid, sb);
+		return;
+
+	case L9P_TRENAMEAT:
+		l9p_describe_fid(" olddirfid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" oldname=", fcall->trenameat.oldname,
+		    sb);
+		l9p_describe_fid(" newdirfid=", fcall->trenameat.newdirfid, sb);
+		l9p_describe_name(" newname=", fcall->trenameat.newname,
+		    sb);
+		return;
+
+	case L9P_RRENAMEAT:
+		return;
+
+	case L9P_TUNLINKAT:
+		l9p_describe_fid(" dirfd=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->tunlinkat.name, sb);
+		l9p_describe_unlinkat_flags(" flags=",
+		    fcall->tunlinkat.flags, sb);
+		return;
+
+	case L9P_RUNLINKAT:
+		return;
+
+	default:
+		sbuf_printf(sb, " <missing case in %s()>", __func__);
+	}
+}
Index: share/mk/bsd.libnames.mk
===================================================================
--- share/mk/bsd.libnames.mk
+++ share/mk/bsd.libnames.mk
@@ -17,6 +17,7 @@
 LIBCRT0?=	${LIBDESTDIR}${LIBDIR_BASE}/crt0.o
 
 LIB80211?=	${LIBDESTDIR}${LIBDIR_BASE}/lib80211.a
+LIB9P?=		${LIBDESTDIR}${LIBDIR_BASE}/lib9p.a
 LIBALIAS?=	${LIBDESTDIR}${LIBDIR_BASE}/libalias.a
 LIBARCHIVE?=	${LIBDESTDIR}${LIBDIR_BASE}/libarchive.a
 LIBASN1?=	${LIBDESTDIR}${LIBDIR_BASE}/libasn1.a
Index: share/mk/src.libnames.mk
===================================================================
--- share/mk/src.libnames.mk
+++ share/mk/src.libnames.mk
@@ -69,6 +69,7 @@
 		${_INTERNALLIBS} \
 		${LOCAL_LIBRARIES} \
 		80211 \
+		9p \
 		alias \
 		archive \
 		asn1 \
@@ -239,6 +240,7 @@
 # Each library's LIBADD needs to be duplicated here for static linkage of
 # 2nd+ order consumers.  Auto-generating this would be better.
 _DP_80211=	sbuf bsdxml
+_DP_9p=		sbuf
 _DP_archive=	z bz2 lzma bsdxml zstd
 _DP_zstd=	pthread
 .if ${MK_BLACKLIST} != "no"
Index: usr.sbin/bhyve/Makefile
===================================================================
--- usr.sbin/bhyve/Makefile
+++ usr.sbin/bhyve/Makefile
@@ -3,6 +3,7 @@
 #
 
 .include <src.opts.mk>
+CFLAGS+=-I${SRCTOP}/lib/lib9p
 CFLAGS+=-I${SRCTOP}/sys
 .PATH:  ${SRCTOP}/sys/cam/ctl
 
@@ -46,6 +47,7 @@
 	pci_lpc.c		\
 	pci_nvme.c		\
 	pci_passthru.c		\
+	pci_virtio_9p.c		\
 	pci_virtio_block.c	\
 	pci_virtio_console.c	\
 	pci_virtio_net.c	\
@@ -74,7 +76,7 @@
 .PATH:  ${BHYVE_SYSDIR}/sys/amd64/vmm
 SRCS+=	vmm_instruction_emul.c
 
-LIBADD=	vmmapi md pthread z util sbuf cam
+LIBADD=	vmmapi md pthread z util sbuf cam 9p casper cap_pwd cap_grp
 
 .if ${MK_INET_SUPPORT} != "no"
 CFLAGS+=-DINET
Index: usr.sbin/bhyve/bhyve.8
===================================================================
--- usr.sbin/bhyve/bhyve.8
+++ usr.sbin/bhyve/bhyve.8
@@ -223,6 +223,8 @@
 Virtio block storage interface.
 .It Li virtio-scsi
 Virtio SCSI interface.
+.It Li virtio-9p
+Virtio 9p (VirtFS) interface.
 .It Li virtio-rnd
 Virtio RNG interface.
 .It Li virtio-console
@@ -312,6 +314,19 @@
 The default value is 0.
 .El
 .Pp
+9P devices:
+.Bl -tag -width 10n
+.It Pa sharename=/path/to/share[,9p-device-options]
+.El
+.Pp
+The
+.Ar 9p-device-options
+are:
+.Bl -tag -width 10n
+.It Li ro
+Expose the share in read-only mode.
+.El
+.Pp
 TTY devices:
 .Bl -tag -width 10n
 .It Li stdio
Index: usr.sbin/bhyve/pci_virtio_9p.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/pci_virtio_9p.c
@@ -0,0 +1,344 @@
+/*-
+ * Copyright (c) 2015 iXsystems Inc.
+ * Copyright (c) 2017-2018 Jakub Klama <jceel@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer
+ *    in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * VirtIO filesystem passthrough using 9p protocol.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/linker_set.h>
+#include <sys/uio.h>
+#include <sys/capsicum.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <pthread.h>
+
+#include <lib9p.h>
+#include <backend/fs.h>
+
+#include "bhyverun.h"
+#include "pci_emul.h"
+#include "virtio.h"
+
+#define	VT9P_MAX_IOV	128	/* max descriptors taken per chain */
+#define	VT9P_RINGSZ	256	/* virtqueue size (vq_qsize) */
+#define	VT9P_MAXTAGSZ	256	/* max share-name (tag) length, bytes */
+#define	VT9P_CONFIGSPACESZ	(VT9P_MAXTAGSZ + sizeof(uint16_t))
+
+static int pci_vt9p_debug;	/* non-zero enables DPRINTF output */
+#define DPRINTF(params) do { if (pci_vt9p_debug) printf params; } while (0)
+#define WPRINTF(params) printf params
+
+/*
+ * Per-device softc: one is calloc'd per device in pci_vt9p_init().
+ */
+struct pci_vt9p_softc {
+	struct virtio_softc      vsc_vs;	/* generic virtio state (vi_* calls) */
+	struct vqueue_info       vsc_vq;	/* the single request virtqueue */
+	pthread_mutex_t          vsc_mtx;	/* held around vq release; also vs_mtx */
+	uint64_t                 vsc_cfg;	/* not referenced in this file */
+	uint64_t                 vsc_features;	/* negotiated feature bits */
+	char *                   vsc_rootpath;	/* never assigned in this file */
+	struct pci_vt9p_config * vsc_config;	/* config space read by cfgread */
+	struct l9p_backend *     vsc_fs_backend; /* from l9p_backend_fs_init() */
+	struct l9p_server *      vsc_server;	/* from l9p_server_init() */
+        struct l9p_connection *  vsc_conn;	/* from l9p_connection_init() */
+};
+
+struct pci_vt9p_request {		/* per-chain state, calloc'd in notify */
+	struct pci_vt9p_softc *	vsr_sc;		/* owning device */
+	struct iovec *		vsr_iov;	/* chain iovecs (notify's stack array) */
+	size_t			vsr_niov;	/* descriptors in the chain */
+	size_t			vsr_respidx;	/* count of leading readable descs */
+	size_t			vsr_iolen;	/* response length, set in send */
+	uint16_t		vsr_idx;	/* from vq_getchain(), for vq_relchain() */
+};
+
+struct pci_vt9p_config {	/* device config space: length-prefixed tag */
+	uint16_t tag_len;	/* bytes of tag[] in use (share name length) */
+	char tag[];		/* share name bytes; C99 flexible array member */
+} __attribute__((packed));
+
+static int pci_vt9p_send(struct l9p_request *, const struct iovec *,
+    const size_t, const size_t, void *);
+static void pci_vt9p_drop(struct l9p_request *, const struct iovec *, size_t,
+    void *);
+static void pci_vt9p_reset(void *);
+static void pci_vt9p_notify(void *, struct vqueue_info *);
+static int pci_vt9p_cfgread(void *, int, int, uint32_t *);
+static void pci_vt9p_neg_features(void *, uint64_t);
+
+static struct virtio_consts vt9p_vi_consts = {	/* positional initializer */
+	"vt9p",			/* our name */
+	1,			/* we support 1 virtqueue */
+	VT9P_CONFIGSPACESZ,	/* config reg size: tag_len + max tag bytes */
+	pci_vt9p_reset,		/* reset */
+	pci_vt9p_notify,	/* device-wide qnotify */
+	pci_vt9p_cfgread,	/* read virtio config */
+	NULL,			/* write virtio config: no handler supplied */
+	pci_vt9p_neg_features,	/* apply negotiated features */
+	(1 << 0),		/* our capabilities: bit 0 (mount tag? confirm) */
+};
+
+
+/* Virtio reset hook: log the request, then let vi_reset_dev() handle it. */
+static void
+pci_vt9p_reset(void *vsc)
+{
+	struct pci_vt9p_softc *softc = vsc;
+
+	DPRINTF(("vt9p: device reset requested !\n"));
+
+	vi_reset_dev(&softc->vsc_vs);
+}
+
+/* Virtio feature-negotiation hook. */
+static void
+pci_vt9p_neg_features(void *vsc, uint64_t negotiated_features)
+{
+	/* Only stored; nothing in this file reads vsc_features back. */
+	((struct pci_vt9p_softc *)vsc)->vsc_features = negotiated_features;
+}
+
+static int
+pci_vt9p_cfgread(void *vsc, int offset, int size, uint32_t *retval)
+{
+	struct pci_vt9p_softc *softc = vsc;
+	const uint8_t *cfg = (const uint8_t *)softc->vsc_config;
+
+	/* Copy `size' config bytes at `offset'; no bounds check is done here. */
+	memcpy(retval, &cfg[offset], size);
+	return (0);
+}
+
+static int
+pci_vt9p_get_buffer(struct l9p_request *req, struct iovec *iov, size_t *niov,
+    void *arg)
+{
+	const struct pci_vt9p_request *vreq = req->lr_aux;
+	size_t first = vreq->vsr_respidx, cnt = vreq->vsr_niov - first;
+
+	/* Hand lib9p the chain's tail that follows the readable descriptors. */
+	memcpy(iov, &vreq->vsr_iov[first], cnt * sizeof(*iov));
+	*niov = cnt;
+	return (0);
+}
+
+/* lib9p send hook: release the chain with the response length, free vreq. */
+static int
+pci_vt9p_send(struct l9p_request *req, const struct iovec *iov,
+    const size_t niov, const size_t iolen, void *arg)
+{
+	struct pci_vt9p_request *vreq = req->lr_aux;
+	struct pci_vt9p_softc *softc = vreq->vsr_sc;
+
+	vreq->vsr_iolen = iolen;
+	pthread_mutex_lock(&softc->vsc_mtx);
+	vq_relchain(&softc->vsc_vq, vreq->vsr_idx, vreq->vsr_iolen);
+	vq_endchains(&softc->vsc_vq, 1);
+	pthread_mutex_unlock(&softc->vsc_mtx);
+	free(vreq);
+	return (0);
+}
+
+/* lib9p drop hook: release the chain with zero length, then free vreq. */
+static void
+pci_vt9p_drop(struct l9p_request *req, const struct iovec *iov, size_t niov,
+    void *arg)
+{
+	struct pci_vt9p_request *vreq = req->lr_aux;
+
+	pthread_mutex_lock(&vreq->vsr_sc->vsc_mtx);
+	vq_relchain(&vreq->vsr_sc->vsc_vq, vreq->vsr_idx, 0);
+	vq_endchains(&vreq->vsr_sc->vsc_vq, 1);
+	pthread_mutex_unlock(&vreq->vsr_sc->vsc_mtx);
+	free(vreq);
+}
+
+/*
+ * Queue notify hook: pass each pending chain to lib9p; send/drop free preq.
+ */
+static void
+pci_vt9p_notify(void *vsc, struct vqueue_info *vq)
+{
+	struct iovec iov[VT9P_MAX_IOV];
+	uint16_t flags[VT9P_MAX_IOV];
+	struct pci_vt9p_softc *sc = vsc;
+	struct pci_vt9p_request *preq;
+	uint16_t idx;
+	int i, n;
+
+	while (vq_has_descs(vq)) {
+		/* Keep the count signed so an error cannot wrap to 65535. */
+		n = vq_getchain(vq, &idx, iov, VT9P_MAX_IOV, flags);
+		assert(n >= 1 && n <= VT9P_MAX_IOV);
+		preq = calloc(1, sizeof(struct pci_vt9p_request));
+		assert(preq != NULL);
+		preq->vsr_sc = sc;
+		preq->vsr_idx = idx;
+		preq->vsr_iov = iov;	/* NB: on-stack, reused per iteration */
+		preq->vsr_niov = n;
+
+		/* Count leading readable descs; calloc left vsr_respidx at 0. */
+		for (i = 0; i < n && !(flags[i] & VRING_DESC_F_WRITE); i++)
+			preq->vsr_respidx++;
+
+		for (i = 0; i < n; i++) {
+			DPRINTF(("vt9p: vt9p_notify(): desc%d base=%p, "
+			    "len=%zu, flags=0x%04x\r\n", i, iov[i].iov_base,
+			    iov[i].iov_len, flags[i]));
+		}
+
+		l9p_connection_recv(sc->vsc_conn, iov, preq->vsr_respidx, preq);
+	}
+}
+
+
+/*
+ * Device init: parse "sharename=/path[,ro]", open and rights-limit the share
+ * root, start lib9p on it, register the virtio device.  0 means success.
+ */
+static int
+pci_vt9p_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+	struct pci_vt9p_softc *sc;
+	char *opt, *sharename = NULL, *rootpath = NULL;
+	int rootfd;
+	bool ro = false;
+	cap_rights_t rootcap;
+
+	/* strsep() returns NULL when opts is NULL, so the loop is skipped. */
+	while ((opt = strsep(&opts, ",")) != NULL) {
+		if (strchr(opt, '=') != NULL) {
+			if (sharename != NULL) {
+				printf("virtio-9p: more than one share name given\n");
+				return (1);
+			}
+			sharename = strsep(&opt, "=");
+			rootpath = opt;
+		} else if (strcmp(opt, "ro") == 0) {
+			DPRINTF(("read-only mount requested\r\n"));
+			ro = true;
+		} else {
+			printf("virtio-9p: invalid option '%s'\n", opt);
+			return (1);
+		}
+	}
+
+	/* No "name=path" (e.g. a lone "ro"): stop before strlen(NULL). */
+	if (sharename == NULL) {
+		printf("virtio-9p: share name and path required\n");
+		return (1);
+	}
+	if (strlen(sharename) > VT9P_MAXTAGSZ) {
+		printf("virtio-9p: share name too long\n");
+		return (1);
+	}
+
+	rootfd = open(rootpath, O_DIRECTORY);
+	if (rootfd < 0) {
+		printf("virtio-9p: open %s: %s\n", rootpath, strerror(errno));
+		return (-1);
+	}
+
+	cap_rights_init(&rootcap,
+	    CAP_LOOKUP, CAP_ACL_CHECK, CAP_ACL_DELETE, CAP_ACL_GET,
+	    CAP_ACL_SET, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_FSTAT,
+	    CAP_CREATE, CAP_FCHMODAT, CAP_FCHOWNAT, CAP_FTRUNCATE,
+	    CAP_LINKAT_SOURCE, CAP_LINKAT_TARGET, CAP_MKDIRAT, CAP_MKNODAT,
+	    CAP_PREAD, CAP_PWRITE, CAP_RENAMEAT_SOURCE, CAP_RENAMEAT_TARGET,
+	    CAP_SYMLINKAT, CAP_UNLINKAT, CAP_EXTATTR_DELETE,
+	    CAP_EXTATTR_GET, CAP_EXTATTR_LIST, CAP_EXTATTR_SET,
+	    CAP_FUTIMES, CAP_FSTATFS, CAP_FSYNC, CAP_FPATHCONF);
+	if (cap_rights_limit(rootfd, &rootcap) != 0) {
+		close(rootfd);
+		return (1);
+	}
+
+	sc = calloc(1, sizeof(struct pci_vt9p_softc));
+	if (sc != NULL)
+		sc->vsc_config = calloc(1, sizeof(struct pci_vt9p_config) +
+		    VT9P_MAXTAGSZ);
+	if (sc == NULL || sc->vsc_config == NULL) {
+		free(sc);
+		close(rootfd);
+		return (1);
+	}
+	pthread_mutex_init(&sc->vsc_mtx, NULL);
+	sc->vsc_config->tag_len = (uint16_t)strlen(sharename);
+	memcpy(sc->vsc_config->tag, sharename, sc->vsc_config->tag_len);
+
+	if (l9p_backend_fs_init(&sc->vsc_fs_backend, rootfd, ro) != 0 ||
+	    l9p_server_init(&sc->vsc_server, sc->vsc_fs_backend) != 0) {
+		errno = ENXIO;
+		return (1);
+	}
+	if (l9p_connection_init(sc->vsc_server, &sc->vsc_conn) != 0) {
+		errno = EIO;
+		return (1);
+	}
+
+	sc->vsc_conn->lc_msize = L9P_MAX_IOV * PAGE_SIZE;
+	sc->vsc_conn->lc_lt.lt_get_response_buffer = pci_vt9p_get_buffer;
+	sc->vsc_conn->lc_lt.lt_send_response = pci_vt9p_send;
+	sc->vsc_conn->lc_lt.lt_drop_response = pci_vt9p_drop;
+
+	vi_softc_linkup(&sc->vsc_vs, &vt9p_vi_consts, sc, pi, &sc->vsc_vq);
+	sc->vsc_vs.vs_mtx = &sc->vsc_mtx;
+	sc->vsc_vq.vq_qsize = VT9P_RINGSZ;
+
+	/* initialize config space */
+	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_9P);
+	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
+	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
+	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_9P);
+	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
+
+	if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix()))
+		return (1);
+	vi_set_io_bar(&sc->vsc_vs, 0);
+
+	return (0);
+}
+
+struct pci_devemu pci_de_v9p = {	/* emulation entry for this device */
+	.pe_emu =	"virtio-9p",	/* emulation name */
+	.pe_init =	pci_vt9p_init,	/* option parsing and device setup */
+	.pe_barwrite =	vi_pci_write,	/* BAR writes go to the virtio layer */
+	.pe_barread =	vi_pci_read	/* BAR reads go to the virtio layer */
+};
+PCI_EMUL_SET(pci_de_v9p);	/* presumably adds it to the emulation set */
Index: usr.sbin/bhyve/virtio.h
===================================================================
--- usr.sbin/bhyve/virtio.h
+++ usr.sbin/bhyve/virtio.h
@@ -216,6 +216,7 @@
 #define	VIRTIO_DEV_CONSOLE	0x1003
 #define	VIRTIO_DEV_RANDOM	0x1005
 #define	VIRTIO_DEV_SCSI		0x1008
+#define	VIRTIO_DEV_9P		0x1009
 
 /*
  * PCI config space constants.