Index: etc/mtree/BSD.include.dist =================================================================== --- etc/mtree/BSD.include.dist +++ etc/mtree/BSD.include.dist @@ -193,6 +193,8 @@ .. lib80211 .. + lib9p + .. libipt .. libmilter Index: lib/Makefile =================================================================== --- lib/Makefile +++ lib/Makefile @@ -27,6 +27,7 @@ SUBDIR= ${SUBDIR_BOOTSTRAP} \ .WAIT \ geom \ + lib9p \ libalias \ libarchive \ libauditd \ Index: lib/lib9p/COPYRIGHT =================================================================== --- /dev/null +++ lib/lib9p/COPYRIGHT @@ -0,0 +1,47 @@ +Copyright 2016 Jakub Klama +All rights reserved + +Redistribution and use in source and binary forms, with or without +modification, are permitted providing that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+ +Some parts of the code are based on libixp (http://libs.suckless.org/libixp) +library code released under following license: + +© 2005-2006 Anselm R. Garbe +© 2006-2010 Kris Maglione + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. Index: lib/lib9p/GNUmakefile =================================================================== --- /dev/null +++ lib/lib9p/GNUmakefile @@ -0,0 +1,76 @@ +CC_VERSION := $(shell $(CC) --version | \ + sed -n -e '/clang-/s/.*clang-\([0-9][0-9]*\).*/\1/p') +ifeq ($(CC_VERSION),) +# probably not clang +CC_VERSION := 0 +endif + +WFLAGS := + +# Warnings are version-dependent, unfortunately, +# so test for version before adding a -W flag. +# Note: gnu make requires $(shell test ...) for "a > b" type tests. 
+ifeq ($(shell test $(CC_VERSION) -gt 0; echo $$?),0) +WFLAGS += -Weverything +WFLAGS += -Wno-padded +WFLAGS += -Wno-gnu-zero-variadic-macro-arguments +WFLAGS += -Wno-format-nonliteral +WFLAGS += -Wno-unused-macros +WFLAGS += -Wno-disabled-macro-expansion +WFLAGS += -Werror +endif + +ifeq ($(shell test $(CC_VERSION) -gt 600; echo $$?),0) +WFLAGS += -Wno-reserved-id-macro +endif + +CFLAGS := $(WFLAGS) \ + -g \ + -O0 \ + -DL9P_DEBUG=L9P_DEBUG +# Note: to turn on debug, use -DL9P_DEBUG=L9P_DEBUG, +# and set env variable LIB9P_LOGGING to stderr or to +# the (preferably full path name of) the debug log file. + +LIB_SRCS := \ + pack.c \ + connection.c \ + request.c \ + genacl.c \ + log.c \ + hashtable.c \ + utils.c \ + rfuncs.c \ + threadpool.c \ + sbuf/sbuf.c \ + transport/socket.c \ + backend/fs.c + +SERVER_SRCS := \ + example/server.c + +BUILD_DIR := build +LIB_OBJS := $(addprefix build/,$(LIB_SRCS:.c=.o)) +SERVER_OBJS := $(SERVER_SRCS:.c=.o) +LIB := lib9p.dylib +SERVER := server + +all: build $(LIB) $(SERVER) + +$(LIB): $(LIB_OBJS) + cc -dynamiclib $^ -o build/$@ + +$(SERVER): $(SERVER_OBJS) $(LIB) + cc $< -o build/$(SERVER) -Lbuild/ -l9p + +clean: + rm -rf build + rm -f $(SERVER_OBJS) +build: + mkdir build + mkdir build/sbuf + mkdir build/transport + mkdir build/backend + +build/%.o: %.c + $(CC) $(CFLAGS) -c $< -o $@ Index: lib/lib9p/Makefile =================================================================== --- /dev/null +++ lib/lib9p/Makefile @@ -0,0 +1,33 @@ +# $FreeBSD$ +# Note: to turn on debug, use -DL9P_DEBUG=L9P_DEBUG, +# and set env variable LIB9P_LOGGING to stderr or to +# the (preferably full path name of) the debug log file. 
+ +CFLAGS+= -DWITH_CASPER -I${.CURDIR} +DEBUG_FLAGS= -DL9P_DEBUG=L9P_DEBUG -DACE_DEBUG -g -O0 + +LIB= 9p +PACKAGE= lib${LIB} +SHLIB_MAJOR= 1 +SHLIBDIR?= /lib +SRCS= connection.c \ + genacl.c \ + hashtable.c \ + log.c \ + pack.c \ + request.c \ + rfuncs.c \ + threadpool.c \ + utils.c \ + backend/fs.c \ + transport/socket.c + +INCSDIR= ${INCLUDEDIR}/lib9p +INCS= fid.h lib9p.h backend/fs.h + +LIBADD= sbuf + +cscope: .PHONY + cd ${.CURDIR}; cscope -buq $$(find . -name '*.[ch]' -print) + +.include Index: lib/lib9p/README.md =================================================================== --- /dev/null +++ lib/lib9p/README.md @@ -0,0 +1,20 @@ +# lib9p + +lib9p is a server library implementing 9p2000, 9p2000.u and 9p2000.L revisions +of 9P protocol. It is being developed primarily as a backend for virtio-9p in +BHyVe, the FreeBSD hypervisor. + +# Features + +* 9p2000, 9p2000.u and 9p2000.L protocol support +* Built-in TCP transport + +# Supported operating systems + +* FreeBSD (>=10) +* macOS (>=10.9) + +# Authors + +* Jakub Klama [jceel](https://github.com/jceel) +* Chris Torek [chris3torek](https://github.com/chris3torek) Index: lib/lib9p/apple_endian.h =================================================================== --- /dev/null +++ lib/lib9p/apple_endian.h @@ -0,0 +1,27 @@ +#ifndef _APPLE_ENDIAN_H +#define _APPLE_ENDIAN_H + +/* + * Shims to make Apple's endian headers and macros compatible + * with (which is awful). 
+ */ + +# include + +# define _LITTLE_ENDIAN 0x12345678 +# define _BIG_ENDIAN 0x87654321 + +# ifdef __LITTLE_ENDIAN__ +# define _BYTE_ORDER _LITTLE_ENDIAN +# endif +# ifdef __BIG_ENDIAN__ +# define _BYTE_ORDER _BIG_ENDIAN +# endif + +# define htole32(x) OSSwapHostToLittleInt32(x) +# define le32toh(x) OSSwapLittleToHostInt32(x) + +# define htobe32(x) OSSwapHostToBigInt32(x) +# define be32toh(x) OSSwapBigToHostInt32(x) + +#endif /* _APPLE_ENDIAN_H */ Index: lib/lib9p/backend/backend.h =================================================================== --- /dev/null +++ lib/lib9p/backend/backend.h @@ -0,0 +1,69 @@ +/* + * Copyright 2016 Jakub Klama + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + + +#ifndef LIB9P_BACKEND_H +#define LIB9P_BACKEND_H + +struct l9p_backend { + void *softc; + void (*freefid)(void *, struct l9p_fid *); + int (*attach)(void *, struct l9p_request *); + int (*clunk)(void *, struct l9p_fid *); + int (*create)(void *, struct l9p_request *); + int (*open)(void *, struct l9p_request *); + int (*read)(void *, struct l9p_request *); + int (*remove)(void *, struct l9p_fid *); + int (*stat)(void *, struct l9p_request *); + int (*walk)(void *, struct l9p_request *); + int (*write)(void *, struct l9p_request *); + int (*wstat)(void *, struct l9p_request *); + int (*statfs)(void *, struct l9p_request *); + int (*lopen)(void *, struct l9p_request *); + int (*lcreate)(void *, struct l9p_request *); + int (*symlink)(void *, struct l9p_request *); + int (*mknod)(void *, struct l9p_request *); + int (*rename)(void *, struct l9p_request *); + int (*readlink)(void *, struct l9p_request *); + int (*getattr)(void *, struct l9p_request *); + int (*setattr)(void *, struct l9p_request *); + int (*xattrwalk)(void *, struct l9p_request *); + int (*xattrcreate)(void *, struct l9p_request *); + int (*xattrread)(void *, struct l9p_request *); + int (*xattrwrite)(void *, struct l9p_request *); + int (*xattrclunk)(void *, struct l9p_fid *); + int (*readdir)(void *, struct l9p_request *); + int (*fsync)(void *, struct l9p_request *); + int (*lock)(void *, struct l9p_request *); + int (*getlock)(void *, struct l9p_request *); + int (*link)(void *, struct l9p_request *); + int (*mkdir)(void *, struct l9p_request *); + int (*renameat)(void *, struct l9p_request *); + int (*unlinkat)(void *, struct l9p_request *); +}; + +#endif /* LIB9P_BACKEND_H */ Index: lib/lib9p/backend/fs.h =================================================================== --- /dev/null +++ lib/lib9p/backend/fs.h @@ -0,0 +1,37 @@ + +/* + * Copyright 2016 Chris Torek + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, 
are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef LIB9P_BACKEND_FS_H +#define LIB9P_BACKEND_FS_H + +#include +#include "backend.h" + +int l9p_backend_fs_init(struct l9p_backend **backendp, int rootfd, bool ro); + +#endif /* LIB9P_BACKEND_FS_H */ Index: lib/lib9p/backend/fs.c =================================================================== --- /dev/null +++ lib/lib9p/backend/fs.c @@ -0,0 +1,3031 @@ +/* + * Copyright 2016 Jakub Klama + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * Based on libixp code: ゥ2007-2010 Kris Maglione + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../lib9p.h" +#include "../lib9p_impl.h" +#include "../fid.h" +#include "../log.h" +#include "../rfuncs.h" +#include "../genacl.h" +#include "backend.h" +#include "fs.h" + +#if defined(WITH_CASPER) + #include + #include + #include +#endif + +#if defined(__FreeBSD__) + #include + #if __FreeBSD_version >= 1000000 + #define HAVE_BINDAT + #endif +#endif + +#if defined(__FreeBSD__) + #define HAVE_BIRTHTIME +#endif + +#if defined(__APPLE__) + #include "Availability.h" + #define ACL_TYPE_NFS4 ACL_TYPE_EXTENDED +#endif + +struct fs_softc { + int fs_rootfd; + bool fs_readonly; +#if defined(WITH_CASPER) + cap_channel_t *fs_cappwd; + cap_channel_t *fs_capgrp; +#endif +}; + +struct fs_fid { + DIR *ff_dir; + int ff_dirfd; + int ff_fd; + int ff_flags; + char 
*ff_name; + struct fs_authinfo *ff_ai; + pthread_mutex_t ff_mtx; + struct l9p_acl *ff_acl; /* cached ACL if any */ +}; + +#define FF_NO_NFSV4_ACL 0x01 /* don't go looking for NFSv4 ACLs */ +/* FF_NO_POSIX_ACL 0x02 -- not yet */ + +/* + * Our authinfo consists of: + * + * - a reference count + * - a uid + * - a gid-set + * + * The "default" gid is the first gid in the gid-set, provided the + * set size is at least 1. The set-size may be zero, though. + * + * Adjustments to the ref-count must be atomic, once it's shared. + * It would be nice to use C11 atomics here but they are not common + * enough to all systems just yet; for now, we use a mutex. + * + * Note that some ops (Linux style ones) pass an effective gid for + * the op, in which case, that gid may override. To achieve this + * effect, permissions testing functions also take an extra gid. + * If this gid is (gid_t)-1 it is not used and only the remaining + * gids take part. + * + * The uid may also be (uid_t)-1, meaning "no uid was available + * at all at attach time". In this case, new files inherit parent + * directory uids. + * + * The refcount is simply the number of "openfile"s using this + * authinfo (so that when the last ref goes away, we can free it). + * + * There are also master ACL flags (same as in ff_flags). + */ +struct fs_authinfo { + pthread_mutex_t ai_mtx; /* lock for refcnt */ + uint32_t ai_refcnt; /* number of open-file structures sharing this authinfo */ + int ai_flags; /* master ACL flags, same bits as ff_flags (FF_*) */ + uid_t ai_uid; /* (uid_t)-1 if no uid was available at attach time */ + int ai_ngids; /* number of entries in ai_gids; may be zero */ + gid_t ai_gids[]; /* NB: flexible array member */ +}; + +/* + * We have a global-static mutex for single-threading Tattach + * requests, which use getpwnam (and indirectly, getgr* functions) + * which are not reentrant. + */ +static bool fs_attach_mutex_inited; +static pthread_mutex_t fs_attach_mutex; + +/* + * Internal functions (except inline functions). 
 + */ +static struct passwd *fs_getpwuid(struct fs_softc *, uid_t, struct r_pgdata *); +static struct group *fs_getgrgid(struct fs_softc *, gid_t, struct r_pgdata *); +static int fs_buildname(struct l9p_fid *, char *, char *, size_t); +static int fs_pdir(struct fs_softc *, struct l9p_fid *, char *, size_t, + struct stat *st); +static int fs_dpf(char *, char *, size_t); +static int fs_oflags_dotu(int, int *); +static int fs_oflags_dotl(uint32_t, int *, enum l9p_omode *); +static int fs_nde(struct fs_softc *, struct l9p_fid *, bool, gid_t, + struct stat *, uid_t *, gid_t *); +static struct fs_fid *open_fid(int, const char *, struct fs_authinfo *, bool); +static void dostat(struct fs_softc *, struct l9p_stat *, char *, + struct stat *, bool dotu); +static void dostatfs(struct l9p_statfs *, struct statfs *, long); +static void fillacl(struct fs_fid *ff); +static struct l9p_acl *getacl(struct fs_fid *ff, int fd, const char *path); +static void dropacl(struct fs_fid *ff); +static struct l9p_acl *look_for_nfsv4_acl(struct fs_fid *ff, int fd, + const char *path); +static int check_access(int32_t, + struct l9p_acl *, struct stat *, struct l9p_acl *, struct stat *, + struct fs_authinfo *, gid_t); +static void generate_qid(struct stat *, struct l9p_qid *); + +static int fs_icreate(void *, struct l9p_fid *, char *, int, + bool, mode_t, gid_t, struct stat *); +static int fs_iopen(void *, struct l9p_fid *, int, enum l9p_omode, + gid_t, struct stat *); +static int fs_imkdir(void *, struct l9p_fid *, char *, + bool, mode_t, gid_t, struct stat *); +static int fs_imkfifo(void *, struct l9p_fid *, char *, + bool, mode_t, gid_t, struct stat *); +static int fs_imknod(void *, struct l9p_fid *, char *, + bool, mode_t, dev_t, gid_t, struct stat *); +static int fs_imksocket(void *, struct l9p_fid *, char *, + bool, mode_t, gid_t, struct stat *); +static int fs_isymlink(void *, struct l9p_fid *, char *, char *, + gid_t, struct stat *); + +/* + * Internal functions implementing backend. 
 + */ +static int fs_attach(void *, struct l9p_request *); +static int fs_clunk(void *, struct l9p_fid *); +static int fs_create(void *, struct l9p_request *); +static int fs_open(void *, struct l9p_request *); +static int fs_read(void *, struct l9p_request *); +static int fs_remove(void *, struct l9p_fid *); +static int fs_stat(void *, struct l9p_request *); +static int fs_walk(void *, struct l9p_request *); +static int fs_write(void *, struct l9p_request *); +static int fs_wstat(void *, struct l9p_request *); +static int fs_statfs(void *, struct l9p_request *); +static int fs_lopen(void *, struct l9p_request *); +static int fs_lcreate(void *, struct l9p_request *); +static int fs_symlink(void *, struct l9p_request *); +static int fs_mknod(void *, struct l9p_request *); +static int fs_rename(void *, struct l9p_request *); +static int fs_readlink(void *, struct l9p_request *); +static int fs_getattr(void *, struct l9p_request *); +static int fs_setattr(void *, struct l9p_request *); +static int fs_xattrwalk(void *, struct l9p_request *); +static int fs_xattrcreate(void *, struct l9p_request *); +static int fs_readdir(void *, struct l9p_request *); +static int fs_fsync(void *, struct l9p_request *); +static int fs_lock(void *, struct l9p_request *); +static int fs_getlock(void *, struct l9p_request *); +static int fs_link(void *, struct l9p_request *); +static int fs_renameat(void *, struct l9p_request *); +static int fs_unlinkat(void *, struct l9p_request *); +static void fs_freefid(void *, struct l9p_fid *); + +/* + * Convert from 9p2000 open/create mode to Unix-style O_* flags. + * This includes 9p2000.u extensions, but not 9p2000.L protocol, + * which has entirely different open, create, etc., flag bits. + * + * The <mode> given here is the one-byte (uint8_t) "mode" + * argument to Tcreate or Topen, so it can have at most 8 bits. 
+ * + * https://swtch.com/plan9port/man/man9/open.html and + * http://plan9.bell-labs.com/magic/man2html/5/open + * both say: + * + * The [low two bits of the] mode field determines the + * type of I/O ... [I]f mode has the OTRUNC (0x10) bit + * set, the file is to be truncated, which requires write + * permission ...; if the mode has the ORCLOSE (0x40) bit + * set, the file is to be removed when the fid is clunked, + * which requires permission to remove the file from its + * directory. All other bits in mode should be zero. It + * is illegal to write a directory, truncate it, or + * attempt to remove it on close. + * + * 9P2000.u may add ODIRECT (0x80); this is not completely clear. + * The fcall.h header defines OCEXEC (0x20) as well, but it makes + * no sense to send this to a server. There seem to be no bits + * 0x04 and 0x08. + * + * We always turn on O_NOCTTY since as a server, we never want + * to gain a controlling terminal. We always turn on O_NOFOLLOW + * for reasons described elsewhere. + */ +static int +fs_oflags_dotu(int mode, int *aflags) +{ + int flags; +#define CONVERT(theirs, ours) \ + do { \ + if (mode & (theirs)) { \ + mode &= ~(theirs); \ + flags |= ours; \ + } \ + } while (0) + + switch (mode & L9P_OACCMODE) { + + case L9P_OREAD: + default: + flags = O_RDONLY; + break; + + case L9P_OWRITE: + flags = O_WRONLY; + break; + + case L9P_ORDWR: + flags = O_RDWR; + break; + + case L9P_OEXEC: + if (mode & L9P_OTRUNC) + return (EINVAL); + flags = O_RDONLY; + break; + } + + flags |= O_NOCTTY | O_NOFOLLOW; + + CONVERT(L9P_OTRUNC, O_TRUNC); + + /* + * Now take away some flags locally: + * the access mode (already translated) + * ORCLOSE - caller only + * OCEXEC - makes no sense in server + * ODIRECT - not applicable here + * If there are any flag bits left after this, + * we were unable to translate them. For now, let's + * treat this as EINVAL so that we can catch problems. 
+ */ + mode &= ~(L9P_OACCMODE | L9P_ORCLOSE | L9P_OCEXEC | L9P_ODIRECT); + if (mode != 0) { + L9P_LOG(L9P_INFO, + "fs_oflags_dotu: untranslated bits: %#x", + (unsigned)mode); + return (EINVAL); + } + + *aflags = flags; + return (0); +#undef CONVERT +} + +/* + * Convert from 9P2000.L (Linux) open mode bits to O_* flags. + * See fs_oflags_dotu above. + * + * Linux currently does not have open-for-exec, but there is a + * proposal for it using O_PATH|O_NOFOLLOW, now handled here. + * + * We may eventually also set L9P_ORCLOSE for L_O_TMPFILE. + */ +static int +fs_oflags_dotl(uint32_t l_mode, int *aflags, enum l9p_omode *ap9) +{ + int flags; + enum l9p_omode p9; +#define CLEAR(theirs) l_mode &= ~(uint32_t)(theirs) +#define CONVERT(theirs, ours) \ + do { \ + if (l_mode & (theirs)) { \ + CLEAR(theirs); \ + flags |= ours; \ + } \ + } while (0) + + /* + * Linux O_RDONLY, O_WRONLY, O_RDWR (0,1,2) match BSD/MacOS. + */ + flags = l_mode & O_ACCMODE; + if (flags == 3) + return (EINVAL); + CLEAR(O_ACCMODE); + + if ((l_mode & (L9P_L_O_PATH | L9P_L_O_NOFOLLOW)) == + (L9P_L_O_PATH | L9P_L_O_NOFOLLOW)) { + CLEAR(L9P_L_O_PATH | L9P_L_O_NOFOLLOW); + p9 = L9P_OEXEC; + } else { + /* + * Slightly dirty, but same dirt, really, as + * setting flags from l_mode & O_ACCMODE. + */ + p9 = (enum l9p_omode)flags; /* slightly dirty */ + } + + /* turn L_O_TMPFILE into L9P_ORCLOSE in *p9? */ + if (l_mode & L9P_L_O_TRUNC) + p9 |= L9P_OTRUNC; /* but don't CLEAR yet */ + + flags |= O_NOCTTY | O_NOFOLLOW; + + /* + * L_O_CREAT seems to be noise, since we get separate open + * and create. But it is actually set sometimes. We just + * throw it out here; create ops must set it themselves and + * open ops have no permissions bits and hence cannot create. + * + * L_O_EXCL does make sense on create ops, i.e., we can + * take a create op with or without L_O_EXCL. We pass that + * through. 
+ */ + CLEAR(L9P_L_O_CREAT); + CONVERT(L9P_L_O_EXCL, O_EXCL); + CONVERT(L9P_L_O_TRUNC, O_TRUNC); + CONVERT(L9P_L_O_DIRECTORY, O_DIRECTORY); + CONVERT(L9P_L_O_APPEND, O_APPEND); + CONVERT(L9P_L_O_NONBLOCK, O_NONBLOCK); + + /* + * Discard these as useless noise at our (server) end. + * (NOATIME might be useful but we can only set it on a + * per-mount basis.) + */ + CLEAR(L9P_L_O_CLOEXEC); + CLEAR(L9P_L_O_DIRECT); + CLEAR(L9P_L_O_DSYNC); + CLEAR(L9P_L_O_FASYNC); + CLEAR(L9P_L_O_LARGEFILE); + CLEAR(L9P_L_O_NOATIME); + CLEAR(L9P_L_O_NOCTTY); + CLEAR(L9P_L_O_NOFOLLOW); + CLEAR(L9P_L_O_SYNC); + + if (l_mode != 0) { + L9P_LOG(L9P_INFO, + "fs_oflags_dotl: untranslated bits: %#x", + (unsigned)l_mode); + return (EINVAL); + } + + *aflags = flags; + *ap9 = p9; + return (0); +#undef CLEAR +#undef CONVERT +} + +static struct passwd * +fs_getpwuid(struct fs_softc *sc, uid_t uid, struct r_pgdata *pg) +{ +#if defined(WITH_CASPER) + return (r_cap_getpwuid(sc->fs_cappwd, uid, pg)); +#else + (void)sc; + return (r_getpwuid(uid, pg)); +#endif +} + +static struct group * +fs_getgrgid(struct fs_softc *sc, gid_t gid, struct r_pgdata *pg) +{ +#if defined(WITH_CASPER) + return (r_cap_getgrgid(sc->fs_capgrp, gid, pg)); +#else + (void)sc; + return (r_getgrgid(gid, pg)); +#endif +} + +/* + * Build full name of file by appending given name to directory name. + */ +static int +fs_buildname(struct l9p_fid *dir, char *name, char *buf, size_t size) +{ + struct fs_fid *dirf = dir->lo_aux; + size_t dlen, nlen1; + + assert(dirf != NULL); + dlen = strlen(dirf->ff_name); + nlen1 = strlen(name) + 1; /* +1 for '\0' */ + if (dlen + 1 + nlen1 > size) + return (ENAMETOOLONG); + memcpy(buf, dirf->ff_name, dlen); + buf[dlen] = '/'; + memcpy(buf + dlen + 1, name, nlen1); + return (0); +} + +/* + * Build parent name of file by splitting it off. Return an error + * if the given fid represents the root, so that there is no such + * parent, or if the discovered parent is not a directory. 
+ */ +static int +fs_pdir(struct fs_softc *sc __unused, struct l9p_fid *fid, char *buf, + size_t size, struct stat *st) +{ + struct fs_fid *ff; + char *path; + + ff = fid->lo_aux; + assert(ff != NULL); + path = ff->ff_name; + path = r_dirname(path, buf, size); + if (path == NULL) + return (ENAMETOOLONG); + if (fstatat(ff->ff_dirfd, path, st, AT_SYMLINK_NOFOLLOW) != 0) + return (errno); + if (!S_ISDIR(st->st_mode)) + return (ENOTDIR); + return (0); +} + +/* + * Like fs_buildname() but for adding a file name to a buffer + * already holding a directory name. Essentially does + * strcat(dbuf, "/"); + * strcat(dbuf, fname); + * but with size checking and an ENAMETOOLONG error as needed. + * + * (Think of the function name as "directory plus-equals file".) + */ +static int +fs_dpf(char *dbuf, char *fname, size_t size) +{ + size_t dlen, nlen1; + + dlen = strlen(dbuf); + nlen1 = strlen(fname) + 1; + if (dlen + 1 + nlen1 > size) + return (ENAMETOOLONG); + dbuf[dlen] = '/'; + memcpy(dbuf + dlen + 1, fname, nlen1); + return (0); +} + +/* + * Prepare to create a new directory entry (open with O_CREAT, + * mkdir, etc -- any operation that creates a new inode), + * operating in parent data , based on authinfo and + * effective gid . + * + * The new entity should be owned by user/group <*nuid, *ngid>, + * if it's really a new entity. It will be a directory if isdir. + * + * Returns an error number if the entry should not be created + * (e.g., read-only file system or no permission to write in + * parent directory). Always sets *nuid and *ngid on success: + * in the worst case, when there is no available ID, this will + * use the parent directory's IDs. Fills in <*st> on success. 
+ */ +static int +fs_nde(struct fs_softc *sc, struct l9p_fid *dir, bool isdir, gid_t egid, + struct stat *st, uid_t *nuid, gid_t *ngid) +{ + struct fs_fid *dirf; + struct fs_authinfo *ai; + int32_t op; + int error; + + if (sc->fs_readonly) + return (EROFS); + dirf = dir->lo_aux; + assert(dirf != NULL); + if (fstatat(dirf->ff_dirfd, dirf->ff_name, st, + AT_SYMLINK_NOFOLLOW) != 0) + return (errno); + if (!S_ISDIR(st->st_mode)) + return (ENOTDIR); + dirf = dir->lo_aux; + ai = dirf->ff_ai; + fillacl(dirf); + op = isdir ? L9P_ACE_ADD_SUBDIRECTORY : L9P_ACE_ADD_FILE; + error = check_access(op, dirf->ff_acl, st, NULL, NULL, ai, egid); + if (error) + return (EPERM); + + *nuid = ai->ai_uid != (uid_t)-1 ? ai->ai_uid : st->st_uid; + *ngid = egid != (gid_t)-1 ? egid : + ai->ai_ngids > 0 ? ai->ai_gids[0] : st->st_gid; + return (0); +} + +/* + * Allocate new open-file data structure to attach to a fid. + * + * The new file's authinfo is the same as the old one's, and + * we gain a reference. + */ +static struct fs_fid * +open_fid(int dirfd, const char *path, struct fs_authinfo *ai, bool creating) +{ + struct fs_fid *ret; + uint32_t newcount; + int error; + + ret = l9p_calloc(1, sizeof(*ret)); + error = pthread_mutex_init(&ret->ff_mtx, NULL); + if (error) { + free(ret); + return (NULL); + } + ret->ff_fd = -1; + ret->ff_dirfd = dirfd; + ret->ff_name = strdup(path); + if (ret->ff_name == NULL) { + pthread_mutex_destroy(&ret->ff_mtx); + free(ret); + return (NULL); + } + pthread_mutex_lock(&ai->ai_mtx); + newcount = ++ai->ai_refcnt; + pthread_mutex_unlock(&ai->ai_mtx); + /* + * If we just incremented the count to 1, we're the *first* + * reference. This is only allowed when creating the authinfo, + * otherwise it means something has gone wrong. This cannot + * catch every bad (re)use of a freed authinfo but it may catch + * a few. 
+ */ + assert(newcount > 1 || creating); + L9P_LOG(L9P_DEBUG, "authinfo %p now used by %lu", + (void *)ai, (u_long)newcount); + ret->ff_ai = ai; + return (ret); +} + +static void +dostat(struct fs_softc *sc, struct l9p_stat *s, char *name, + struct stat *buf, bool dotu) +{ + struct passwd *user; + struct group *group; + + memset(s, 0, sizeof(struct l9p_stat)); + + generate_qid(buf, &s->qid); + + s->type = 0; + s->dev = 0; + s->mode = buf->st_mode & 0777; + + if (S_ISDIR(buf->st_mode)) + s->mode |= L9P_DMDIR; + + if (S_ISLNK(buf->st_mode) && dotu) + s->mode |= L9P_DMSYMLINK; + + if (S_ISCHR(buf->st_mode) || S_ISBLK(buf->st_mode)) + s->mode |= L9P_DMDEVICE; + + if (S_ISSOCK(buf->st_mode)) + s->mode |= L9P_DMSOCKET; + + if (S_ISFIFO(buf->st_mode)) + s->mode |= L9P_DMNAMEDPIPE; + + s->atime = (uint32_t)buf->st_atime; + s->mtime = (uint32_t)buf->st_mtime; + s->length = (uint64_t)buf->st_size; + + s->name = r_basename(name, NULL, 0); + + if (!dotu) { + struct r_pgdata udata, gdata; + + user = fs_getpwuid(sc, buf->st_uid, &udata); + group = fs_getgrgid(sc, buf->st_gid, &gdata); + s->uid = user != NULL ? strdup(user->pw_name) : NULL; + s->gid = group != NULL ? strdup(group->gr_name) : NULL; + s->muid = user != NULL ? strdup(user->pw_name) : NULL; + r_pgfree(&udata); + r_pgfree(&gdata); + } else { + /* + * When using 9P2000.u, we don't need to bother about + * providing user and group names in textual form. + * + * NB: if the asprintf()s fail, s->extension should + * be unset so we can ignore these. 
+ */ + s->n_uid = buf->st_uid; + s->n_gid = buf->st_gid; + s->n_muid = buf->st_uid; + + if (S_ISLNK(buf->st_mode)) { + char target[MAXPATHLEN]; + ssize_t ret = readlink(name, target, MAXPATHLEN); + + if (ret < 0) { + s->extension = NULL; + return; + } + + s->extension = strndup(target, (size_t)ret); + } + + if (S_ISBLK(buf->st_mode)) { + asprintf(&s->extension, "b %d %d", major(buf->st_rdev), + minor(buf->st_rdev)); + } + + if (S_ISCHR(buf->st_mode)) { + asprintf(&s->extension, "c %d %d", major(buf->st_rdev), + minor(buf->st_rdev)); + } + } +} + +static void dostatfs(struct l9p_statfs *out, struct statfs *in, long namelen) +{ + + out->type = L9P_FSTYPE; + out->bsize = in->f_bsize; + out->blocks = in->f_blocks; + out->bfree = in->f_bfree; + out->bavail = in->f_bavail; + out->files = in->f_files; + out->ffree = in->f_ffree; + out->namelen = (uint32_t)namelen; + out->fsid = ((uint64_t)in->f_fsid.val[0] << 32) | + (uint64_t)in->f_fsid.val[1]; +} + +static void +generate_qid(struct stat *buf, struct l9p_qid *qid) +{ + qid->path = buf->st_ino; + qid->version = 0; + + if (S_ISREG(buf->st_mode)) + qid->type |= L9P_QTFILE; + + if (S_ISDIR(buf->st_mode)) + qid->type |= L9P_QTDIR; + + if (S_ISLNK(buf->st_mode)) + qid->type |= L9P_QTSYMLINK; +} + +/* + * Fill in ff->ff_acl if it's not set yet. Skip if the "don't use + * ACLs" flag is set, and use the flag to remember failure so + * we don't bother retrying either. + */ +static void +fillacl(struct fs_fid *ff) +{ + + if (ff->ff_acl == NULL && (ff->ff_flags & FF_NO_NFSV4_ACL) == 0) { + ff->ff_acl = look_for_nfsv4_acl(ff, ff->ff_fd, ff->ff_name); + if (ff->ff_acl == NULL) + ff->ff_flags |= FF_NO_NFSV4_ACL; + } +} + +/* + * Get an ACL given fd and/or path name. We check for the "don't get + * ACL" flag in the given ff_fid data structure first, but don't set + * the flag here. The fillacl() code is similar but will set the + * flag; it also uses the ff_fd and ff_name directly. 
+ * + * (This is used to get ACLs for parent directories, for instance.) + */ +static struct l9p_acl * +getacl(struct fs_fid *ff, int fd, const char *path) +{ + + if (ff->ff_flags & FF_NO_NFSV4_ACL) + return (NULL); + return look_for_nfsv4_acl(ff, fd, path); +} + +/* + * Drop cached ff->ff_acl, e.g., after moving from one directory to + * another, where inherited ACLs might change. + */ +static void +dropacl(struct fs_fid *ff) +{ + + l9p_acl_free(ff->ff_acl); + ff->ff_acl = NULL; + ff->ff_flags = ff->ff_ai->ai_flags; +} + +/* + * Check to see if we can find NFSv4 ACLs for the given file. + * If we have an open fd, we can use that, otherwise we need + * to use the path. + */ +static struct l9p_acl * +look_for_nfsv4_acl(struct fs_fid *ff, int fd, const char *path) +{ + struct l9p_acl *acl; + acl_t sysacl; + int doclose = 0; + + if (fd < 0) { + fd = openat(ff->ff_dirfd, path, 0); + doclose = 1; + } + + sysacl = acl_get_fd_np(fd, ACL_TYPE_NFS4); + if (sysacl == NULL) { + /* + * EINVAL means no NFSv4 ACLs apply for this file. + * Other error numbers indicate some kind of problem. + */ + if (errno != EINVAL) { + L9P_LOG(L9P_ERROR, + "error retrieving NFSv4 ACL from " + "fdesc %d (%s): %s", fd, + path, strerror(errno)); + } + + if (doclose) + close(fd); + + return (NULL); + } +#if defined(HAVE_FREEBSD_ACLS) + acl = l9p_freebsd_nfsv4acl_to_acl(sysacl); +#else + acl = NULL; /* XXX need a l9p_darwin_acl_to_acl */ +#endif + acl_free(sysacl); + + if (doclose) + close(fd); + + return (acl); +} + +/* + * Verify that the user whose authinfo is in and effective + * group ID is ((gid_t)-1 means no egid supplied) has + * permission to do something. + * + * The "something" may be rather complex: we allow NFSv4 style + * operation masks here, and provide parent and child ACLs and + * stat data. At most one of pacl+pst and cacl+cst can be NULL, + * unless ACLs are not supported; then pacl and cacl can both + * be NULL but pst or cst must be non-NULL depending on the + * operation. 
+ */ +static int +check_access(int32_t opmask, + struct l9p_acl *pacl, struct stat *pst, + struct l9p_acl *cacl, struct stat *cst, + struct fs_authinfo *ai, gid_t egid) +{ + struct l9p_acl_check_args args; + + /* + * If we have ACLs, use them exclusively, ignoring Unix + * permissions. Otherwise, fall back on stat st_mode + * bits, and allow super-user as well. + */ + args.aca_uid = ai->ai_uid; + args.aca_gid = egid; + args.aca_groups = ai->ai_gids; + args.aca_ngroups = (size_t)ai->ai_ngids; + args.aca_parent = pacl; + args.aca_pstat = pst; + args.aca_child = cacl; + args.aca_cstat = cst; + args.aca_aclmode = pacl == NULL && cacl == NULL + ? L9P_ACM_STAT_MODE + : L9P_ACM_NFS_ACL | L9P_ACM_ZFS_ACL; + + args.aca_superuser = true; + return (l9p_acl_check_access(opmask, &args)); +} + +static int +fs_attach(void *softc, struct l9p_request *req) +{ + struct fs_authinfo *ai; + struct fs_softc *sc = (struct fs_softc *)softc; + struct fs_fid *file; + struct passwd *pwd; + struct stat st; + struct r_pgdata udata; + uint32_t n_uname; + gid_t *gids; + uid_t uid; + int error; + int ngroups; + + assert(req->lr_fid != NULL); + + /* + * Single-thread pwd/group related items. We have a reentrant + * r_getpwuid but not a reentrant r_getpwnam, and l9p_getgrlist + * may use non-reentrant C library getgr* routines. + */ + pthread_mutex_lock(&fs_attach_mutex); + + n_uname = req->lr_req.tattach.n_uname; + if (n_uname != L9P_NONUNAME) { + uid = (uid_t)n_uname; + pwd = fs_getpwuid(sc, uid, &udata); + if (pwd == NULL) + L9P_LOG(L9P_DEBUG, + "Tattach: uid %ld: no such user", (long)uid); + } else { + uid = (uid_t)-1; +#if defined(WITH_CASPER) + pwd = cap_getpwnam(sc->fs_cappwd, req->lr_req.tattach.uname); +#else + pwd = getpwnam(req->lr_req.tattach.uname); +#endif + if (pwd == NULL) + L9P_LOG(L9P_DEBUG, + "Tattach: %s: no such user", + req->lr_req.tattach.uname); + } + + /* + * If caller didn't give a numeric UID, pick it up from pwd + * if possible. If that doesn't work we can't continue. 
+ * + * Note that pwd also supplies the group set. This assumes + * the server has the right mapping; this needs improvement. + * We do at least support ai->ai_ngids==0 properly now though. + */ + if (uid == (uid_t)-1 && pwd != NULL) + uid = pwd->pw_uid; + if (uid == (uid_t)-1) + error = EPERM; + else { + error = 0; + if (fstat(sc->fs_rootfd, &st) != 0) + error = errno; + else if (!S_ISDIR(st.st_mode)) + error = ENOTDIR; + } + if (error) { + pthread_mutex_unlock(&fs_attach_mutex); + L9P_LOG(L9P_DEBUG, + "Tattach: denying uid=%ld access to rootdir: %s", + (long)uid, strerror(error)); + /* + * Pass ENOENT and ENOTDIR through for diagnosis; + * others become EPERM. This should not leak too + * much security. + */ + return (error == ENOENT || error == ENOTDIR ? error : EPERM); + } + + if (pwd != NULL) { + /* + * This either succeeds and fills in ngroups and + * returns non-NULL, or fails and sets ngroups to 0 + * and returns NULL. Either way ngroups is correct. + */ + gids = l9p_getgrlist(pwd->pw_name, pwd->pw_gid, &ngroups); + } else { + gids = NULL; + ngroups = 0; + } + + /* + * Done with pwd and group related items that may use + * non-reentrant C library routines; allow other threads in. 
+ */ + pthread_mutex_unlock(&fs_attach_mutex); + + ai = malloc(sizeof(*ai) + (size_t)ngroups * sizeof(gid_t)); + if (ai == NULL) { + free(gids); + return (ENOMEM); + } + error = pthread_mutex_init(&ai->ai_mtx, NULL); + if (error) { + free(gids); + free(ai); + return (error); + } + ai->ai_refcnt = 0; + ai->ai_uid = uid; + ai->ai_flags = 0; /* XXX for now */ + ai->ai_ngids = ngroups; + memcpy(ai->ai_gids, gids, (size_t)ngroups * sizeof(gid_t)); + free(gids); + + file = open_fid(sc->fs_rootfd, ".", ai, true); + if (file == NULL) { + pthread_mutex_destroy(&ai->ai_mtx); + free(ai); + return (ENOMEM); + } + + req->lr_fid->lo_aux = file; + generate_qid(&st, &req->lr_resp.rattach.qid); + return (0); +} + +static int +fs_clunk(void *softc __unused, struct l9p_fid *fid) +{ + struct fs_fid *file; + + file = fid->lo_aux; + assert(file != NULL); + + if (file->ff_dir) { + closedir(file->ff_dir); + file->ff_dir = NULL; + } else if (file->ff_fd != -1) { + close(file->ff_fd); + file->ff_fd = -1; + } + + return (0); +} + +/* + * Create ops. + * + * We are to create a new file under some existing path, + * where the new file's name is in the Tcreate request and the + * existing path is due to a fid-based file (req->lr_fid). + * + * One op (create regular file) sets file->fd, the rest do not. 
+ */
+static int
+fs_create(void *softc, struct l9p_request *req)
+{
+	struct l9p_fid *dir;
+	struct stat st;
+	uint32_t dmperm;
+	mode_t perm;
+	char *name;
+	int error;
+
+	dir = req->lr_fid;
+	name = req->lr_req.tcreate.name;
+	dmperm = req->lr_req.tcreate.perm;
+	/* Only the low nine bits are permissions; DM* bits pick the type. */
+	perm = (mode_t)(dmperm & 0777);
+
+	/* Every helper below is called with isp9 == true and no egid. */
+	if (dmperm & L9P_DMDIR)
+		error = fs_imkdir(softc, dir, name, true,
+		    perm, (gid_t)-1, &st);
+	else if (dmperm & L9P_DMSYMLINK)
+		error = fs_isymlink(softc, dir, name,
+		    req->lr_req.tcreate.extension, (gid_t)-1, &st);
+	else if (dmperm & L9P_DMNAMEDPIPE)
+		error = fs_imkfifo(softc, dir, name, true,
+		    perm, (gid_t)-1, &st);
+	else if (dmperm & L9P_DMSOCKET)
+		error = fs_imksocket(softc, dir, name, true,
+		    perm, (gid_t)-1, &st);
+	else if (dmperm & L9P_DMDEVICE) {
+		unsigned int major, minor;
+		char type;
+		dev_t dev;
+
+		/*
+		 * The extension string is parsed as "<type> <major> <minor>",
+		 * where <type> is 'b' (block) or 'c' (character).
+		 *
+		 * ??? Should this be testing < 3? For now, allow a single
+		 * integer mode with minor==0 implied.
+		 */
+		minor = 0;
+		if (sscanf(req->lr_req.tcreate.extension, "%c %u %u",
+		    &type, &major, &minor) < 2) {
+			return (EINVAL);
+		}
+
+		switch (type) {
+		case 'b':
+			perm |= S_IFBLK;
+			break;
+		case 'c':
+			perm |= S_IFCHR;
+			break;
+		default:
+			return (EINVAL);
+		}
+		dev = makedev(major, minor);
+		error = fs_imknod(softc, dir, name, true, perm, dev,
+		    (gid_t)-1, &st);
+	} else {
+		enum l9p_omode p9;
+		int flags;
+
+		/* Plain file: translate the 9P open mode, then create+open. */
+		p9 = req->lr_req.tcreate.mode;
+		error = fs_oflags_dotu(p9, &flags);
+		if (error)
+			return (error);
+		error = fs_icreate(softc, dir, name, flags,
+		    true, perm, (gid_t)-1, &st);
+		/* Note: iounit is stored whether or not the create worked. */
+		req->lr_resp.rcreate.iounit = req->lr_conn->lc_max_io_size;
+	}
+
+	/* st is only used (for the reply qid) when the helper succeeded. */
+	if (error == 0)
+		generate_qid(&st, &req->lr_resp.rcreate.qid);
+
+	return (error);
+}
+
+/*
+ * https://swtch.com/plan9port/man/man9/open.html and
+ * http://plan9.bell-labs.com/magic/man2html/5/open
+ * say that permissions are actually
+ *	perm & (~0666 | (dir.perm & 0666))
+ * for files, and
+ *	perm & (~0777 | (dir.perm & 0777))
+ * for directories. That is, the parent directory may
+ * take away permissions granted by the operation.
+ *
+ * This seems a bit restrictive; probably
+ * there should be a control knob for this.
+ *
+ * <perm> is the requested mode, <dir_perm> the mode of the parent
+ * directory, and <isdir> selects the directory (0777) or the file
+ * (0666) mask. Returns the adjusted mode.
+ */
+static inline mode_t
+fs_p9perm(mode_t perm, mode_t dir_perm, bool isdir)
+{
+
+	if (isdir)
+		perm &= ~0777 | (dir_perm & 0777);
+	else
+		perm &= ~0666 | (dir_perm & 0666);
+	return (perm);
+}
+
+/*
+ * Internal form of create (plain file).
+ *
+ * Our caller takes care of splitting off all the special
+ * types of create (mknod, etc), so this is purely for files.
+ * We receive the fs_softc <softc>, the directory fid <dir>
+ * in which the new file is to be created, the name <name> of the
+ * new file, a flag <isp9> indicating whether to do plan9 style
+ * permissions or Linux style permissions, the permissions <perm>,
+ * an effective group id <egid>, and a pointer to a stat structure
+ * <st> to fill in describing the final result on success.
+ *
+ * On successful create, the fid switches to the newly created
+ * file, which is now open; its associated file-name changes too.
+ *
+ * Note that the original (dir) fid is never currently open,
+ * so there is nothing to close.
+ *
+ * Returns 0 on success or an errno value; on failure the fid is
+ * left unchanged.
+ */
+static int
+fs_icreate(void *softc, struct l9p_fid *dir, char *name, int flags,
+    bool isp9, mode_t perm, gid_t egid, struct stat *st)
+{
+	struct fs_fid *file;
+	gid_t gid;
+	uid_t uid;
+	char newname[MAXPATHLEN];
+	int error, fd;
+
+	file = dir->lo_aux;
+
+	/*
+	 * Build full path name from directory + file name. We'll
+	 * check permissions on the parent directory, then race to
+	 * create the file before anything bad happens like symlinks.
+	 *
+	 * (To close this race we need to use openat(), which is
+	 * left for a later version of this code.)
+	 */
+	error = fs_buildname(dir, name, newname, sizeof(newname));
+	if (error)
+		return (error);
+
+	/* In case of success, we will need a new file->ff_name. */
+	name = strdup(newname);
+	if (name == NULL)
+		return (ENOMEM);
+
+	/* Check create permission and compute new file ownership. */
+	error = fs_nde(softc, dir, false, egid, st, &uid, &gid);
+	if (error) {
+		free(name);
+		return (error);
+	}
+
+	/* Adjust new-file permissions for Plan9 protocol. */
+	if (isp9)
+		perm = fs_p9perm(perm, st->st_mode, false);
+
+	/* Create is always exclusive so O_TRUNC is irrelevant. */
+	fd = openat(file->ff_dirfd, newname, flags | O_CREAT | O_EXCL, perm);
+	if (fd < 0) {
+		error = errno;
+		free(name);
+		return (error);
+	}
+
+	/* Fix permissions and owner. */
+	if (fchmod(fd, perm) != 0 ||
+	    fchown(fd, uid, gid) != 0 ||
+	    fstat(fd, st) != 0) {
+		error = errno;
+		(void) close(fd);
+		/* unlink(newname); ? */
+		free(name);
+		return (error);
+	}
+
+	/* It *was* a directory; now it's a file, and it's open. */
+	free(file->ff_name);
+	file->ff_name = name;
+	file->ff_fd = fd;
+	return (0);
+}
+
+/*
+ * Internal form of open: stat file and verify permissions (from p9
+ * argument), then open the file-or-directory, leaving the internal
+ * fs_fid fields set up. If we cannot open the file, return a
+ * suitable error number, and leave everything unchanged.
+ *
+ * To mitigate the race between permissions testing and the actual
+ * open, we can stat the file twice (once with lstat() before open,
+ * then with fstat() after). We assume O_NOFOLLOW is set in flags,
+ * so if some other race-winner substitutes in a symlink we won't
+ * open it here. (However, embedded symlinks, if they occur, are
+ * still an issue. Ideally we would like to have an O_NEVERFOLLOW
+ * that fails on embedded symlinks, and a way to pass this to
+ * lstat() as well.)
+ *
+ * When we use opendir() we cannot pass O_NOFOLLOW, so we must rely
+ * on substitution-detection via fstat(). To simplify the code we
+ * just always re-check.
+ *
+ * (For a proper fix in the future, we can require openat(), keep
+ * each parent directory open during walk etc, and allow only final
+ * name components with O_NOFOLLOW.)
+ *
+ * On successful return, st has been filled in.
+ */
+static int
+fs_iopen(void *softc, struct l9p_fid *fid, int flags, enum l9p_omode p9,
+    gid_t egid __unused, struct stat *st)
+{
+	struct fs_softc *sc = softc;
+	struct fs_fid *file;
+	struct stat first;
+	int32_t op;
+	char *name;
+	int error;
+	int fd;
+	DIR *dirp;
+
+	/* Forbid write ops on read-only file system. */
+	if (sc->fs_readonly) {
+		if ((flags & O_TRUNC) != 0)
+			return (EROFS);
+		if ((flags & O_ACCMODE) != O_RDONLY)
+			return (EROFS);
+		if (p9 & L9P_ORCLOSE)
+			return (EROFS);
+	}
+
+	file = fid->lo_aux;
+	assert(file != NULL);
+	name = file->ff_name;
+
+	/* First look: do not follow, and refuse to open, a symlink. */
+	if (fstatat(file->ff_dirfd, name, &first, AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+	if (S_ISLNK(first.st_mode))
+		return (EPERM);
+
+	/* Can we rely on O_APPEND here? Best not, can be cleared. */
+	switch (flags & O_ACCMODE) {
+	case O_RDONLY:
+		op = L9P_ACE_READ_DATA;
+		break;
+	case O_WRONLY:
+		op = L9P_ACE_WRITE_DATA;
+		break;
+	case O_RDWR:
+		op = L9P_ACE_READ_DATA | L9P_ACE_WRITE_DATA;
+		break;
+	default:
+		return (EINVAL);
+	}
+	fillacl(file);
+	error = check_access(op, NULL, NULL, file->ff_acl, &first,
+	    file->ff_ai, (gid_t)-1);
+	if (error)
+		return (error);
+
+	if (S_ISDIR(first.st_mode)) {
+		/* Forbid write or truncate on directory. */
+		if ((flags & O_ACCMODE) != O_RDONLY || (flags & O_TRUNC))
+			return (EPERM);
+		fd = openat(file->ff_dirfd, name, O_DIRECTORY);
+		if (fd < 0)
+			return (EPERM);
+		dirp = fdopendir(fd);
+		if (dirp == NULL) {
+			/*
+			 * The stream did not take over the descriptor,
+			 * so it is still ours to close.
+			 */
+			(void) close(fd);
+			return (EPERM);
+		}
+		fd = dirfd(dirp);
+	} else {
+		dirp = NULL;
+		fd = openat(file->ff_dirfd, name, flags);
+		if (fd < 0)
+			return (EPERM);
+	}
+
+	/*
+	 * We have a valid fd, and maybe non-null dirp. Re-check
+	 * the file, and fail if st_dev or st_ino changed.
+	 */
+	if (fstat(fd, st) != 0 ||
+	    first.st_dev != st->st_dev ||
+	    first.st_ino != st->st_ino) {
+		if (dirp != NULL)
+			(void) closedir(dirp);
+		else
+			(void) close(fd);
+		return (EPERM);
+	}
+	if (dirp != NULL)
+		file->ff_dir = dirp;
+	else
+		file->ff_fd = fd;
+	return (0);
+}
+
+/*
+ * Internal form of mkdir (common code for all forms).
+ * We receive the fs_softc <softc>, the directory fid <dir>
+ * in which the new entry is to be created, the name <name> of the
+ * new entry, a flag <isp9> indicating whether to do plan9 style
+ * permissions or Linux style permissions, the permissions <perm>,
+ * an effective group id <egid>, and a pointer to a stat structure
+ * <st> to fill in describing the final result on success.
+ *
+ * See also fs_icreate() above.
+ *
+ * Returns 0 on success or an errno value. If a step after the
+ * mkdirat() fails, the new directory is left in place.
+ */
+static int
+fs_imkdir(void *softc, struct l9p_fid *dir, char *name,
+    bool isp9, mode_t perm, gid_t egid, struct stat *st)
+{
+	struct fs_fid *ff;
+	gid_t gid;
+	uid_t uid;
+	char newname[MAXPATHLEN];
+	int error, fd;
+
+	ff = dir->lo_aux;
+	error = fs_buildname(dir, name, newname, sizeof(newname));
+	if (error)
+		return (error);
+
+	error = fs_nde(softc, dir, true, egid, st, &uid, &gid);
+	if (error)
+		return (error);
+
+	if (isp9)
+		perm = fs_p9perm(perm, st->st_mode, true);
+
+	if (mkdirat(ff->ff_dirfd, newname, perm) != 0)
+		return (errno);
+
+	/* Open the new directory so owner and mode are set via the fd. */
+	fd = openat(ff->ff_dirfd, newname,
+	    O_DIRECTORY | O_RDONLY | O_NOFOLLOW);
+	if (fd < 0 ||
+	    fchown(fd, uid, gid) != 0 ||
+	    fchmod(fd, perm) != 0 ||
+	    fstat(fd, st) != 0) {
+		error = errno;
+		/* rmdir(newname) ? */
+	}
+	if (fd >= 0)
+		(void) close(fd);
+
+	return (error);
+}
+
+/*
+ * Internal form of mknod (special device).
+ *
+ * The device type (S_IFBLK, S_IFCHR) is included in the <mode> parameter.
+ */ +static int +fs_imknod(void *softc, struct l9p_fid *dir, char *name, + bool isp9, mode_t mode, dev_t dev, gid_t egid, struct stat *st) +{ + struct fs_fid *ff; + mode_t perm; + gid_t gid; + uid_t uid; + char newname[MAXPATHLEN]; + int error; + + ff = dir->lo_aux; + error = fs_buildname(dir, name, newname, sizeof(newname)); + if (error) + return (error); + + error = fs_nde(softc, dir, false, egid, st, &uid, &gid); + if (error) + return (error); + + if (isp9) { + perm = fs_p9perm(mode & 0777, st->st_mode, false); + mode = (mode & ~0777) | perm; + } else { + perm = mode & 0777; + } + + if (mknodat(ff->ff_dirfd, newname, mode, dev) != 0) + return (errno); + + /* We cannot open the new name; race to use l* syscalls. */ + if (fchownat(ff->ff_dirfd, newname, uid, gid, AT_SYMLINK_NOFOLLOW) != 0 || + fchmodat(ff->ff_dirfd, newname, perm, AT_SYMLINK_NOFOLLOW) != 0 || + fstatat(ff->ff_dirfd, newname, st, AT_SYMLINK_NOFOLLOW) != 0) + error = errno; + else if ((st->st_mode & S_IFMT) != (mode & S_IFMT)) + error = EPERM; /* ??? lost a race anyway */ + + /* if (error) unlink(newname) ? */ + + return (error); +} + +/* + * Internal form of mkfifo. + */ +static int +fs_imkfifo(void *softc, struct l9p_fid *dir, char *name, + bool isp9, mode_t perm, gid_t egid, struct stat *st) +{ + struct fs_fid *ff; + gid_t gid; + uid_t uid; + char newname[MAXPATHLEN]; + int error; + + ff = dir->lo_aux; + error = fs_buildname(dir, name, newname, sizeof(newname)); + if (error) + return (error); + + error = fs_nde(softc, dir, false, egid, st, &uid, &gid); + if (error) + return (error); + + if (isp9) + perm = fs_p9perm(perm, st->st_mode, false); + + if (mkfifo(newname, perm) != 0) + return (errno); + + /* We cannot open the new name; race to use l* syscalls. 
*/ + if (fchownat(ff->ff_dirfd, newname, uid, gid, AT_SYMLINK_NOFOLLOW) != 0 || + fchmodat(ff->ff_dirfd, newname, perm, AT_SYMLINK_NOFOLLOW) != 0 || + fstatat(ff->ff_dirfd, newname, st, AT_SYMLINK_NOFOLLOW) != 0) + error = errno; + else if (!S_ISFIFO(st->st_mode)) + error = EPERM; /* ??? lost a race anyway */ + + /* if (error) unlink(newname) ? */ + + return (error); +} + +/* + * Internal form of mksocket. + * + * This is a bit different because of the horrible socket naming + * system (bind() with sockaddr_un sun_path). + */ +static int +fs_imksocket(void *softc, struct l9p_fid *dir, char *name, + bool isp9, mode_t perm, gid_t egid, struct stat *st) +{ + struct fs_fid *ff; + struct sockaddr_un sun; + char *path; + char newname[MAXPATHLEN]; + gid_t gid; + uid_t uid; + int error = 0, s, fd; + + ff = dir->lo_aux; + error = fs_buildname(dir, name, newname, sizeof(newname)); + if (error) + return (error); + + error = fs_nde(softc, dir, false, egid, st, &uid, &gid); + if (error) + return (error); + + if (isp9) + perm = fs_p9perm(perm, st->st_mode, false); + + s = socket(AF_UNIX, SOCK_STREAM, 0); + if (s < 0) + return (errno); + + path = newname; + fd = -1; +#ifdef HAVE_BINDAT + /* Try bindat() if needed. */ + if (strlen(path) >= sizeof(sun.sun_path)) { + fd = openat(ff->ff_dirfd, ff->ff_name, + O_RDONLY | O_DIRECTORY | O_NOFOLLOW); + if (fd >= 0) + path = name; + } +#endif + + /* + * Can only create the socket if the path will fit. + * Even if we are using bindat() there are limits + * (the API for AF_UNIX sockets is ... not good). + * + * Note: in theory we can fill sun_path to the end + * (omitting a terminating '\0') but in at least one + * Unix-like system, this was known to behave oddly, + * so we test for ">=" rather than just ">". 
+ */ + if (strlen(path) >= sizeof(sun.sun_path)) { + error = ENAMETOOLONG; + goto out; + } + sun.sun_family = AF_UNIX; + sun.sun_len = sizeof(struct sockaddr_un); + strncpy(sun.sun_path, path, sizeof(sun.sun_path)); + +#ifdef HAVE_BINDAT + if (fd >= 0) { + if (bindat(fd, s, (struct sockaddr *)&sun, sun.sun_len) < 0) + error = errno; + goto out; /* done now, for good or ill */ + } +#endif + + if (bind(s, (struct sockaddr *)&sun, sun.sun_len) < 0) + error = errno; +out: + + if (error == 0) { + /* + * We believe we created the socket-inode. Fix + * permissions etc. Note that we cannot use + * fstat() on the socket descriptor: it succeeds, + * but we get bogus data! + */ + if (fchownat(ff->ff_dirfd, newname, uid, gid, AT_SYMLINK_NOFOLLOW) != 0 || + fchmodat(ff->ff_dirfd, newname, perm, AT_SYMLINK_NOFOLLOW) != 0 || + fstatat(ff->ff_dirfd, newname, st, AT_SYMLINK_NOFOLLOW) != 0) + error = errno; + else if (!S_ISSOCK(st->st_mode)) + error = EPERM; /* ??? lost a race anyway */ + + /* if (error) unlink(newname) ? */ + } + + /* + * It's not clear which error should override, although + * ideally we should never see either close() call fail. + * In any case we do want to try to close both fd and s, + * always. Let's set error only if it is not already set, + * so that all exit paths can use the same code. + */ + if (fd >= 0 && close(fd) != 0) + if (error == 0) + error = errno; + if (close(s) != 0) + if (error == 0) + error = errno; + + return (error); +} + +/* + * Internal form of symlink. + * + * Note that symlinks are presumed to carry no permission bits. + * They do have owners, however (who may be charged for quotas). 
+ */
+static int
+fs_isymlink(void *softc, struct l9p_fid *dir, char *name,
+    char *symtgt, gid_t egid, struct stat *st)
+{
+	struct fs_fid *ff;
+	gid_t gid;
+	uid_t uid;
+	char newname[MAXPATHLEN];
+	int error;
+
+	ff = dir->lo_aux;
+	/* newname receives the path built from the dir fid plus name. */
+	error = fs_buildname(dir, name, newname, sizeof(newname));
+	if (error)
+		return (error);
+
+	/* As in fs_icreate(): permission check, plus owner for the link. */
+	error = fs_nde(softc, dir, false, egid, st, &uid, &gid);
+	if (error)
+		return (error);
+
+	/* symtgt is stored as the link's contents, uninterpreted. */
+	if (symlinkat(symtgt, ff->ff_dirfd, newname) != 0)
+		return (errno);
+
+	/* We cannot open the new name; race to use l* syscalls. */
+	if (fchownat(ff->ff_dirfd, newname, uid, gid, AT_SYMLINK_NOFOLLOW) != 0 ||
+	    fstatat(ff->ff_dirfd, newname, st, AT_SYMLINK_NOFOLLOW) != 0)
+		error = errno;
+	else if (!S_ISLNK(st->st_mode))
+		error = EPERM; /* ??? lost a race anyway */
+
+	/* if (error) unlink(newname) ? */
+
+	return (error);
+}
+
+/*
+ * Handle Topen: translate the 9P open mode into open(2) flags, open
+ * the fid via fs_iopen(), and fill in the reply's qid and iounit.
+ * Returns 0 on success or an errno value.
+ */
+static int
+fs_open(void *softc, struct l9p_request *req)
+{
+	struct l9p_fid *fid = req->lr_fid;
+	struct stat st;
+	enum l9p_omode p9;
+	int error, flags;
+
+	p9 = req->lr_req.topen.mode;
+	error = fs_oflags_dotu(p9, &flags);
+	if (error)
+		return (error);
+
+	/* No effective gid is supplied on this path. */
+	error = fs_iopen(softc, fid, flags, p9, (gid_t)-1, &st);
+	if (error)
+		return (error);
+
+	generate_qid(&st, &req->lr_resp.ropen.qid);
+	req->lr_resp.ropen.iounit = req->lr_conn->lc_max_io_size;
+	return (0);
+}
+
+/*
+ * Helper for directory read. We want to run an lstat on each
+ * file name within the directory. This is a lot faster if we
+ * have lstatat (or fstatat with AT_SYMLINK_NOFOLLOW), but not
+ * all systems do, so hide the ifdef-ed code in an inline function.
+ */ +static inline int +fs_lstatat(struct fs_fid *file, char *name, struct stat *st) +{ + + return (fstatat(dirfd(file->ff_dir), name, st, AT_SYMLINK_NOFOLLOW)); +} + +static int +fs_read(void *softc, struct l9p_request *req) +{ + struct l9p_stat l9stat; + struct fs_softc *sc; + struct fs_fid *file; + bool dotu = req->lr_conn->lc_version >= L9P_2000U; + ssize_t ret; + + sc = softc; + file = req->lr_fid->lo_aux; + assert(file != NULL); + + if (file->ff_dir != NULL) { + struct dirent *d; + struct stat st; + struct l9p_message msg; + long o; + + pthread_mutex_lock(&file->ff_mtx); + + /* + * Must use telldir before readdir since seekdir + * takes cookie values. Unfortunately this wastes + * a lot of time (and memory) building unneeded + * cookies that can only be flushed by closing + * the directory. + * + * NB: FreeBSD libc seekdir has SINGLEUSE defined, + * so in fact, we can discard the cookies by + * calling seekdir on them. This clears up wasted + * memory at the cost of even more wasted time... + * + * XXX: readdir/telldir/seekdir not thread safe + */ + l9p_init_msg(&msg, req, L9P_PACK); + for (;;) { + o = telldir(file->ff_dir); + d = readdir(file->ff_dir); + if (d == NULL) + break; + if (fs_lstatat(file, d->d_name, &st)) + continue; + dostat(sc, &l9stat, d->d_name, &st, dotu); + if (l9p_pack_stat(&msg, req, &l9stat) != 0) { + seekdir(file->ff_dir, o); + break; + } +#if defined(__FreeBSD__) + seekdir(file->ff_dir, o); + (void) readdir(file->ff_dir); +#endif + } + + pthread_mutex_unlock(&file->ff_mtx); + } else { + size_t niov = l9p_truncate_iov(req->lr_data_iov, + req->lr_data_niov, req->lr_req.io.count); + +#if defined(__FreeBSD__) + ret = preadv(file->ff_fd, req->lr_data_iov, niov, + req->lr_req.io.offset); +#else + /* XXX: not thread safe, should really use aio_listio. 
*/ + if (lseek(file->ff_fd, (off_t)req->lr_req.io.offset, SEEK_SET) < 0) + return (errno); + + ret = (uint32_t)readv(file->ff_fd, req->lr_data_iov, (int)niov); +#endif + + if (ret < 0) + return (errno); + + req->lr_resp.io.count = (uint32_t)ret; + } + + return (0); +} + +static int +fs_remove(void *softc, struct l9p_fid *fid) +{ + struct fs_softc *sc = softc; + struct l9p_acl *parent_acl; + struct fs_fid *file; + struct stat pst, cst; + char dirname[MAXPATHLEN]; + int error; + + if (sc->fs_readonly) + return (EROFS); + + error = fs_pdir(sc, fid, dirname, sizeof(dirname), &pst); + if (error) + return (error); + + file = fid->lo_aux; + if (fstatat(file->ff_dirfd, file->ff_name, &cst, AT_SYMLINK_NOFOLLOW) != 0) + return (error); + + parent_acl = getacl(file, -1, dirname); + fillacl(file); + + error = check_access(L9P_ACOP_UNLINK, + parent_acl, &pst, file->ff_acl, &cst, file->ff_ai, (gid_t)-1); + l9p_acl_free(parent_acl); + if (error) + return (error); + + if (unlinkat(file->ff_dirfd, file->ff_name, + S_ISDIR(cst.st_mode) ? 
AT_REMOVEDIR : 0) != 0) + error = errno; + + return (error); +} + +static int +fs_stat(void *softc, struct l9p_request *req) +{ + struct fs_softc *sc; + struct fs_fid *file; + struct stat st; + bool dotu = req->lr_conn->lc_version >= L9P_2000U; + + sc = softc; + file = req->lr_fid->lo_aux; + assert(file); + + if (fstatat(file->ff_dirfd, file->ff_name, &st, + AT_SYMLINK_NOFOLLOW) != 0) + return (errno); + + dostat(sc, &req->lr_resp.rstat.stat, file->ff_name, &st, dotu); + return (0); +} + +static int +fs_walk(void *softc, struct l9p_request *req) +{ + struct l9p_acl *acl; + struct fs_authinfo *ai; + struct fs_fid *file = req->lr_fid->lo_aux; + struct fs_fid *newfile; + struct stat st; + size_t clen, namelen, need; + char *comp, *succ, *next, *swtmp; + bool atroot; + bool dotdot; + int i, nwname; + int error = 0; + char namebufs[2][MAXPATHLEN]; + + /* + * https://swtch.com/plan9port/man/man9/walk.html: + * + * It is legal for nwname to be zero, in which case newfid + * will represent the same file as fid and the walk will + * usually succeed; this is equivalent to walking to dot. + * [Aside: it's not clear if we should test S_ISDIR here.] + * ... + * The name ".." ... represents the parent directory. + * The name "." ... is not used in the protocol. + * ... A walk of the name ".." in the root directory + * of the server is equivalent to a walk with no name + * elements. + * + * Note that req.twalk.nwname never exceeds L9P_MAX_WELEM, + * so it is safe to convert to plain int. + * + * We are to return an error only if the first walk fails, + * else stop at the end of the names or on the first error. + * The final fid is based on the last name successfully + * walked. + * + * Note that we *do* get Twalk requests with nwname==0 on files. + * + * Set up "successful name" buffer pointer with base fid name, + * initially. We'll swap each new success into it as we go. + * + * Invariant: atroot and stat data correspond to current + * (succ) path. 
+ */ + succ = namebufs[0]; + next = namebufs[1]; + namelen = strlcpy(succ, file->ff_name, MAXPATHLEN); + if (namelen >= MAXPATHLEN) + return (ENAMETOOLONG); + if (fstatat(file->ff_dirfd, succ, &st, AT_SYMLINK_NOFOLLOW) < 0) + return (errno); + ai = file->ff_ai; + atroot = strlen(succ) == 0; /* XXX? */ + fillacl(file); + acl = file->ff_acl; + + nwname = (int)req->lr_req.twalk.nwname; + + for (i = 0; i < nwname; i++) { + /* + * Must have execute permission to search a directory. + * Then, look up each component in its directory-so-far. + * Check for ".." along the way, handlng specially + * as needed. Forbid "/" in name components. + * + */ + if (!S_ISDIR(st.st_mode)) { + error = ENOTDIR; + goto out; + } + error = check_access(L9P_ACE_EXECUTE, + NULL, NULL, acl, &st, ai, (gid_t)-1); + if (error) { + L9P_LOG(L9P_DEBUG, + "Twalk: denying dir-walk on \"%s\" for uid %u", + succ, (unsigned)ai->ai_uid); + error = EPERM; + goto out; + } + comp = req->lr_req.twalk.wname[i]; + if (strchr(comp, '/') != NULL) { + error = EINVAL; + break; + } + + clen = strlen(comp); + dotdot = false; + + /* + * Build next pathname (into "next"). If "..", + * just strip one name component off the success + * name so far. Since we know this name fits, the + * stripped down version also fits. Otherwise, + * the name is the base name plus '/' plus the + * component name plus terminating '\0'; this may + * or may not fit. + */ + if (comp[0] == '.') { + if (clen == 1) { + error = EINVAL; + break; + } + if (comp[1] == '.' && clen == 2) + dotdot = true; + } + if (dotdot) { + /* + * It's not clear how ".." at root should + * be handled when i > 0. Obeying the man + * page exactly, we reset i to 0 and stop, + * declaring terminal success. + * + * Otherwise, we just climbed up one level + * so adjust "atroot". + */ + if (atroot) { + i = 0; + break; + } + (void) r_dirname(succ, next, MAXPATHLEN); + namelen = strlen(next); + atroot = strlen(next) == 0; /* XXX? 
*/ + } else { + need = namelen + 1 + clen + 1; + if (need > MAXPATHLEN) { + error = ENAMETOOLONG; + break; + } + memcpy(next, succ, namelen); + next[namelen++] = '/'; + memcpy(&next[namelen], comp, clen + 1); + namelen += clen; + /* + * Since name is never ".", we are necessarily + * descending below the root now. + */ + atroot = false; + } + + if (fstatat(file->ff_dirfd, next, &st, AT_SYMLINK_NOFOLLOW) < 0) { + error = ENOENT; + break; + } + + /* + * Success: generate qid and swap this + * successful name into place. Update acl. + */ + generate_qid(&st, &req->lr_resp.rwalk.wqid[i]); + swtmp = succ; + succ = next; + next = swtmp; + if (acl != NULL && acl != file->ff_acl) + l9p_acl_free(acl); + acl = getacl(file, -1, next); + } + + /* + * Fail only if we failed on the first name. + * Otherwise we succeeded on something, and "succ" + * points to the last successful name in namebufs[]. + */ + if (error) { + if (i == 0) + goto out; + error = 0; + } + + newfile = open_fid(file->ff_dirfd, succ, ai, false); + if (newfile == NULL) { + error = ENOMEM; + goto out; + } + if (req->lr_newfid == req->lr_fid) { + /* + * Before overwriting fid->lo_aux, free the old value. + * Note that this doesn't free the l9p_fid data, + * just the fs_fid data. (But it does ditch ff_acl.) 
+ */ + if (acl == file->ff_acl) + acl = NULL; + fs_freefid(softc, req->lr_fid); + file = NULL; + } + req->lr_newfid->lo_aux = newfile; + if (file != NULL && acl != file->ff_acl) { + newfile->ff_acl = acl; + acl = NULL; + } + req->lr_resp.rwalk.nwqid = (uint16_t)i; +out: + if (file != NULL && acl != file->ff_acl) + l9p_acl_free(acl); + return (error); +} + +static int +fs_write(void *softc, struct l9p_request *req) +{ + struct fs_softc *sc = softc; + struct fs_fid *file; + ssize_t ret; + + file = req->lr_fid->lo_aux; + assert(file != NULL); + + if (sc->fs_readonly) + return (EROFS); + + size_t niov = l9p_truncate_iov(req->lr_data_iov, + req->lr_data_niov, req->lr_req.io.count); + +#if defined(__FreeBSD__) + ret = pwritev(file->ff_fd, req->lr_data_iov, niov, + req->lr_req.io.offset); +#else + /* XXX: not thread safe, should really use aio_listio. */ + if (lseek(file->ff_fd, (off_t)req->lr_req.io.offset, SEEK_SET) < 0) + return (errno); + + ret = writev(file->ff_fd, req->lr_data_iov, + (int)niov); +#endif + + if (ret < 0) + return (errno); + + req->lr_resp.io.count = (uint32_t)ret; + return (0); +} + +static int +fs_wstat(void *softc, struct l9p_request *req) +{ + struct fs_softc *sc = softc; + struct l9p_stat *l9stat = &req->lr_req.twstat.stat; + struct l9p_fid *fid; + struct fs_fid *file; + int error = 0; + + fid = req->lr_fid; + file = fid->lo_aux; + assert(file != NULL); + + /* + * XXX: + * + * stat(9P) sez: + * + * Either all the changes in wstat request happen, or none of them + * does: if the request succeeds, all changes were made; if it fails, + * none were. + * + * Atomicity is clearly missing in current implementation. 
+ */ + + if (sc->fs_readonly) + return (EROFS); + + if (l9stat->atime != (uint32_t)~0) { + /* XXX: not implemented, ignore */ + } + + if (l9stat->mtime != (uint32_t)~0) { + /* XXX: not implemented, ignore */ + } + + if (l9stat->dev != (uint32_t)~0) { + error = EPERM; + goto out; + } + + if (l9stat->length != (uint64_t)~0) { + if (file->ff_dir != NULL) { + error = EINVAL; + goto out; + } + + if (truncate(file->ff_name, (off_t)l9stat->length) != 0) { + error = errno; + goto out; + } + } + + if (req->lr_conn->lc_version >= L9P_2000U) { + if (fchownat(file->ff_dirfd, file->ff_name, l9stat->n_uid, + l9stat->n_gid, AT_SYMLINK_NOFOLLOW) != 0) { + error = errno; + goto out; + } + } + + if (l9stat->mode != (uint32_t)~0) { + if (fchmodat(file->ff_dirfd, file->ff_name, + l9stat->mode & 0777, 0) != 0) { + error = errno; + goto out; + } + } + + if (strlen(l9stat->name) > 0) { + struct l9p_acl *parent_acl; + struct stat st; + char *tmp; + char newname[MAXPATHLEN]; + + /* + * Rename-within-directory: it's not deleting anything, + * but we need write permission on the directory. This + * should suffice. + */ + error = fs_pdir(softc, fid, newname, sizeof(newname), &st); + if (error) + goto out; + parent_acl = getacl(file, -1, newname); + error = check_access(L9P_ACE_ADD_FILE, + parent_acl, &st, NULL, NULL, file->ff_ai, (gid_t)-1); + l9p_acl_free(parent_acl); + if (error) + goto out; + error = fs_dpf(newname, l9stat->name, sizeof(newname)); + if (error) + goto out; + tmp = strdup(newname); + if (tmp == NULL) { + error = ENOMEM; + goto out; + } + if (renameat(file->ff_dirfd, file->ff_name, file->ff_dirfd, + tmp) != 0) { + error = errno; + free(tmp); + goto out; + } + /* Successful rename, update file->ff_name. ACL can stay. 
*/
+		free(file->ff_name);
+		file->ff_name = tmp;
+	}
+out:
+	return (error);
+}
+
+/*
+ * Tstatfs: report statistics for the file system holding the fid's file.
+ *
+ * The file is lstat'ed and must pass check_access() for
+ * L9P_ACE_READ_DATA.  It is then opened just long enough to run
+ * fstatfs() and fpathconf(_PC_NAME_MAX) on it; the temporary
+ * descriptor is closed on every path out of the function.
+ *
+ * Returns 0 after filling req->lr_resp.rstatfs.statfs via dostatfs(),
+ * or an errno value.
+ */
+static int
+fs_statfs(void *softc __unused, struct l9p_request *req)
+{
+	struct fs_fid *file;
+	struct stat st;
+	struct statfs f;
+	long name_max;
+	int error;
+	int fd;
+
+	file = req->lr_fid->lo_aux;
+	assert(file);
+
+	if (fstatat(file->ff_dirfd, file->ff_name, &st,
+	    AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+
+	/*
+	 * Not entirely clear what access to require; we'll go
+	 * for "read data".
+	 */
+	fillacl(file);
+	error = check_access(L9P_ACE_READ_DATA, NULL, NULL,
+	    file->ff_acl, &st, file->ff_ai, (gid_t)-1);
+	if (error)
+		return (error);
+
+	fd = openat(file->ff_dirfd, file->ff_name, 0);
+	if (fd < 0)
+		return (errno);
+
+	if (fstatfs(fd, &f) != 0) {
+		/* Save errno before close(), which may overwrite it. */
+		error = errno;
+		close(fd);
+		return (error);
+	}
+
+	/*
+	 * NOTE(review): fpathconf() can return -1 without touching errno
+	 * (no limit); in that case "error" holds whatever errno was
+	 * before the call.  Behavior kept as-is; confirm intent.
+	 */
+	name_max = fpathconf(fd, _PC_NAME_MAX);
+	error = errno;
+	close(fd);
+
+	if (name_max == -1)
+		return (error);
+
+	dostatfs(&req->lr_resp.rstatfs.statfs, &f, name_max);
+
+	return (0);
+}
+
+/*
+ * Tlopen: translate the request's open flags with fs_oflags_dotl(),
+ * open the fid through fs_iopen(), and reply with the file's qid and
+ * the connection's lc_max_io_size as iounit.
+ */
+static int
+fs_lopen(void *softc, struct l9p_request *req)
+{
+	struct l9p_fid *fid = req->lr_fid;
+	struct stat st;
+	enum l9p_omode p9;
+	gid_t gid;
+	int error, flags;
+
+	error = fs_oflags_dotl(req->lr_req.tlopen.flags, &flags, &p9);
+	if (error)
+		return (error);
+
+	gid = req->lr_req.tlopen.gid;
+	error = fs_iopen(softc, fid, flags, p9, gid, &st);
+	if (error)
+		return (error);
+
+	generate_qid(&st, &req->lr_resp.rlopen.qid);
+	req->lr_resp.rlopen.iounit = req->lr_conn->lc_max_io_size;
+	return (0);
+}
+
+static int
+fs_lcreate(void *softc, struct l9p_request *req)
+{
+	struct l9p_fid *dir;
+	struct stat st;
+	enum l9p_omode p9;
+	char *name;
+	mode_t perm;
+	gid_t gid;
+	int error, flags;
+
+	dir = req->lr_fid;
+	name = req->lr_req.tlcreate.name;
+
+	error = fs_oflags_dotl(req->lr_req.tlcreate.flags, &flags, &p9);
+	if (error)
+		return (error);
+
+	perm = (mode_t)req->lr_req.tlcreate.mode & 0777; /* ? set-id bits?
*/
+	gid = req->lr_req.tlcreate.gid;
+	error = fs_icreate(softc, dir, name, flags, false, perm, gid, &st);
+	if (error == 0)
+		generate_qid(&st, &req->lr_resp.rlcreate.qid);
+	req->lr_resp.rlcreate.iounit = req->lr_conn->lc_max_io_size;
+	return (error);
+}
+
+/*
+ * Tsymlink: create symbolic link "name" pointing at "symtgt" in the
+ * directory fid via fs_isymlink().  On success the new link's qid is
+ * placed in the reply.  Returns 0 or the error from fs_isymlink().
+ */
+static int
+fs_symlink(void *softc, struct l9p_request *req)
+{
+	struct l9p_fid *dir;
+	struct stat st;
+	gid_t gid;
+	char *name, *symtgt;
+	int error;
+
+	dir = req->lr_fid;
+	name = req->lr_req.tsymlink.name;
+	symtgt = req->lr_req.tsymlink.symtgt;
+	gid = req->lr_req.tsymlink.gid;
+	error = fs_isymlink(softc, dir, name, symtgt, gid, &st);
+	if (error == 0)
+		generate_qid(&st, &req->lr_resp.rsymlink.qid);
+	return (error);
+}
+
+/*
+ * Tmknod: create a special file "name" in the directory fid.
+ *
+ * The file type is taken from (mode & S_IFMT):
+ *   S_IFBLK, S_IFCHR - device node via fs_imknod(), device number
+ *                      built from the request's major/minor pair;
+ *   S_IFIFO          - fs_imkfifo();
+ *   S_IFSOCK         - fs_imksocket();
+ *   anything else    - EINVAL.
+ * Only the low 0777 permission bits of mode are passed on.
+ *
+ * On success the new node's qid is placed in the reply.
+ * Returns 0 or an errno value.
+ */
+static int
+fs_mknod(void *softc, struct l9p_request *req)
+{
+	struct l9p_fid *dir;
+	struct stat st;
+	uint32_t mode, major, minor;
+	dev_t dev;
+	gid_t gid;
+	char *name;
+	int error;
+
+	dir = req->lr_fid;
+	name = req->lr_req.tmknod.name;
+	mode = req->lr_req.tmknod.mode;
+	gid = req->lr_req.tmknod.gid;
+
+	switch (mode & S_IFMT) {
+	case S_IFBLK:
+	case S_IFCHR:
+		mode = (mode & S_IFMT) | (mode & 0777); /* ??? */
+		major = req->lr_req.tmknod.major;
+		/*
+		 * Was read from tmknod.major, which made every device
+		 * node come out with minor == major.
+		 * NOTE(review): assumes tmknod carries a separate
+		 * "minor" member next to "major" -- confirm in fcall.h.
+		 */
+		minor = req->lr_req.tmknod.minor;
+		dev = makedev(major, minor);
+		error = fs_imknod(softc, dir, name, false,
+		    (mode_t)mode, dev, gid, &st);
+		break;
+
+	case S_IFIFO:
+		error = fs_imkfifo(softc, dir, name, false,
+		    (mode_t)(mode & 0777), gid, &st);
+		break;
+
+	case S_IFSOCK:
+		error = fs_imksocket(softc, dir, name, false,
+		    (mode_t)(mode & 0777), gid, &st);
+		break;
+
+	default:
+		error = EINVAL;
+		break;
+	}
+	if (error == 0)
+		generate_qid(&st, &req->lr_resp.rmknod.qid);
+	return (error);
+}
+
+static int
+fs_rename(void *softc, struct l9p_request *req)
+{
+	struct fs_softc *sc = softc;
+	struct fs_authinfo *ai;
+	struct l9p_acl *oparent_acl;
+	struct l9p_fid *fid, *f2;
+	struct fs_fid *file, *f2ff;
+	struct stat cst, opst, npst;
+	int32_t op;
+	bool reparenting;
+	char *tmp;
+	char olddir[MAXPATHLEN], newname[MAXPATHLEN];
+	int error;
+
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	/*
+	 * Note: lr_fid represents the file that is to be renamed,
+	 * so we must locate its parent directory and verify that
+	 * both this parent directory and the new directory f2 are
+	 * writable. But if the new parent directory is the same
+	 * path as the old parent directory, our job is simpler.
+	 */
+	fid = req->lr_fid;
+	file = fid->lo_aux;
+	assert(file != NULL);
+	ai = file->ff_ai;
+
+	error = fs_pdir(sc, fid, olddir, sizeof(olddir), &opst);
+	if (error)
+		return (error);
+
+	f2 = req->lr_fid2;
+	f2ff = f2->lo_aux;
+	assert(f2ff != NULL);
+
+	reparenting = strcmp(olddir, f2ff->ff_name) != 0;
+
+	fillacl(file);
+	fillacl(f2ff);
+
+	if (fstatat(file->ff_dirfd, file->ff_name, &cst,
+	    AT_SYMLINK_NOFOLLOW) != 0)
+		return (errno);
+
+	/*
+	 * Are we moving from olddir? If so, we're unlinking
+	 * from it, in terms of ACL access.
+ */ + if (reparenting) { + oparent_acl = getacl(file, -1, olddir); + error = check_access(L9P_ACOP_UNLINK, + oparent_acl, &opst, file->ff_acl, &cst, ai, (gid_t)-1); + l9p_acl_free(oparent_acl); + if (error) + return (error); + } + + /* + * Now check that we're allowed to "create" a file or directory in + * f2. (Should we do this, too, only if reparenting? Maybe check + * for dir write permission if not reparenting -- but that's just + * add-file/add-subdir, which means doing this always.) + */ + if (fstatat(f2ff->ff_dirfd, f2ff->ff_name, &npst, + AT_SYMLINK_NOFOLLOW) != 0) + return (errno); + + op = S_ISDIR(cst.st_mode) ? L9P_ACE_ADD_SUBDIRECTORY : L9P_ACE_ADD_FILE; + error = check_access(op, f2ff->ff_acl, &npst, NULL, NULL, + ai, (gid_t)-1); + if (error) + return (error); + + /* + * Directories OK, file systems not R/O, etc; build final name. + * f2ff->ff_name cannot exceed MAXPATHLEN, but out of general + * paranoia, let's double check anyway. + */ + if (strlcpy(newname, f2ff->ff_name, sizeof(newname)) >= sizeof(newname)) + return (ENAMETOOLONG); + error = fs_dpf(newname, req->lr_req.trename.name, sizeof(newname)); + if (error) + return (error); + tmp = strdup(newname); + if (tmp == NULL) + return (ENOMEM); + + if (renameat(file->ff_dirfd, file->ff_name, file->ff_dirfd, tmp) != 0) { + error = errno; + free(tmp); + return (error); + } + + /* file has been renamed but old fid is not clunked */ + free(file->ff_name); + file->ff_name = tmp; + + dropacl(file); + return (0); +} + +static int +fs_readlink(void *softc __unused, struct l9p_request *req) +{ + struct fs_fid *file; + ssize_t linklen; + char buf[MAXPATHLEN]; + int error = 0; + + file = req->lr_fid->lo_aux; + assert(file); + + linklen = readlinkat(file->ff_dirfd, file->ff_name, buf, sizeof(buf)); + if (linklen < 0) + error = errno; + else if ((size_t)linklen >= sizeof(buf)) + error = ENOMEM; /* todo: allocate dynamically */ + else if ((req->lr_resp.rreadlink.target = strndup(buf, + (size_t)linklen)) == NULL) 
+ error = ENOMEM; + return (error); +} + +static int +fs_getattr(void *softc __unused, struct l9p_request *req) +{ + uint64_t mask, valid; + struct fs_fid *file; + struct stat st; + int error = 0; + + file = req->lr_fid->lo_aux; + assert(file); + + valid = 0; + if (fstatat(file->ff_dirfd, file->ff_name, &st, AT_SYMLINK_NOFOLLOW)) { + error = errno; + goto out; + } + /* ?? Can we provide items not-requested? If so, can skip tests. */ + mask = req->lr_req.tgetattr.request_mask; + if (mask & L9PL_GETATTR_MODE) { + /* It is not clear if we need any translations. */ + req->lr_resp.rgetattr.mode = st.st_mode; + valid |= L9PL_GETATTR_MODE; + } + if (mask & L9PL_GETATTR_NLINK) { + req->lr_resp.rgetattr.nlink = st.st_nlink; + valid |= L9PL_GETATTR_NLINK; + } + if (mask & L9PL_GETATTR_UID) { + /* provide st_uid, or file->ff_uid? */ + req->lr_resp.rgetattr.uid = st.st_uid; + valid |= L9PL_GETATTR_UID; + } + if (mask & L9PL_GETATTR_GID) { + /* provide st_gid, or file->ff_gid? */ + req->lr_resp.rgetattr.gid = st.st_gid; + valid |= L9PL_GETATTR_GID; + } + if (mask & L9PL_GETATTR_RDEV) { + /* It is not clear if we need any translations. 
*/ + req->lr_resp.rgetattr.rdev = (uint64_t)st.st_rdev; + valid |= L9PL_GETATTR_RDEV; + } + if (mask & L9PL_GETATTR_ATIME) { + req->lr_resp.rgetattr.atime_sec = + (uint64_t)st.st_atimespec.tv_sec; + req->lr_resp.rgetattr.atime_nsec = + (uint64_t)st.st_atimespec.tv_nsec; + valid |= L9PL_GETATTR_ATIME; + } + if (mask & L9PL_GETATTR_MTIME) { + req->lr_resp.rgetattr.mtime_sec = + (uint64_t)st.st_mtimespec.tv_sec; + req->lr_resp.rgetattr.mtime_nsec = + (uint64_t)st.st_mtimespec.tv_nsec; + valid |= L9PL_GETATTR_MTIME; + } + if (mask & L9PL_GETATTR_CTIME) { + req->lr_resp.rgetattr.ctime_sec = + (uint64_t)st.st_ctimespec.tv_sec; + req->lr_resp.rgetattr.ctime_nsec = + (uint64_t)st.st_ctimespec.tv_nsec; + valid |= L9PL_GETATTR_CTIME; + } + if (mask & L9PL_GETATTR_BTIME) { +#if defined(HAVE_BIRTHTIME) + req->lr_resp.rgetattr.btime_sec = + (uint64_t)st.st_birthtim.tv_sec; + req->lr_resp.rgetattr.btime_nsec = + (uint64_t)st.st_birthtim.tv_nsec; +#else + req->lr_resp.rgetattr.btime_sec = 0; + req->lr_resp.rgetattr.btime_nsec = 0; +#endif + valid |= L9PL_GETATTR_BTIME; + } + if (mask & L9PL_GETATTR_INO) + valid |= L9PL_GETATTR_INO; + if (mask & L9PL_GETATTR_SIZE) { + req->lr_resp.rgetattr.size = (uint64_t)st.st_size; + valid |= L9PL_GETATTR_SIZE; + } + if (mask & L9PL_GETATTR_BLOCKS) { + req->lr_resp.rgetattr.blksize = (uint64_t)st.st_blksize; + req->lr_resp.rgetattr.blocks = (uint64_t)st.st_blocks; + valid |= L9PL_GETATTR_BLOCKS; + } + if (mask & L9PL_GETATTR_GEN) { + req->lr_resp.rgetattr.gen = st.st_gen; + valid |= L9PL_GETATTR_GEN; + } + /* don't know what to do with data version yet */ + + generate_qid(&st, &req->lr_resp.rgetattr.qid); +out: + req->lr_resp.rgetattr.valid = valid; + return (error); +} + +/* + * Should combine some of this with wstat code. 
+ */
+/*
+ * Tsetattr: apply the attribute changes selected by the request's
+ * "valid" mask to the fid's file.
+ *
+ * Changes are applied in this order, stopping at the first failure:
+ * mode (low 0777 bits), owner/group, size, access/modify times.
+ * Changes already made are not rolled back.  A time bit without its
+ * *_SET companion means "use the current time"; a time not selected
+ * at all keeps the value read by the initial fstatat().
+ *
+ * Returns 0, EROFS on a read-only export, EISDIR when asked to resize
+ * a directory, or the errno of the failing system call.
+ */
+static int
+fs_setattr(void *softc, struct l9p_request *req)
+{
+	uint64_t mask;
+	struct fs_softc *sc = softc;
+	struct timespec ts[2];
+	struct fs_fid *file;
+	struct stat st;
+	int error = 0;
+	uid_t uid;
+	gid_t gid;
+
+	file = req->lr_fid->lo_aux;
+	assert(file);
+
+	if (sc->fs_readonly)
+		return (EROFS);
+
+	/*
+	 * As with WSTAT we have atomicity issues.
+	 */
+	mask = req->lr_req.tsetattr.valid;
+
+	if (fstatat(file->ff_dirfd, file->ff_name, &st, AT_SYMLINK_NOFOLLOW)) {
+		error = errno;
+		goto out;
+	}
+
+	if ((mask & L9PL_SETATTR_SIZE) && S_ISDIR(st.st_mode)) {
+		error = EISDIR;
+		goto out;
+	}
+
+	if (mask & L9PL_SETATTR_MODE) {
+		if (fchmodat(file->ff_dirfd, file->ff_name,
+		    req->lr_req.tsetattr.mode & 0777,
+		    AT_SYMLINK_NOFOLLOW)) {
+			error = errno;
+			goto out;
+		}
+	}
+
+	if (mask & (L9PL_SETATTR_UID | L9PL_SETATTR_GID)) {
+		/* An id of -1 tells fchownat() to leave that id alone. */
+		uid = mask & L9PL_SETATTR_UID
+		    ? req->lr_req.tsetattr.uid
+		    : (uid_t)-1;
+
+		gid = mask & L9PL_SETATTR_GID
+		    ? req->lr_req.tsetattr.gid
+		    : (gid_t)-1;
+
+		if (fchownat(file->ff_dirfd, file->ff_name, uid, gid,
+		    AT_SYMLINK_NOFOLLOW)) {
+			error = errno;
+			goto out;
+		}
+	}
+
+	if (mask & L9PL_SETATTR_SIZE) {
+		/* Truncate follows symlinks, is this OK? */
+		int fd = openat(file->ff_dirfd, file->ff_name, O_RDWR);
+
+		/*
+		 * Report the open failure itself; without this check
+		 * ftruncate(-1) would turn every open error into EBADF.
+		 */
+		if (fd < 0) {
+			error = errno;
+			goto out;
+		}
+		if (ftruncate(fd, (off_t)req->lr_req.tsetattr.size)) {
+			error = errno;
+			(void) close(fd);
+			goto out;
+		}
+		(void) close(fd);
+	}
+
+	if (mask & (L9PL_SETATTR_ATIME | L9PL_SETATTR_MTIME)) {
+		/* Start from the current times so an unselected one is kept. */
+		ts[0].tv_sec = st.st_atimespec.tv_sec;
+		ts[0].tv_nsec = st.st_atimespec.tv_nsec;
+		ts[1].tv_sec = st.st_mtimespec.tv_sec;
+		ts[1].tv_nsec = st.st_mtimespec.tv_nsec;
+
+		if (mask & L9PL_SETATTR_ATIME) {
+			if (mask & L9PL_SETATTR_ATIME_SET) {
+				ts[0].tv_sec = req->lr_req.tsetattr.atime_sec;
+				ts[0].tv_nsec = req->lr_req.tsetattr.atime_nsec;
+			} else {
+				if (clock_gettime(CLOCK_REALTIME, &ts[0]) != 0) {
+					error = errno;
+					goto out;
+				}
+			}
+		}
+
+		if (mask & L9PL_SETATTR_MTIME) {
+			if (mask & L9PL_SETATTR_MTIME_SET) {
+				ts[1].tv_sec = req->lr_req.tsetattr.mtime_sec;
+				ts[1].tv_nsec = req->lr_req.tsetattr.mtime_nsec;
+			} else {
+				if (clock_gettime(CLOCK_REALTIME, &ts[1]) != 0) {
+					error = errno;
+					goto out;
+				}
+			}
+		}
+
+		if (utimensat(file->ff_dirfd, file->ff_name, ts,
+		    AT_SYMLINK_NOFOLLOW)) {
+			error = errno;
+			goto out;
+		}
+	}
+out:
+	return (error);
+}
+
+/* Txattrwalk: extended attributes are not implemented; always EOPNOTSUPP. */
+static int
+fs_xattrwalk(void *softc __unused, struct l9p_request *req __unused)
+{
+	return (EOPNOTSUPP);
+}
+
+/* Txattrcreate: extended attributes are not implemented; always EOPNOTSUPP. */
+static int
+fs_xattrcreate(void *softc __unused, struct l9p_request *req __unused)
+{
+	return (EOPNOTSUPP);
+}
+
+static int
+fs_readdir(void *softc __unused, struct l9p_request *req)
+{
+	struct l9p_message msg;
+	struct l9p_dirent de;
+	struct fs_fid *file;
+	struct dirent *dp;
+	struct stat st;
+	uint32_t count;
+	int error = 0;
+
+	file = req->lr_fid->lo_aux;
+	assert(file);
+
+	if (file->ff_dir == NULL)
+		return (ENOTDIR);
+
+	pthread_mutex_lock(&file->ff_mtx);
+
+	/*
+	 * It's not clear whether we can use the same trick for
+	 * discarding offsets here as we do in fs_read. It
+	 * probably should work, we'll have to see if some
+	 * client(s) use the zero-offset thing to rescan without
+	 * clunking the directory first.
+ * + * Probably the thing to do is switch to calling + * getdirentries() / getdents() directly, instead of + * going through libc. + */ + if (req->lr_req.io.offset == 0) + rewinddir(file->ff_dir); + else + seekdir(file->ff_dir, (long)req->lr_req.io.offset); + + l9p_init_msg(&msg, req, L9P_PACK); + count = (uint32_t)msg.lm_size; /* in case we get no entries */ + while ((dp = readdir(file->ff_dir)) != NULL) { + /* + * Although "." is forbidden in naming and ".." is + * special cased, testing shows that we must transmit + * them through readdir. (For ".." at root, we + * should perhaps alter the inode number, but not + * yet.) + */ + + /* + * TODO: we do a full lstat here; could use dp->d_* + * to construct the qid more efficiently, as long + * as dp->d_type != DT_UNKNOWN. + */ + if (fs_lstatat(file, dp->d_name, &st)) + continue; + + de.qid.type = 0; + generate_qid(&st, &de.qid); + de.offset = (uint64_t)telldir(file->ff_dir); + de.type = dp->d_type; + de.name = dp->d_name; + + /* Update count only if we completely pack the dirent. */ + if (l9p_pudirent(&msg, &de) < 0) + break; + count = (uint32_t)msg.lm_size; + } + + pthread_mutex_unlock(&file->ff_mtx); + req->lr_resp.io.count = count; + return (error); +} + +static int +fs_fsync(void *softc __unused, struct l9p_request *req) +{ + struct fs_fid *file; + int error = 0; + + file = req->lr_fid->lo_aux; + assert(file); + if (fsync(file->ff_dir != NULL ? dirfd(file->ff_dir) : file->ff_fd)) + error = errno; + return (error); +} + +static int +fs_lock(void *softc __unused, struct l9p_request *req) +{ + + switch (req->lr_req.tlock.type) { + case L9PL_LOCK_TYPE_RDLOCK: + case L9PL_LOCK_TYPE_WRLOCK: + case L9PL_LOCK_TYPE_UNLOCK: + break; + default: + return (EINVAL); + } + + req->lr_resp.rlock.status = L9PL_LOCK_SUCCESS; + return (0); +} + +static int +fs_getlock(void *softc __unused, struct l9p_request *req) +{ + + /* + * Client wants to see if a request to lock a region would + * block. 
This is, of course, not atomic anyway, so the + * op is useless. QEMU simply says "unlocked!", so we do + * too. + */ + switch (req->lr_req.getlock.type) { + case L9PL_LOCK_TYPE_RDLOCK: + case L9PL_LOCK_TYPE_WRLOCK: + case L9PL_LOCK_TYPE_UNLOCK: + break; + default: + return (EINVAL); + } + + req->lr_resp.getlock = req->lr_req.getlock; + req->lr_resp.getlock.type = L9PL_LOCK_TYPE_UNLOCK; + req->lr_resp.getlock.client_id = strdup(""); /* XXX what should go here? */ + return (0); +} + +static int +fs_link(void *softc __unused, struct l9p_request *req) +{ + struct l9p_fid *dir; + struct fs_fid *file; + struct fs_fid *dirf; + struct stat fst, tdst; + int32_t op; + char *name; + char newname[MAXPATHLEN]; + int error; + + /* N.B.: lr_fid is the file to link, lr_fid2 is the target dir */ + dir = req->lr_fid2; + dirf = dir->lo_aux; + assert(dirf != NULL); + + name = req->lr_req.tlink.name; + error = fs_buildname(dir, name, newname, sizeof(newname)); + if (error) + return (error); + + file = req->lr_fid->lo_aux; + assert(file != NULL); + + if (fstatat(dirf->ff_dirfd, dirf->ff_name, &tdst, AT_SYMLINK_NOFOLLOW) != 0 || + fstatat(file->ff_dirfd, file->ff_name, &fst, AT_SYMLINK_NOFOLLOW) != 0) + return (errno); + if (S_ISDIR(fst.st_mode)) + return (EISDIR); + fillacl(dirf); + op = S_ISDIR(fst.st_mode) ? 
L9P_ACE_ADD_SUBDIRECTORY : L9P_ACE_ADD_FILE; + error = check_access(op, + dirf->ff_acl, &tdst, NULL, NULL, file->ff_ai, (gid_t)-1); + if (error) + return (error); + + if (linkat(file->ff_dirfd, file->ff_name, file->ff_dirfd, + newname, 0) != 0) + error = errno; + else + dropacl(file); + + return (error); +} + +static int +fs_mkdir(void *softc, struct l9p_request *req) +{ + struct l9p_fid *dir; + struct stat st; + mode_t perm; + gid_t gid; + char *name; + int error; + + dir = req->lr_fid; + name = req->lr_req.tmkdir.name; + perm = (mode_t)req->lr_req.tmkdir.mode; + gid = req->lr_req.tmkdir.gid; + + error = fs_imkdir(softc, dir, name, false, perm, gid, &st); + if (error == 0) + generate_qid(&st, &req->lr_resp.rmkdir.qid); + return (error); +} + +static int +fs_renameat(void *softc, struct l9p_request *req) +{ + struct fs_softc *sc = softc; + struct l9p_fid *olddir, *newdir; + struct l9p_acl *facl; + struct fs_fid *off, *nff; + struct stat odst, ndst, fst; + int32_t op; + bool reparenting; + char *onp, *nnp; + char onb[MAXPATHLEN], nnb[MAXPATHLEN]; + int error; + + if (sc->fs_readonly) + return (EROFS); + + olddir = req->lr_fid; + newdir = req->lr_fid2; + assert(olddir != NULL && newdir != NULL); + off = olddir->lo_aux; + nff = newdir->lo_aux; + assert(off != NULL && nff != NULL); + + onp = req->lr_req.trenameat.oldname; + nnp = req->lr_req.trenameat.newname; + error = fs_buildname(olddir, onp, onb, sizeof(onb)); + if (error) + return (error); + error = fs_buildname(newdir, nnp, nnb, sizeof(nnb)); + if (error) + return (error); + if (fstatat(off->ff_dirfd, onb, &fst, AT_SYMLINK_NOFOLLOW) != 0) + return (errno); + + reparenting = olddir != newdir && + strcmp(off->ff_name, nff->ff_name) != 0; + + if (fstatat(off->ff_dirfd, off->ff_name, &odst, AT_SYMLINK_NOFOLLOW) != 0) + return (errno); + if (!S_ISDIR(odst.st_mode)) + return (ENOTDIR); + fillacl(off); + + if (reparenting) { + if (fstatat(nff->ff_dirfd, nff->ff_name, &ndst, AT_SYMLINK_NOFOLLOW) != 0) + return (errno); + 
if (!S_ISDIR(ndst.st_mode)) + return (ENOTDIR); + facl = getacl(off, -1, onb); + fillacl(nff); + + error = check_access(L9P_ACOP_UNLINK, + off->ff_acl, &odst, facl, &fst, off->ff_ai, (gid_t)-1); + l9p_acl_free(facl); + if (error) + return (error); + op = S_ISDIR(fst.st_mode) ? L9P_ACE_ADD_SUBDIRECTORY : + L9P_ACE_ADD_FILE; + error = check_access(op, + nff->ff_acl, &ndst, NULL, NULL, nff->ff_ai, (gid_t)-1); + if (error) + return (error); + } + + if (renameat(off->ff_dirfd, onb, nff->ff_dirfd, nnb)) + error = errno; + + return (error); +} + +/* + * Unlink file in given directory, or remove directory in given + * directory, based on flags. + */ +static int +fs_unlinkat(void *softc, struct l9p_request *req) +{ + struct fs_softc *sc = softc; + struct l9p_acl *facl; + struct l9p_fid *dir; + struct fs_fid *dirff; + struct stat dirst, fst; + char *name; + char newname[MAXPATHLEN]; + int error; + + if (sc->fs_readonly) + return (EROFS); + + dir = req->lr_fid; + dirff = dir->lo_aux; + assert(dirff != NULL); + name = req->lr_req.tunlinkat.name; + error = fs_buildname(dir, name, newname, sizeof(newname)); + if (error) + return (error); + if (fstatat(dirff->ff_dirfd, newname, &fst, AT_SYMLINK_NOFOLLOW) != 0 || + fstatat(dirff->ff_dirfd, dirff->ff_name, &dirst, AT_SYMLINK_NOFOLLOW) != 0) + return (errno); + fillacl(dirff); + facl = getacl(dirff, -1, newname); + error = check_access(L9P_ACOP_UNLINK, + dirff->ff_acl, &dirst, facl, &fst, dirff->ff_ai, (gid_t)-1); + l9p_acl_free(facl); + if (error) + return (error); + + if (req->lr_req.tunlinkat.flags & L9PL_AT_REMOVEDIR) { + if (unlinkat(dirff->ff_dirfd, newname, AT_REMOVEDIR) != 0) + error = errno; + } else { + if (unlinkat(dirff->ff_dirfd, newname, 0) != 0) + error = errno; + } + return (error); +} + +static void +fs_freefid(void *softc __unused, struct l9p_fid *fid) +{ + struct fs_fid *f = fid->lo_aux; + struct fs_authinfo *ai; + uint32_t newcount; + + if (f == NULL) { + /* Nothing to do here */ + return; + } + + if (f->ff_fd != 
-1)
+		close(f->ff_fd);
+
+	if (f->ff_dir)
+		closedir(f->ff_dir);
+
+	pthread_mutex_destroy(&f->ff_mtx);
+	free(f->ff_name);
+	ai = f->ff_ai;
+	l9p_acl_free(f->ff_acl);
+	free(f);
+	pthread_mutex_lock(&ai->ai_mtx);
+	newcount = --ai->ai_refcnt;
+	pthread_mutex_unlock(&ai->ai_mtx);
+	if (newcount == 0) {
+		/*
+		 * We *were* the last ref, no one can have gained a ref.
+		 */
+		L9P_LOG(L9P_DEBUG, "dropped last ref to authinfo %p",
+		    (void *)ai);
+		pthread_mutex_destroy(&ai->ai_mtx);
+		free(ai);
+	} else {
+		L9P_LOG(L9P_DEBUG, "authinfo %p now used by %lu",
+		    (void *)ai, (u_long)newcount);
+	}
+}
+
+/*
+ * Create a file-system backend rooted at the directory open on
+ * "rootfd".
+ *
+ * backendp - receives the new backend on success; untouched on failure.
+ * rootfd   - descriptor stored as the backend's root (fs_rootfd).
+ * ro       - true to export read-only (fs_readonly).
+ *
+ * On first use this also initializes the file-scope fs_attach_mutex.
+ * With WITH_CASPER, Casper "system.pwd" and "system.grp" service
+ * channels are opened and kept in the softc; otherwise setpassent(1)
+ * is called.
+ *
+ * Returns 0 on success, -1 with errno set on failure.  Nothing
+ * allocated here is left behind on the failure paths.
+ */
+int
+l9p_backend_fs_init(struct l9p_backend **backendp, int rootfd, bool ro)
+{
+	struct l9p_backend *backend;
+	struct fs_softc *sc;
+	int error;
+#if defined(WITH_CASPER)
+	cap_channel_t *capcas;
+#endif
+
+	if (!fs_attach_mutex_inited) {
+		error = pthread_mutex_init(&fs_attach_mutex, NULL);
+		if (error) {
+			errno = error;
+			return (-1);
+		}
+		fs_attach_mutex_inited = true;
+	}
+
+	backend = l9p_malloc(sizeof(*backend));
+	backend->attach = fs_attach;
+	backend->clunk = fs_clunk;
+	backend->create = fs_create;
+	backend->open = fs_open;
+	backend->read = fs_read;
+	backend->remove = fs_remove;
+	backend->stat = fs_stat;
+	backend->walk = fs_walk;
+	backend->write = fs_write;
+	backend->wstat = fs_wstat;
+	backend->statfs = fs_statfs;
+	backend->lopen = fs_lopen;
+	backend->lcreate = fs_lcreate;
+	backend->symlink = fs_symlink;
+	backend->mknod = fs_mknod;
+	backend->rename = fs_rename;
+	backend->readlink = fs_readlink;
+	backend->getattr = fs_getattr;
+	backend->setattr = fs_setattr;
+	backend->xattrwalk = fs_xattrwalk;
+	backend->xattrcreate = fs_xattrcreate;
+	backend->readdir = fs_readdir;
+	backend->fsync = fs_fsync;
+	backend->lock = fs_lock;
+	backend->getlock = fs_getlock;
+	backend->link = fs_link;
+	backend->mkdir = fs_mkdir;
+	backend->renameat = fs_renameat;
+	backend->unlinkat = fs_unlinkat;
+	backend->freefid = fs_freefid;
+
+	sc = l9p_malloc(sizeof(*sc));
+	sc->fs_rootfd = rootfd;
+	sc->fs_readonly = ro;
+	backend->softc = sc;
+
+#if defined(WITH_CASPER)
+	capcas = cap_init();
+	if (capcas == NULL)
+		goto fail;
+
+	sc->fs_cappwd = cap_service_open(capcas, "system.pwd");
+	if (sc->fs_cappwd == NULL)
+		goto fail_capcas;
+
+	sc->fs_capgrp = cap_service_open(capcas, "system.grp");
+	if (sc->fs_capgrp == NULL)
+		goto fail_cappwd;
+
+	cap_setpassent(sc->fs_cappwd, 1);
+	cap_setgroupent(sc->fs_capgrp, 1);
+	cap_close(capcas);
+#else
+	setpassent(1);
+#endif
+
+	*backendp = backend;
+	return (0);
+
+#if defined(WITH_CASPER)
+	/*
+	 * Failure unwinding: release in reverse order of acquisition and
+	 * hand the original errno back to the caller.
+	 * NOTE(review): assumes l9p_malloc() memory may be released with
+	 * free() -- confirm against its definition.
+	 */
+fail_cappwd:
+	error = errno;
+	cap_close(sc->fs_cappwd);
+	errno = error;
+fail_capcas:
+	error = errno;
+	cap_close(capcas);
+	errno = error;
+fail:
+	error = errno;
+	free(sc);
+	free(backend);
+	errno = error;
+	return (-1);
+#endif
+}
Index: lib/lib9p/connection.c
===================================================================
--- /dev/null
+++ lib/lib9p/connection.c
@@ -0,0 +1,215 @@
+/*
+ * Copyright 2016 Jakub Klama 
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include +#include +#include +#include +#include "lib9p.h" +#include "lib9p_impl.h" +#include "fid.h" +#include "hashtable.h" +#include "log.h" +#include "threadpool.h" +#include "backend/backend.h" + +int +l9p_server_init(struct l9p_server **serverp, struct l9p_backend *backend) +{ + struct l9p_server *server; + + server = l9p_calloc(1, sizeof (*server)); + server->ls_max_version = L9P_2000L; + server->ls_backend = backend; + LIST_INIT(&server->ls_conns); + + *serverp = server; + return (0); +} + +int +l9p_connection_init(struct l9p_server *server, struct l9p_connection **conn) +{ + struct l9p_connection *newconn; + + assert(server != NULL); + assert(conn != NULL); + + newconn = calloc(1, sizeof (*newconn)); + if (newconn == NULL) + return (-1); + newconn->lc_server = server; + newconn->lc_msize = L9P_DEFAULT_MSIZE; + if (l9p_threadpool_init(&newconn->lc_tp, L9P_NUMTHREADS)) { + free(newconn); + return (-1); + } + ht_init(&newconn->lc_files, 100); + ht_init(&newconn->lc_requests, 100); + LIST_INSERT_HEAD(&server->ls_conns, newconn, lc_link); + *conn = newconn; + + return (0); +} + +void +l9p_connection_free(struct l9p_connection *conn) +{ + + LIST_REMOVE(conn, lc_link); + free(conn); +} + +void +l9p_connection_recv(struct l9p_connection *conn, const struct iovec *iov, + const size_t niov, void *aux) +{ + struct l9p_request *req; + int error; + + req = l9p_calloc(1, sizeof (struct l9p_request)); + req->lr_aux = aux; + req->lr_conn = 
conn; + + req->lr_req_msg.lm_mode = L9P_UNPACK; + req->lr_req_msg.lm_niov = niov; + memcpy(req->lr_req_msg.lm_iov, iov, sizeof (struct iovec) * niov); + + req->lr_resp_msg.lm_mode = L9P_PACK; + + if (l9p_pufcall(&req->lr_req_msg, &req->lr_req, conn->lc_version) != 0) { + L9P_LOG(L9P_WARNING, "cannot unpack received message"); + l9p_freefcall(&req->lr_req); + free(req); + return; + } + + if (ht_add(&conn->lc_requests, req->lr_req.hdr.tag, req)) { + L9P_LOG(L9P_WARNING, "client reusing outstanding tag %d", + req->lr_req.hdr.tag); + l9p_freefcall(&req->lr_req); + free(req); + return; + } + + error = conn->lc_lt.lt_get_response_buffer(req, + req->lr_resp_msg.lm_iov, + &req->lr_resp_msg.lm_niov, + conn->lc_lt.lt_aux); + if (error) { + L9P_LOG(L9P_WARNING, "cannot obtain buffers for response"); + ht_remove(&conn->lc_requests, req->lr_req.hdr.tag); + l9p_freefcall(&req->lr_req); + free(req); + return; + } + + /* + * NB: it's up to l9p_threadpool_run to decide whether + * to queue the work or to run it immediately and wait + * (it must do the latter for Tflush requests). 
+ */ + l9p_threadpool_run(&conn->lc_tp, req); +} + +void +l9p_connection_close(struct l9p_connection *conn) +{ + struct ht_iter iter; + struct l9p_fid *fid; + struct l9p_request *req; + + L9P_LOG(L9P_DEBUG, "waiting for thread pool to shut down"); + l9p_threadpool_shutdown(&conn->lc_tp); + + /* Drain pending requests (if any) */ + L9P_LOG(L9P_DEBUG, "draining pending requests"); + ht_iter(&conn->lc_requests, &iter); + while ((req = ht_next(&iter)) != NULL) { +#ifdef notyet + /* XXX would be good to know if there is anyone listening */ + if (anyone listening) { + /* XXX crude - ops like Tclunk should succeed */ + req->lr_error = EINTR; + l9p_respond(req, false, false); + } else +#endif + l9p_respond(req, true, false); /* use no-answer path */ + ht_remove_at_iter(&iter); + } + + /* Close opened files (if any) */ + L9P_LOG(L9P_DEBUG, "closing opened files"); + ht_iter(&conn->lc_files, &iter); + while ((fid = ht_next(&iter)) != NULL) { + conn->lc_server->ls_backend->freefid( + conn->lc_server->ls_backend->softc, fid); + free(fid); + ht_remove_at_iter(&iter); + } + + ht_destroy(&conn->lc_requests); + ht_destroy(&conn->lc_files); +} + +struct l9p_fid * +l9p_connection_alloc_fid(struct l9p_connection *conn, uint32_t fid) +{ + struct l9p_fid *file; + + file = l9p_calloc(1, sizeof (struct l9p_fid)); + file->lo_fid = fid; + /* + * Note that the new fid is not marked valid yet. + * The insert here will fail if the fid number is + * in use, otherwise we have an invalid fid in the + * table (as desired). 
+ */ + + if (ht_add(&conn->lc_files, fid, file) != 0) { + free(file); + return (NULL); + } + + return (file); +} + +void +l9p_connection_remove_fid(struct l9p_connection *conn, struct l9p_fid *fid) +{ + struct l9p_backend *be; + + /* fid should be marked invalid by this point */ + assert(!l9p_fid_isvalid(fid)); + + be = conn->lc_server->ls_backend; + be->freefid(be->softc, fid); + + ht_remove(&conn->lc_files, fid->lo_fid); + free(fid); +} Index: lib/lib9p/example/Makefile =================================================================== --- /dev/null +++ lib/lib9p/example/Makefile @@ -0,0 +1,10 @@ +PROG= server +SRCS= server.c +MAN= + +CFLAGS= -pthread -g -O0 + +LDFLAGS=-L.. +LDADD= -lsbuf -l9p -lcasper -lcap_pwd -lcap_grp + +.include Index: lib/lib9p/example/server.c =================================================================== --- /dev/null +++ lib/lib9p/example/server.c @@ -0,0 +1,83 @@ +/* + * Copyright 2016 Jakub Klama + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include "../lib9p.h"
+#include "../backend/fs.h"
+#include "../transport/socket.h"
+
+/*
+ * Example 9P server: export the directory named on the command line.
+ *
+ * Options:
+ *   -h host   address to listen on (default "0.0.0.0")
+ *   -p port   port to listen on (default "564")
+ * The single positional argument is the directory to export; it is
+ * opened with O_DIRECTORY and handed to the fs backend as its root.
+ *
+ * Exits with status 1 and a diagnostic on any setup failure.
+ */
+int
+main(int argc, char **argv)
+{
+	struct l9p_backend *fs_backend;
+	struct l9p_server *server;
+	char *host = "0.0.0.0";
+	char *port = "564";
+	char *path;
+	int rootfd;
+	int opt;
+
+	while ((opt = getopt(argc, argv, "h:p:")) != -1) {
+		switch (opt) {
+		case 'h':
+			host = optarg;
+			break;
+		case 'p':
+			port = optarg;
+			break;
+		case '?':
+		default:
+			goto usage;
+		}
+	}
+	if (optind >= argc) {
+usage:
+		errx(1, "Usage: server [-h <host>] [-p <port>] <path>");
+	}
+	path = argv[optind];
+	rootfd = open(path, O_DIRECTORY);
+
+	if (rootfd < 0)
+		err(1, "cannot open root directory");
+
+	/*
+	 * l9p_backend_fs_init() takes a third "ro" argument; this example
+	 * has no read-only option, so export read-write.
+	 */
+	if (l9p_backend_fs_init(&fs_backend, rootfd, false) != 0)
+		err(1, "cannot init backend");
+
+	if (l9p_server_init(&server, fs_backend) != 0)
+		err(1, "cannot create server");
+
+	server->ls_max_version = L9P_2000L;
+	if (l9p_start_server(server, host, port))
+		err(1, "l9p_start_server() failed");
+
+	/* XXX - we never get here, l9p_start_server does not return */
+	exit(0);
+}
Index: lib/lib9p/fcall.h
===================================================================
--- /dev/null
+++ lib/lib9p/fcall.h
@@ -0,0 +1,624 @@
+/*
+ * Copyright 2016 Jakub Klama 
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * Based on libixp code: ©2007-2010 Kris Maglione + */ + +#ifndef LIB9P_FCALL_H +#define LIB9P_FCALL_H + +#include + +#define L9P_MAX_WELEM 256 + +/* + * Function call/reply (Tfoo/Rfoo) numbers. + * + * These are protocol code numbers, so the exact values + * matter. However, __FIRST and __LAST_PLUS_ONE are for + * debug code, and just need to encompass the entire range. + * + * Note that we rely (in the debug code) on Rfoo == Tfoo+1. 
+ */ +enum l9p_ftype { + L9P__FIRST = 6, /* NB: must be <= all legal values */ + L9P_TLERROR = 6, /* illegal; exists for parity with Rlerror */ + L9P_RLERROR, + L9P_TSTATFS = 8, + L9P_RSTATFS, + L9P_TLOPEN = 12, + L9P_RLOPEN, + L9P_TLCREATE = 14, + L9P_RLCREATE, + L9P_TSYMLINK = 16, + L9P_RSYMLINK, + L9P_TMKNOD = 18, + L9P_RMKNOD, + L9P_TRENAME = 20, + L9P_RRENAME, + L9P_TREADLINK = 22, + L9P_RREADLINK, + L9P_TGETATTR = 24, + L9P_RGETATTR, + L9P_TSETATTR = 26, + L9P_RSETATTR, + L9P_TXATTRWALK = 30, + L9P_RXATTRWALK, + L9P_TXATTRCREATE = 32, + L9P_RXATTRCREATE, + L9P_TREADDIR = 40, + L9P_RREADDIR, + L9P_TFSYNC = 50, + L9P_RFSYNC, + L9P_TLOCK = 52, + L9P_RLOCK, + L9P_TGETLOCK = 54, + L9P_RGETLOCK, + L9P_TLINK = 70, + L9P_RLINK, + L9P_TMKDIR = 72, + L9P_RMKDIR, + L9P_TRENAMEAT = 74, + L9P_RRENAMEAT, + L9P_TUNLINKAT = 76, + L9P_RUNLINKAT, + L9P_TVERSION = 100, + L9P_RVERSION, + L9P_TAUTH = 102, + L9P_RAUTH, + L9P_TATTACH = 104, + L9P_RATTACH, + L9P_TERROR = 106, /* illegal */ + L9P_RERROR, + L9P_TFLUSH = 108, + L9P_RFLUSH, + L9P_TWALK = 110, + L9P_RWALK, + L9P_TOPEN = 112, + L9P_ROPEN, + L9P_TCREATE = 114, + L9P_RCREATE, + L9P_TREAD = 116, + L9P_RREAD, + L9P_TWRITE = 118, + L9P_RWRITE, + L9P_TCLUNK = 120, + L9P_RCLUNK, + L9P_TREMOVE = 122, + L9P_RREMOVE, + L9P_TSTAT = 124, + L9P_RSTAT, + L9P_TWSTAT = 126, + L9P_RWSTAT, + L9P__LAST_PLUS_1, /* NB: must be last */ +}; + +/* + * When a Tfoo request comes over the wire, we decode it + * (pack.c) from wire format into a request laid out in + * a "union l9p_fcall" object. This object is not in wire + * format, but rather in something more convenient for us + * to operate on. + * + * We then dispatch the request (request.c, backend/fs.c) and + * use another "union l9p_fcall" object to build a reply. + * The reply is converted to wire format on the way back out + * (pack.c again). 
+ * + * All sub-objects start with a header containing the request + * or reply type code and two-byte tag, and whether or not it + * is needed, a four-byte fid. + * + * What this means here is that the data structures within + * the union can be shared across various requests and replies. + * For instance, replies to OPEN, CREATE, LCREATE, LOPEN, MKDIR, and + * SYMLINK are all fairly similar (providing a qid and sometimes + * an iounit) and hence can all use the l9p_f_ropen structure. + * Which structures are used for which operations is somewhat + * arbitrary; for programming ease, if an operation shares a + * data structure, it still has its own name: there are union + * members named ropen, rcreate, rlcreate, rlopen, rmkdir, and + * rsymlink, even though all use struct l9p_f_ropen. + * + * The big exception to the above rule is struct l9p_f_io, which + * is used as both request and reply for all of READ, WRITE, and + * READDIR. Moreover, the READDIR reply must be pre-packed into + * wire format (it is handled like raw data a la READ). + * + * Some request messages (e.g., TREADLINK) fit in a header, having + * just type code, tag, and fid. These have no separate data + * structure, nor union member name. Similarly, some reply + * messages (e.g., RCLUNK, RREMOVE, RRENAME) have just the type + * code and tag. + */ + +/* + * Type code bits in (the first byte of) a qid. + */ +enum l9p_qid_type { + L9P_QTDIR = 0x80, /* type bit for directories */ + L9P_QTAPPEND = 0x40, /* type bit for append only files */ + L9P_QTEXCL = 0x20, /* type bit for exclusive use files */ + L9P_QTMOUNT = 0x10, /* type bit for mounted channel */ + L9P_QTAUTH = 0x08, /* type bit for authentication file */ + L9P_QTTMP = 0x04, /* type bit for non-backed-up file */ + L9P_QTSYMLINK = 0x02, /* type bit for symbolic link */ + L9P_QTFILE = 0x00 /* type bits for plain file */ +}; + +/* + * Extra permission bits in create and file modes (stat). 
+ */ +#define L9P_DMDIR 0x80000000 +enum { + L9P_DMAPPEND = 0x40000000, + L9P_DMEXCL = 0x20000000, + L9P_DMMOUNT = 0x10000000, + L9P_DMAUTH = 0x08000000, + L9P_DMTMP = 0x04000000, + L9P_DMSYMLINK = 0x02000000, + /* 9P2000.u extensions */ + L9P_DMDEVICE = 0x00800000, + L9P_DMNAMEDPIPE = 0x00200000, + L9P_DMSOCKET = 0x00100000, + L9P_DMSETUID = 0x00080000, + L9P_DMSETGID = 0x00040000, +}; + +/* + * Open/create mode bits in 9P2000 and 9P2000.u operations + * (not Linux lopen and lcreate flags, which are different). + * Note that the mode field is only one byte wide. + */ +enum l9p_omode { + L9P_OREAD = 0, /* open for read */ + L9P_OWRITE = 1, /* write */ + L9P_ORDWR = 2, /* read and write */ + L9P_OEXEC = 3, /* execute, == read but check execute permission */ + L9P_OACCMODE = 3, /* mask for the above access-mode bits */ + L9P_OTRUNC = 16, /* or'ed in (except for exec), truncate file first */ + L9P_OCEXEC = 32, /* or'ed in, close on exec */ + L9P_ORCLOSE = 64, /* or'ed in, remove on close */ + L9P_ODIRECT = 128, /* or'ed in, direct access */ +}; + +/* + * Flag bits in 9P2000.L operations (Tlopen, Tlcreate). These are + * basically just the Linux O_* flags. The bottom 3 bits are the + * same as for l9p_omode, although open-for-exec is not used: + * instead, the client does a Tgetattr and checks the mode for + * execute bits, then just opens for reading. + * + * Each L_O_xxx is just value O_xxx has on Linux in <fcntl.h>; + * not all are necessarily used. From observation, we do get + * L_O_CREAT and L_O_EXCL when creating with exclusive, and always + * get L_O_LARGEFILE. We do get L_O_APPEND when opening for + * append. We also get both L_O_DIRECT and L_O_DIRECTORY set + * when opening directories. + * + * We probably never get L_O_NOCTTY which makes no sense, and + * some of the other options may need to be handled on the client.
+ */ +enum l9p_l_o_flags { + L9P_L_O_CREAT = 000000100U, + L9P_L_O_EXCL = 000000200U, + L9P_L_O_NOCTTY = 000000400U, + L9P_L_O_TRUNC = 000001000U, + L9P_L_O_APPEND = 000002000U, + L9P_L_O_NONBLOCK = 000004000U, + L9P_L_O_DSYNC = 000010000U, + L9P_L_O_FASYNC = 000020000U, + L9P_L_O_DIRECT = 000040000U, + L9P_L_O_LARGEFILE = 000100000U, + L9P_L_O_DIRECTORY = 000200000U, + L9P_L_O_NOFOLLOW = 000400000U, + L9P_L_O_NOATIME = 001000000U, + L9P_L_O_CLOEXEC = 002000000U, + L9P_L_O_SYNC = 004000000U, + L9P_L_O_PATH = 010000000U, + L9P_L_O_TMPFILE = 020000000U, +}; + +struct l9p_hdr { + uint8_t type; + uint16_t tag; + uint32_t fid; +}; + +struct l9p_qid { + enum l9p_qid_type type; + uint32_t version; + uint64_t path; +}; + +struct l9p_stat { + uint16_t type; + uint32_t dev; + struct l9p_qid qid; + uint32_t mode; + uint32_t atime; + uint32_t mtime; + uint64_t length; + char *name; + char *uid; + char *gid; + char *muid; + char *extension; + uint32_t n_uid; + uint32_t n_gid; + uint32_t n_muid; +}; + +#define L9P_FSTYPE 0x01021997 + +struct l9p_statfs { + uint32_t type; /* file system type */ + uint32_t bsize; /* block size for I/O */ + uint64_t blocks; /* file system size (bsize-byte blocks) */ + uint64_t bfree; /* free blocks in fs */ + uint64_t bavail; /* free blocks avail to non-superuser*/ + uint64_t files; /* file nodes in file system (# inodes) */ + uint64_t ffree; /* free file nodes in fs */ + uint64_t fsid; /* file system identifier */ + uint32_t namelen; /* maximum length of filenames */ +}; + +struct l9p_f_version { + struct l9p_hdr hdr; + uint32_t msize; + char *version; +}; + +struct l9p_f_tflush { + struct l9p_hdr hdr; + uint16_t oldtag; +}; + +struct l9p_f_error { + struct l9p_hdr hdr; + char *ename; + uint32_t errnum; +}; + +struct l9p_f_ropen { + struct l9p_hdr hdr; + struct l9p_qid qid; + uint32_t iounit; +}; + +struct l9p_f_rauth { + struct l9p_hdr hdr; + struct l9p_qid aqid; +}; + +struct l9p_f_attach { + struct l9p_hdr hdr; + uint32_t afid; + char *uname; + 
char *aname; + uint32_t n_uname; +}; +#define L9P_NOFID ((uint32_t)-1) /* in Tattach, no auth fid */ +#define L9P_NONUNAME ((uint32_t)-1) /* in Tattach, no n_uname */ + +struct l9p_f_tcreate { + struct l9p_hdr hdr; + uint32_t perm; + char *name; + uint8_t mode; /* +Topen */ + char *extension; +}; + +struct l9p_f_twalk { + struct l9p_hdr hdr; + uint32_t newfid; + uint16_t nwname; + char *wname[L9P_MAX_WELEM]; +}; + +struct l9p_f_rwalk { + struct l9p_hdr hdr; + uint16_t nwqid; + struct l9p_qid wqid[L9P_MAX_WELEM]; +}; + +struct l9p_f_io { + struct l9p_hdr hdr; + uint64_t offset; /* Tread, Twrite, Treaddir */ + uint32_t count; /* Tread, Twrite, Rread, Treaddir, Rreaddir */ +}; + +struct l9p_f_rstat { + struct l9p_hdr hdr; + struct l9p_stat stat; +}; + +struct l9p_f_twstat { + struct l9p_hdr hdr; + struct l9p_stat stat; +}; + +struct l9p_f_rstatfs { + struct l9p_hdr hdr; + struct l9p_statfs statfs; +}; + +/* Used for Tlcreate, Tlopen, Tmkdir, Tunlinkat. */ +struct l9p_f_tlcreate { + struct l9p_hdr hdr; + char *name; /* Tlcreate, Tmkdir, Tunlinkat */ + uint32_t flags; /* Tlcreate, Tlopen, Tmkdir, Tunlinkat */ + uint32_t mode; /* Tlcreate, Tmkdir */ + uint32_t gid; /* Tlcreate, Tmkdir */ +}; + +struct l9p_f_tsymlink { + struct l9p_hdr hdr; + char *name; + char *symtgt; + uint32_t gid; +}; + +struct l9p_f_tmknod { + struct l9p_hdr hdr; + char *name; + uint32_t mode; + uint32_t major; + uint32_t minor; + uint32_t gid; +}; + +struct l9p_f_trename { + struct l9p_hdr hdr; + uint32_t dfid; + char *name; +}; + +struct l9p_f_rreadlink { + struct l9p_hdr hdr; + char *target; +}; + +struct l9p_f_tgetattr { + struct l9p_hdr hdr; + uint64_t request_mask; +}; + +struct l9p_f_rgetattr { + struct l9p_hdr hdr; + uint64_t valid; + struct l9p_qid qid; + uint32_t mode; + uint32_t uid; + uint32_t gid; + uint64_t nlink; + uint64_t rdev; + uint64_t size; + uint64_t blksize; + uint64_t blocks; + uint64_t atime_sec; + uint64_t atime_nsec; + uint64_t mtime_sec; + uint64_t mtime_nsec; + uint64_t 
ctime_sec; + uint64_t ctime_nsec; + uint64_t btime_sec; + uint64_t btime_nsec; + uint64_t gen; + uint64_t data_version; +}; + +/* Fields in req->request_mask and reply->valid for Tgetattr, Rgetattr. */ +enum l9pl_getattr_flags { + L9PL_GETATTR_MODE = 0x00000001, + L9PL_GETATTR_NLINK = 0x00000002, + L9PL_GETATTR_UID = 0x00000004, + L9PL_GETATTR_GID = 0x00000008, + L9PL_GETATTR_RDEV = 0x00000010, + L9PL_GETATTR_ATIME = 0x00000020, + L9PL_GETATTR_MTIME = 0x00000040, + L9PL_GETATTR_CTIME = 0x00000080, + L9PL_GETATTR_INO = 0x00000100, + L9PL_GETATTR_SIZE = 0x00000200, + L9PL_GETATTR_BLOCKS = 0x00000400, + /* everything up to and including BLOCKS is BASIC */ + L9PL_GETATTR_BASIC = L9PL_GETATTR_MODE | + L9PL_GETATTR_NLINK | + L9PL_GETATTR_UID | + L9PL_GETATTR_GID | + L9PL_GETATTR_RDEV | + L9PL_GETATTR_ATIME | + L9PL_GETATTR_MTIME | + L9PL_GETATTR_CTIME | + L9PL_GETATTR_INO | + L9PL_GETATTR_SIZE | + L9PL_GETATTR_BLOCKS, + L9PL_GETATTR_BTIME = 0x00000800, + L9PL_GETATTR_GEN = 0x00001000, + L9PL_GETATTR_DATA_VERSION = 0x00002000, + /* BASIC + birthtime + gen + data-version = ALL */ + L9PL_GETATTR_ALL = L9PL_GETATTR_BASIC | + L9PL_GETATTR_BTIME | + L9PL_GETATTR_GEN | + L9PL_GETATTR_DATA_VERSION, +}; + +struct l9p_f_tsetattr { + struct l9p_hdr hdr; + uint32_t valid; + uint32_t mode; + uint32_t uid; + uint32_t gid; + uint64_t size; + uint64_t atime_sec; /* if valid & L9PL_SETATTR_ATIME_SET */ + uint64_t atime_nsec; /* (else use on-server time) */ + uint64_t mtime_sec; /* if valid & L9PL_SETATTR_MTIME_SET */ + uint64_t mtime_nsec; /* (else use on-server time) */ +}; + +/* Fields in req->valid for Tsetattr. 
*/ +enum l9pl_setattr_flags { + L9PL_SETATTR_MODE = 0x00000001, + L9PL_SETATTR_UID = 0x00000002, + L9PL_SETATTR_GID = 0x00000004, + L9PL_SETATTR_SIZE = 0x00000008, + L9PL_SETATTR_ATIME = 0x00000010, + L9PL_SETATTR_MTIME = 0x00000020, + L9PL_SETATTR_CTIME = 0x00000040, + L9PL_SETATTR_ATIME_SET = 0x00000080, + L9PL_SETATTR_MTIME_SET = 0x00000100, +}; + +struct l9p_f_txattrwalk { + struct l9p_hdr hdr; + uint32_t newfid; + char *name; +}; + +struct l9p_f_rxattrwalk { + struct l9p_hdr hdr; + uint64_t size; +}; + +struct l9p_f_txattrcreate { + struct l9p_hdr hdr; + char *name; + uint64_t attr_size; + uint32_t flags; +}; + +struct l9p_f_tlock { + struct l9p_hdr hdr; + uint8_t type; /* from l9pl_lock_type */ + uint32_t flags; /* from l9pl_lock_flags */ + uint64_t start; + uint64_t length; + uint32_t proc_id; + char *client_id; +}; + +enum l9pl_lock_type { + L9PL_LOCK_TYPE_RDLOCK = 0, + L9PL_LOCK_TYPE_WRLOCK = 1, + L9PL_LOCK_TYPE_UNLOCK = 2, +}; + +enum l9pl_lock_flags { + L9PL_LOCK_TYPE_BLOCK = 1, + L9PL_LOCK_TYPE_RECLAIM = 2, +}; + +struct l9p_f_rlock { + struct l9p_hdr hdr; + uint8_t status; /* from l9pl_lock_status */ +}; + +enum l9pl_lock_status { + L9PL_LOCK_SUCCESS = 0, + L9PL_LOCK_BLOCKED = 1, + L9PL_LOCK_ERROR = 2, + L9PL_LOCK_GRACE = 3, +}; + +struct l9p_f_getlock { + struct l9p_hdr hdr; + uint8_t type; /* from l9pl_lock_type */ + uint64_t start; + uint64_t length; + uint32_t proc_id; + char *client_id; +}; + +struct l9p_f_tlink { + struct l9p_hdr hdr; + uint32_t dfid; + char *name; +}; + +struct l9p_f_trenameat { + struct l9p_hdr hdr; + char *oldname; + uint32_t newdirfid; + char *newname; +}; + +/* + * Flags in Tunlinkat (which re-uses f_tlcreate data structure but + * with different meaning). 
+ */ +enum l9p_l_unlinkat_flags { + /* not sure if any other AT_* flags are passed through */ + L9PL_AT_REMOVEDIR = 0x0200, +}; + +union l9p_fcall { + struct l9p_hdr hdr; + struct l9p_f_version version; + struct l9p_f_tflush tflush; + struct l9p_f_ropen ropen; + struct l9p_f_ropen rcreate; + struct l9p_f_ropen rattach; + struct l9p_f_error error; + struct l9p_f_rauth rauth; + struct l9p_f_attach tattach; + struct l9p_f_attach tauth; + struct l9p_f_tcreate tcreate; + struct l9p_f_tcreate topen; + struct l9p_f_twalk twalk; + struct l9p_f_rwalk rwalk; + struct l9p_f_twstat twstat; + struct l9p_f_rstat rstat; + struct l9p_f_rstatfs rstatfs; + struct l9p_f_tlcreate tlopen; + struct l9p_f_ropen rlopen; + struct l9p_f_tlcreate tlcreate; + struct l9p_f_ropen rlcreate; + struct l9p_f_tsymlink tsymlink; + struct l9p_f_ropen rsymlink; + struct l9p_f_tmknod tmknod; + struct l9p_f_ropen rmknod; + struct l9p_f_trename trename; + struct l9p_f_rreadlink rreadlink; + struct l9p_f_tgetattr tgetattr; + struct l9p_f_rgetattr rgetattr; + struct l9p_f_tsetattr tsetattr; + struct l9p_f_txattrwalk txattrwalk; + struct l9p_f_rxattrwalk rxattrwalk; + struct l9p_f_txattrcreate txattrcreate; + struct l9p_f_tlock tlock; + struct l9p_f_rlock rlock; + struct l9p_f_getlock getlock; + struct l9p_f_tlink tlink; + struct l9p_f_tlcreate tmkdir; + struct l9p_f_ropen rmkdir; + struct l9p_f_trenameat trenameat; + struct l9p_f_tlcreate tunlinkat; + struct l9p_f_io io; +}; + +#endif /* LIB9P_FCALL_H */ Index: lib/lib9p/fid.h =================================================================== --- /dev/null +++ lib/lib9p/fid.h @@ -0,0 +1,160 @@ +/* + * Copyright 2016 Jakub Klama + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef LIB9P_FID_H +#define LIB9P_FID_H + +#include + +/* + * Data structure for a fid. All active fids in one session + * are stored in a hash table; the hash table provides the + * iterator to process them. (See also l9p_connection in lib9p.h.) + * + * The back-end code has additional data per fid, found via + * lo_aux. Currently this is allocated with a separate calloc(). + * + * Most fids represent a file or directory, but a few are special + * purpose, including the auth fid from Tauth+Tattach, and the + * fids used for extended attributes. We have our own set of + * flags here in lo_flags. + * + * Note that all new fids start as potentially-valid (reserving + * their 32-bit fid value), but not actually-valid. If another + * (threaded) op is invoked on a not-yet-valid fid, the fid cannot + * be used. A fid can also be locked against other threads, in + * which case they must wait for it: this happens during create + * and open, which on success result in the fid changing from a + * directory to a file. 
(At least, all this applies in principle + * -- we're currently single-threaded per connection so the locks + * are nop-ed out and the valid bit is mainly just for debug.) + * + * Fids that are "open" (the underlying file or directory is open) + * are marked as well. + * + * Locking is managed by the front end (request.c); validation + * and type-marking can be done by either side as needed. + * + * Fid types and validity are manipulated by set* and unset* + * functions, and tested by is* ops. Note that we only + * distinguish between "directory" and "not directory" at this + * level, i.e., symlinks and devices are just "not a directory + * fid". Also, fids cannot be unset as auth or xattr fids, + * nor can an open fid become closed, except by being clunked. + * While files should not normally become directories, it IS normal + * for directory fids to become file fids due to Twalk operations. + * + * (These accessor functions are just to leave wiggle room for + * different future implementations.) + */ +struct l9p_fid { + void *lo_aux; + uint32_t lo_fid; + uint32_t lo_flags; /* volatile atomic_t when threaded? 
*/ +}; + +enum l9p_lo_flags { + L9P_LO_ISAUTH = 0x01, + L9P_LO_ISDIR = 0x02, + L9P_LO_ISOPEN = 0x04, + L9P_LO_ISVALID = 0x08, + L9P_LO_ISXATTR = 0x10, +}; + +static inline bool +l9p_fid_isauth(struct l9p_fid *fid) +{ + return ((fid->lo_flags & L9P_LO_ISAUTH) != 0); +} + +static inline void +l9p_fid_setauth(struct l9p_fid *fid) +{ + fid->lo_flags |= L9P_LO_ISAUTH; +} + +static inline bool +l9p_fid_isdir(struct l9p_fid *fid) +{ + return ((fid->lo_flags & L9P_LO_ISDIR) != 0); +} + +static inline void +l9p_fid_setdir(struct l9p_fid *fid) +{ + fid->lo_flags |= L9P_LO_ISDIR; +} + +static inline void +l9p_fid_unsetdir(struct l9p_fid *fid) +{ + fid->lo_flags &= ~(uint32_t)L9P_LO_ISDIR; +} + +static inline bool +l9p_fid_isopen(struct l9p_fid *fid) +{ + return ((fid->lo_flags & L9P_LO_ISOPEN) != 0); +} + +static inline void +l9p_fid_setopen(struct l9p_fid *fid) +{ + fid->lo_flags |= L9P_LO_ISOPEN; +} + +static inline bool +l9p_fid_isvalid(struct l9p_fid *fid) +{ + return ((fid->lo_flags & L9P_LO_ISVALID) != 0); +} + +static inline void +l9p_fid_setvalid(struct l9p_fid *fid) +{ + fid->lo_flags |= L9P_LO_ISVALID; +} + +static inline void +l9p_fid_unsetvalid(struct l9p_fid *fid) +{ + fid->lo_flags &= ~(uint32_t)L9P_LO_ISVALID; +} + +static inline bool +l9p_fid_isxattr(struct l9p_fid *fid) +{ + return ((fid->lo_flags & L9P_LO_ISXATTR) != 0); +} + +static inline void +l9p_fid_setxattr(struct l9p_fid *fid) +{ + fid->lo_flags |= L9P_LO_ISXATTR; +} + +#endif /* LIB9P_FID_H */ Index: lib/lib9p/genacl.h =================================================================== --- /dev/null +++ lib/lib9p/genacl.h @@ -0,0 +1,307 @@ +/* + * Copyright 2016 Chris Torek + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * General ACL support for 9P2000.L. + * + * We mostly use Linux's xattr name space and nfs4 ACL bits, as + * these are the most general forms available. + * + * Linux requests attributes named + * + * "system.posix_acl_default" + * "system.posix_acl_access" + * + * to get POSIX style ACLs, and: + * + * "system.nfs4_acl" + * + * to get NFSv4 style ACLs. The v9fs client does not explicitly + * ask for the latter, but if you use the Ubuntu nfs4-acl-tools + * package, it should be able to read and write these. + * + * For the record, the Linux kernel source code also shows: + * + * - Lustre uses "trusted.*", with "*" matching "lov", "lma", + * "lmv", "dmv", "link", "fid", "version", "som", "hsm", and + * "lfsck_namespace". + * + * - ceph has a name tree of the form "ceph.." with + * pairs like <"dir","entries">, <"dir","files>, + * <"file","layout">, and so on. + * + * - ext4 uses the POSIX names, plus some special ext4-specific + * goop that might not get externalized. 
+ * + * - NFS uses both the POSIX names and the NFSv4 ACLs. However, + * what it mainly does is have nfsd generate fake NFSv4 ACLs + * from POSIX ACLs. If you run an NFS client, the client + * relies on the server actually implementing the ACLs, and + * lets nfs4-acl-tools read and write the system.nfs4_acl xattr + * data. If you run an NFS server off, e.g., an ext4 file system, + * the server looks for the system.nfs4_acl xattr, serves that + * out if found, and otherwise just generates the fakes. + * + * - "security.*" and "selinux.*" are reserved. + * + * - "security.capability" is the name for capabilities. + * + * - sockets use "system.sockprotoname". + */ + +#if defined(__APPLE__) + #define HAVE_POSIX_ACLS + #define HAVE_DARWIN_ACLS +#endif + +#if defined(__FreeBSD__) + #define HAVE_POSIX_ACLS + #define HAVE_FREEBSD_ACLS +#endif + +#include +#include /* XXX assumes existence of sys/acl.h */ + +/* + * An ACL consists of a number of ACEs that grant some kind of + * "allow" or "deny" to some specific entity. + * + * The number of ACEs is potentially unlimited, although in practice + * they tend not to be that long. + * + * It's the responsibility of the back-end to supply the ACL + * for each test. However, the ACL may be in some sort of + * system-specific form. It's the responsibility of some + * (system-specific) code to translate it to *this* form, after + * which the backend may use l9p_acl_check_access() to get + * access granted or denied (and, eventually, audits and alarms + * recorded and raised, although that's yet to be designed). + * + * The reason for all this faffing-about with formats is so that + * we can *report* the ACLs using Linux 9p style xattrs. + */ + +struct l9p_acl; +struct l9p_fid; + +void l9p_acl_free(struct l9p_acl *); + +/* + * An ACL is made up of ACEs.
+ * + * Each ACE has: + * + * - a type: allow, deny, audit, alarm + * - a set of flags + * - permissions bits: a "mask" + * - an optional, nominally-variable-length identity + * + * The last part is especially tricky and currently has limited + * support here: it's always a 16 byte field on Darwin, and just + * a uint32_t on BSD (should be larger, really). Linux supports + * very large, actually-variable-size values; we'll deal with + * this later, maybe. + * + * We will define the mask first, below, since these are also the bits + * passed in for the accmask argument to l9p_acl_check_access(). + */ + +/* + * ACL entry mask, and accmask argument flags. + * + * NB: not every bit is implemented, but they are all here because + * they are all defined as part of an NFSv4 ACL entry, which is + * more or less a superset of a POSIX ACL entry. This means you + * can put a complete NFSv4 ACL in and we can reproduce it. + * + * Note that the LIST_DIRECTORY, ADD_FILE, and ADD_SUBDIRECTORY bits + * apply only to a directory, while the READ_DATA, WRITE_DATA, and + * APPEND_DATA bits apply only to a file. See aca_parent/aca_child + * below. + */ +#define L9P_ACE_READ_DATA 0x00001 +#define L9P_ACE_LIST_DIRECTORY 0x00001 /* same as READ_DATA */ +#define L9P_ACE_WRITE_DATA 0x00002 +#define L9P_ACE_ADD_FILE 0x00002 /* same as WRITE_DATA */ +#define L9P_ACE_APPEND_DATA 0x00004 +#define L9P_ACE_ADD_SUBDIRECTORY 0x00004 /* same as APPEND_DATA */ +#define L9P_ACE_READ_NAMED_ATTRS 0x00008 +#define L9P_ACE_WRITE_NAMED_ATTRS 0x00010 +#define L9P_ACE_EXECUTE 0x00020 +#define L9P_ACE_DELETE_CHILD 0x00040 +#define L9P_ACE_READ_ATTRIBUTES 0x00080 +#define L9P_ACE_WRITE_ATTRIBUTES 0x00100 +#define L9P_ACE_WRITE_RETENTION 0x00200 /* not used here */ +#define L9P_ACE_WRITE_RETENTION_HOLD 0x00400 /* not used here */ +/* 0x00800 unused? 
*/ +#define L9P_ACE_DELETE 0x01000 +#define L9P_ACE_READ_ACL 0x02000 +#define L9P_ACE_WRITE_ACL 0x04000 +#define L9P_ACE_WRITE_OWNER 0x08000 +#define L9P_ACE_SYNCHRONIZE 0x10000 /* not used here */ + +/* + * This is not an ACE bit, but is used with the access checking + * below. It represents a request to unlink (delete child / + * delete) an entity, and is equivalent to asking for *either* + * (not both) permission. + */ +#define L9P_ACOP_UNLINK (L9P_ACE_DELETE_CHILD | L9P_ACE_DELETE) + +/* + * Access checking takes a lot of arguments, so they are + * collected into a "struct" here. + * + * The aca_parent and aca_pstat fields may/must be NULL if the + * operation itself does not involve "directory" permissions. + * The aca_child and aca_cstat fields may/must be NULL if the + * operation does not involve anything *but* a directory. This + * is how we decide whether you're interested in L9P_ACE_READ_DATA + * vs L9P_ACE_LIST_DIRECTORY, for instance. + * + * Note that it's OK for both parent and child to be directories + * (as is the case when we're adding or deleting a subdirectory). + */ +struct l9p_acl_check_args { + uid_t aca_uid; /* the uid that is requesting access */ + gid_t aca_gid; /* the gid that is requesting access */ + gid_t *aca_groups; /* the additional group-set, if any */ + size_t aca_ngroups; /* number of groups in group-set */ + struct l9p_acl *aca_parent; /* ACLs associated with parent/dir */ + struct stat *aca_pstat; /* stat data for parent/dir */ + struct l9p_acl *aca_child; /* ACLs associated with file */ + struct stat *aca_cstat; /* stat data for file */ + int aca_aclmode; /* mode checking bits, see below */ + bool aca_superuser; /* always allow uid==0 in STAT_MODE */ +}; + +/* + * Access checking mode bits in aca_aclmode. If you enable + * ACLs, they are used first, optionally with ZFS style ACLs. + * This means that even if aca_superuser is set, if an ACL denies + * permission to uid 0, permission is really denied.
+ * + * NFS style ACLs run before POSIX style ACLs (though POSIX + * ACLs aren't done yet anyway). + * + * N.B.: you probably want L9P_ACM_ZFS_ACL, especially when operating + * with a ZFS file system on FreeBSD. + */ +#define L9P_ACM_NFS_ACL 0x0001 /* enable NFS ACL checking */ +#define L9P_ACM_ZFS_ACL 0x0002 /* use ZFS ACL unlink semantics */ +#define L9P_ACM_POSIX_ACL 0x0004 /* enable POSIX ACL checking (notyet) */ +#define L9P_ACM_STAT_MODE 0x0008 /* enable st_mode bits */ + +/* + * Requests to access some file or directory must provide: + * + * - An operation. This should usually be just one bit from the + * L9P_ACE_* bit-sets above, or our special L9P_ACOP_UNLINK. + * For a few file-open operations it may be multiple bits, + * e.g., both read and write data. + * - The identity of the accessor: uid + gid + gid-set. + * - The type of access desired: this may be multiple bits. + * - The parent directory, if applicable. + * - The child file/dir being accessed, if applicable. + * - stat data for parent and/or child, if applicable. + * + * The ACLs and/or stat data of the parent and/or child get used + * here, so the caller must provide them. We should have a way to + * cache these on fids, but not yet. The parent and child + * arguments are a bit tricky; see the code in genacl.c. + */ +int l9p_acl_check_access(int32_t op, struct l9p_acl_check_args *args); + +/* + * When falling back to POSIX ACL or Unix-style permissions + * testing, it's nice to collapse the above detailed permissions + * into simple read/write/execute bits (value 0..7). We provide + * a small utility function that does this. + */ +int l9p_ace_mask_to_rwx(int32_t); + +/* + * The rest of the data in an ACE.
+ */ + +/* type in ace_type */ +#define L9P_ACET_ACCESS_ALLOWED 0 +#define L9P_ACET_ACCESS_DENIED 1 +#define L9P_ACET_SYSTEM_AUDIT 2 +#define L9P_ACET_SYSTEM_ALARM 3 + +/* flags in ace_flags */ +#define L9P_ACEF_FILE_INHERIT_ACE 0x001 +#define L9P_ACEF_DIRECTORY_INHERIT_ACE 0x002 +#define L9P_ACEF_NO_PROPAGATE_INHERIT_ACE 0x004 +#define L9P_ACEF_INHERIT_ONLY_ACE 0x008 +#define L9P_ACEF_SUCCESSFUL_ACCESS_ACE_FLAG 0x010 +#define L9P_ACEF_FAILED_ACCESS_ACE_FLAG 0x020 +#define L9P_ACEF_IDENTIFIER_GROUP 0x040 +#define L9P_ACEF_OWNER 0x080 +#define L9P_ACEF_GROUP 0x100 +#define L9P_ACEF_EVERYONE 0x200 + +#if defined(__APPLE__) +# define L9P_ACE_IDSIZE 16 /* but, how do we map Darwin uuid? */ +#else +# define L9P_ACE_IDSIZE 4 +#endif + +struct l9p_ace { + uint16_t ace_type; /* ACL entry type */ + uint16_t ace_flags; /* ACL entry flags */ + uint32_t ace_mask; /* ACL entry mask */ + uint32_t ace_idsize; /* length of ace_idbytes */ + unsigned char ace_idbytes[L9P_ACE_IDSIZE]; +}; + +#define L9P_ACLTYPE_NFSv4 1 /* currently the only valid type */ +struct l9p_acl { + uint32_t acl_acetype; /* reserved for future expansion */ + uint32_t acl_nace; /* number of occupied ACEs */ + uint32_t acl_aceasize; /* actual size of ACE array */ + struct l9p_ace acl_aces[]; /* variable length ACE array */ +}; + +/* + * These are the system-specific converters. + * + * Right now the backend needs to just find BSD NFSv4 ACLs + * and convert them before each operation that needs to be + * tested. 
+ */ +#if defined(HAVE_DARWIN_ACLS) +struct l9p_acl *l9p_darwin_nfsv4acl_to_acl(acl_t acl); +#endif + +#if defined(HAVE_FREEBSD_ACLS) +struct l9p_acl *l9p_freebsd_nfsv4acl_to_acl(acl_t acl); +#endif + +#if defined(HAVE_POSIX_ACLS) && 0 /* not yet */ +struct l9p_acl *l9p_posix_acl_to_acl(acl_t acl); +#endif Index: lib/lib9p/genacl.c =================================================================== --- /dev/null +++ lib/lib9p/genacl.c @@ -0,0 +1,720 @@ +/* + * Copyright 2016 Chris Torek + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
/*
 * Release an ACL.  free() accepts NULL, so callers need not test the
 * pointer first.
 */
void
l9p_acl_free(struct l9p_acl *acl)
{

	free(acl);
}

/*
 * Report whether tid (the group ID under test) is one of the
 * accessor's groups: either the primary group gid, or any of the
 * ngids entries of the supplementary list gids.  The list is never
 * read when ngids is 0.
 */
static bool
l9p_ingroup(gid_t tid, gid_t gid, gid_t *gids, size_t ngids)
{
	bool found = (tid == gid);
	size_t left = ngids;

	/* Scan order is irrelevant: we only want to know membership. */
	while (!found && left > 0) {
		left--;
		found = (gids[left] == tid);
	}
	return (found);
}
+ */ +static int +l9p_check_aces(int32_t mask, struct l9p_acl *acl, struct stat *st, + uid_t uid, gid_t gid, gid_t *gids, size_t ngids) +{ + uint32_t i; + struct l9p_ace *ace; +#ifdef ACE_DEBUG + const char *acetype, *allowdeny; + bool show_tid; +#endif + bool match; + uid_t tid; + + if (mask == 0) + return (0); + + for (i = 0; mask != 0 && i < acl->acl_nace; i++) { + ace = &acl->acl_aces[i]; + switch (ace->ace_type) { + case L9P_ACET_ACCESS_ALLOWED: + case L9P_ACET_ACCESS_DENIED: + break; + default: + /* audit, alarm - ignore */ + continue; + } +#ifdef ACE_DEBUG + show_tid = false; +#endif + if (ace->ace_flags & L9P_ACEF_OWNER) { +#ifdef ACE_DEBUG + acetype = "OWNER@"; +#endif + match = st->st_uid == uid; + } else if (ace->ace_flags & L9P_ACEF_GROUP) { +#ifdef ACE_DEBUG + acetype = "GROUP@"; +#endif + match = l9p_ingroup(st->st_gid, gid, gids, ngids); + } else if (ace->ace_flags & L9P_ACEF_EVERYONE) { +#ifdef ACE_DEBUG + acetype = "EVERYONE@"; +#endif + match = true; + } else { + if (ace->ace_idsize != sizeof(tid)) + continue; +#ifdef ACE_DEBUG + show_tid = true; +#endif + memcpy(&tid, &ace->ace_idbytes, sizeof(tid)); + if (ace->ace_flags & L9P_ACEF_IDENTIFIER_GROUP) { +#ifdef ACE_DEBUG + acetype = "group"; +#endif + match = l9p_ingroup(tid, gid, gids, ngids); + } else { +#ifdef ACE_DEBUG + acetype = "user"; +#endif + match = tid == uid; + } + } + /* + * If this ACE applies to us, check remaining bits. + * If any of those bits also apply, check the type: + * DENY means "stop now", ALLOW means allow these bits + * and keep checking. + */ +#ifdef ACE_DEBUG + allowdeny = ace->ace_type == L9P_ACET_ACCESS_DENIED ? 
+ "deny" : "allow"; +#endif + if (match && (ace->ace_mask & (uint32_t)mask) != 0) { +#ifdef ACE_DEBUG + if (show_tid) + L9P_LOG(L9P_DEBUG, + "ACE: %s %s %d: mask 0x%x ace_mask 0x%x", + allowdeny, acetype, (int)tid, + (u_int)mask, (u_int)ace->ace_mask); + else + L9P_LOG(L9P_DEBUG, + "ACE: %s %s: mask 0x%x ace_mask 0x%x", + allowdeny, acetype, + (u_int)mask, (u_int)ace->ace_mask); +#endif + if (ace->ace_type == L9P_ACET_ACCESS_DENIED) + return (-1); + mask &= ~ace->ace_mask; +#ifdef ACE_DEBUG + L9P_LOG(L9P_DEBUG, "clear 0x%x: now mask=0x%x", + (u_int)ace->ace_mask, (u_int)mask); +#endif + } else { +#ifdef ACE_DEBUG + if (show_tid) + L9P_LOG(L9P_DEBUG, + "ACE: SKIP %s %s %d: " + "match %d mask 0x%x ace_mask 0x%x", + allowdeny, acetype, (int)tid, + (int)match, (u_int)mask, + (u_int)ace->ace_mask); + else + L9P_LOG(L9P_DEBUG, + "ACE: SKIP %s %s: " + "match %d mask 0x%x ace_mask 0x%x", + allowdeny, acetype, + (int)match, (u_int)mask, + (u_int)ace->ace_mask); +#endif + } + } + + /* Return 1 if access definitely granted. */ +#ifdef ACE_DEBUG + L9P_LOG(L9P_DEBUG, "ACE: end of ACEs, mask now 0x%x: %s", + mask, mask ? "no-definitive-answer" : "ALLOW"); +#endif + return (mask == 0 ? 1 : 0); +} + +/* + * Test against ACLs. + * + * The return value is normally 0 (access allowed) or EPERM + * (access denied), so it could just be a boolean.... + * + * For "make new dir in dir" and "remove dir in dir", you must + * set the mask to test the directory permissions (not ADD_FILE but + * ADD_SUBDIRECTORY, and DELETE_CHILD). For "make new file in dir" + * you must set the opmask to test file ADD_FILE. + * + * The L9P_ACE_DELETE flag means "can delete this thing"; it's not + * clear whether it should override the parent directory's ACL if + * any. In our case it does not, but a caller may try + * L9P_ACE_DELETE_CHILD (separately, on its own) and then a + * (second, separate) L9P_ACE_DELETE, to make the permissions work + * as "or" instead of "and". 
+ * + * Pass a NULL parent/pstat if they are not applicable, e.g., + * for doing operations on an existing file, such as reading or + * writing data or attributes. Pass in a null child/cstat if + * that's not applicable, such as creating a new file/dir. + * + * NB: it's probably wise to allow the owner of any file to update + * the ACLs of that file, but we leave that test to the caller. + */ +int l9p_acl_check_access(int32_t opmask, struct l9p_acl_check_args *args) +{ + struct l9p_acl *parent, *child; + struct stat *pstat, *cstat; + int32_t pop, cop; + size_t ngids; + uid_t uid; + gid_t gid, *gids; + int panswer, canswer; + + assert(opmask != 0); + parent = args->aca_parent; + pstat = args->aca_pstat; + child = args->aca_child; + cstat = args->aca_cstat; + uid = args->aca_uid; + gid = args->aca_gid; + gids = args->aca_groups; + ngids = args->aca_ngroups; + +#ifdef ACE_DEBUG + L9P_LOG(L9P_DEBUG, + "l9p_acl_check_access: opmask=0x%x uid=%ld gid=%ld ngids=%zd", + (u_int)opmask, (long)uid, (long)gid, ngids); +#endif + /* + * If caller said "superuser semantics", check that first. + * Note that we apply them regardless of ACLs. + */ + if (uid == 0 && args->aca_superuser) + return (0); + + /* + * If told to ignore ACLs and use only stat-based permissions, + * discard any non-NULL ACL pointers. + * + * This will need some fancying up when we support POSIX ACLs. + */ + if ((args->aca_aclmode & L9P_ACM_NFS_ACL) == 0) + parent = child = NULL; + + assert(parent == NULL || parent->acl_acetype == L9P_ACLTYPE_NFSv4); + assert(parent == NULL || pstat != NULL); + assert(child == NULL || child->acl_acetype == L9P_ACLTYPE_NFSv4); + assert(child == NULL || cstat != NULL); + assert(pstat != NULL || cstat != NULL); + + /* + * If the operation is UNLINK we should have either both ACLs + * or no ACLs, but we won't require that here. + * + * If a parent ACL is supplied, it's a directory by definition. + * Make sure we're allowed to do this there, whatever this is. 
+ * If a child ACL is supplied, check it too. Note that the + * DELETE permission only applies in the child though, not + * in the parent, and the DELETE_CHILD only applies in the + * parent. + */ + pop = cop = opmask; + if (parent != NULL || pstat != NULL) { + /* + * Remove child-only bits from parent op and + * parent-only bits from child op. + * + * L9P_ACE_DELETE is child-only. + * + * L9P_ACE_DELETE_CHILD is parent-only, and three data + * access bits overlap with three directory access bits. + * We should have child==NULL && cstat==NULL, so the + * three data bits should be redundant, but it's + * both trivial and safest to remove them anyway. + */ + pop &= ~L9P_ACE_DELETE; + cop &= ~(L9P_ACE_DELETE_CHILD | L9P_ACE_LIST_DIRECTORY | + L9P_ACE_ADD_FILE | L9P_ACE_ADD_SUBDIRECTORY); + } else { + /* + * Remove child-only bits from parent op. We need + * not bother since we just found we have no parent + * and no pstat, and hence won't actually *use* pop. + * + * pop &= ~(L9P_ACE_READ_DATA | L9P_ACE_WRITE_DATA | + * L9P_ACE_APPEND_DATA); + */ + } + panswer = 0; + canswer = 0; + if (parent != NULL) + panswer = l9p_check_aces(pop, parent, pstat, + uid, gid, gids, ngids); + if (child != NULL) + canswer = l9p_check_aces(cop, child, cstat, + uid, gid, gids, ngids); + + if (panswer || canswer) { + /* + * Got a definitive answer from parent and/or + * child ACLs. We're not quite done yet though. + */ + if (opmask == L9P_ACOP_UNLINK) { + /* + * For UNLINK, we can get an allow from child + * and deny from parent, or vice versa. It's + * not 100% clear how to handle the two-answer + * case. ZFS says that if either says "allow", + * we allow, and if both definitely say "deny", + * we deny. This makes sense, so we do that + * here for all cases, even "strict". + */ + if (panswer > 0 || canswer > 0) + return (0); + if (panswer < 0 && canswer < 0) + return (EPERM); + /* non-definitive answer from one! 
move on */ + } else { + /* + * Have at least one definitive answer, and + * should have only one; obey whichever + * one it is. + */ + if (panswer) + return (panswer < 0 ? EPERM : 0); + return (canswer < 0 ? EPERM : 0); + } + } + + /* + * No definitive answer from ACLs alone. Check for ZFS style + * permissions checking and an "UNLINK" operation under ACLs. + * If so, find write-and-execute permission on parent. + * Note that WRITE overlaps with ADD_FILE -- that's ZFS's + * way of saying "allow write to dir" -- but EXECUTE is + * separate from LIST_DIRECTORY, so that's at least a little + * bit cleaner. + * + * Note also that only a definitive yes (both bits are + * explicitly allowed) results in granting unlink, and + * a definitive no (at least one bit explicitly denied) + * results in EPERM. Only "no answer" moves on. + */ + if ((args->aca_aclmode & L9P_ACM_ZFS_ACL) && + opmask == L9P_ACOP_UNLINK && parent != NULL) { + panswer = l9p_check_aces(L9P_ACE_ADD_FILE | L9P_ACE_EXECUTE, + parent, pstat, uid, gid, gids, ngids); + if (panswer) + return (panswer < 0 ? EPERM : 0); + } + + /* + * No definitive answer from ACLs. + * + * Try POSIX style rwx permissions if allowed. This should + * be rare, occurring mainly when caller supplied no ACLs + * or set the mode to suppress them. + * + * The stat to check is the parent's if we don't have a child + * (i.e., this is a dir op), or if the DELETE_CHILD bit is set + * (i.e., this is an unlink or similar). Otherwise it's the + * child's. + */ + if (args->aca_aclmode & L9P_ACM_STAT_MODE) { + struct stat *st; + int rwx, bits; + + rwx = l9p_ace_mask_to_rwx(opmask); + if ((st = cstat) == NULL || (opmask & L9P_ACE_DELETE_CHILD)) + st = pstat; + if (uid == st->st_uid) + bits = (st->st_mode >> 6) & 7; + else if (l9p_ingroup(st->st_gid, gid, gids, ngids)) + bits = (st->st_mode >> 3) & 7; + else + bits = st->st_mode & 7; + /* + * If all the desired bits are set, we're OK. 
+ */ + if ((rwx & bits) == rwx) + return (0); + } + + /* all methods have failed, return EPERM */ + return (EPERM); +} + +/* + * Collapse fancy ACL operation mask down to simple Unix bits. + * + * Directory operations don't map that well. However, listing + * a directory really does require read permission, and adding + * or deleting files really does require write permission, so + * this is probably sufficient. + */ +int +l9p_ace_mask_to_rwx(int32_t opmask) +{ + int rwx = 0; + + if (opmask & + (L9P_ACE_READ_DATA | L9P_ACE_READ_NAMED_ATTRS | + L9P_ACE_READ_ATTRIBUTES | L9P_ACE_READ_ACL)) + rwx |= 4; + if (opmask & + (L9P_ACE_WRITE_DATA | L9P_ACE_APPEND_DATA | + L9P_ACE_ADD_FILE | L9P_ACE_ADD_SUBDIRECTORY | + L9P_ACE_DELETE | L9P_ACE_DELETE_CHILD | + L9P_ACE_WRITE_NAMED_ATTRS | L9P_ACE_WRITE_ATTRIBUTES | + L9P_ACE_WRITE_ACL)) + rwx |= 2; + if (opmask & L9P_ACE_EXECUTE) + rwx |= 1; + return (rwx); +} + +#ifndef __APPLE__ +/* + * Allocate new ACL holder and ACEs. + */ +static struct l9p_acl * +l9p_new_acl(uint32_t acetype, uint32_t aceasize) +{ + struct l9p_acl *ret; + size_t asize, size; + + asize = aceasize * sizeof(struct l9p_ace); + size = sizeof(struct l9p_acl) + asize; + ret = malloc(size); + if (ret != NULL) { + ret->acl_acetype = acetype; + ret->acl_nace = 0; + ret->acl_aceasize = aceasize; + } + return (ret); +} + +/* + * Expand ACL to accomodate more entries. + * + * Currently won't shrink, only grow, so it's a fast no-op until + * we hit the allocated size. After that, it's best to grow in + * big chunks, or this will be O(n**2). 
+ */ +static struct l9p_acl * +l9p_growacl(struct l9p_acl *acl, uint32_t aceasize) +{ + struct l9p_acl *tmp; + size_t asize, size; + + if (acl->acl_aceasize < aceasize) { + asize = aceasize * sizeof(struct l9p_ace); + size = sizeof(struct l9p_acl) + asize; + tmp = realloc(acl, size); + if (tmp == NULL) + free(acl); + acl = tmp; + } + return (acl); +} + +/* + * Annoyingly, there's no POSIX-standard way to count the number + * of ACEs in a system ACL other than to walk through them all. + * This is silly, but at least 2n is still O(n), and the walk is + * short. (If the system ACL mysteriously grows, we'll handle + * that OK via growacl(), too.) + */ +static int +l9p_count_aces(acl_t sysacl) +{ + acl_entry_t entry; + uint32_t n; + int id; + + id = ACL_FIRST_ENTRY; + for (n = 0; acl_get_entry(sysacl, id, &entry) == 1; n++) + id = ACL_NEXT_ENTRY; + + return ((int)n); +} + +/* + * Create ACL with ACEs from the given acl_t. We use the given + * convert function on each ACE. + */ +static struct l9p_acl * +l9p_sysacl_to_acl(int acetype, acl_t sysacl, econvertfn *convert) +{ + struct l9p_acl *acl; + acl_entry_t entry; + uint32_t n; + int error, id; + + acl = l9p_new_acl((uint32_t)acetype, (uint32_t)l9p_count_aces(sysacl)); + if (acl == NULL) + return (NULL); + id = ACL_FIRST_ENTRY; + for (n = 0;;) { + if (acl_get_entry(sysacl, id, &entry) != 1) + break; + acl = l9p_growacl(acl, n + 1); + if (acl == NULL) + return (NULL); + error = (*convert)(entry, &acl->acl_aces[n]); + id = ACL_NEXT_ENTRY; + if (error == 0) + n++; + } + acl->acl_nace = n; + return (acl); +} +#endif + +#if defined(HAVE_POSIX_ACLS) && 0 /* not yet */ +struct l9p_acl * +l9p_posix_acl_to_acl(acl_t sysacl) +{ +} +#endif + +#if defined(HAVE_FREEBSD_ACLS) +static int +l9p_frombsdnfs4(acl_entry_t sysace, struct l9p_ace *ace) +{ + acl_tag_t tag; /* e.g., USER_OBJ, GROUP, etc */ + acl_entry_type_t entry_type; /* e.g., allow/deny */ + acl_permset_t absdperm; + acl_flagset_t absdflag; + acl_perm_t bsdperm; /* e.g., 
READ_DATA */ + acl_flag_t bsdflag; /* e.g., FILE_INHERIT_ACE */ + uint32_t flags, mask; + int error; + uid_t uid, *aid; + + error = acl_get_tag_type(sysace, &tag); + if (error == 0) + error = acl_get_entry_type_np(sysace, &entry_type); + if (error == 0) + error = acl_get_flagset_np(sysace, &absdflag); + if (error == 0) + error = acl_get_permset(sysace, &absdperm); + if (error) + return (error); + + flags = 0; + uid = 0; + aid = NULL; + + /* move user/group/everyone + id-is-group-id into flags */ + switch (tag) { + case ACL_USER_OBJ: + flags |= L9P_ACEF_OWNER; + break; + case ACL_GROUP_OBJ: + flags |= L9P_ACEF_GROUP; + break; + case ACL_EVERYONE: + flags |= L9P_ACEF_EVERYONE; + break; + case ACL_GROUP: + flags |= L9P_ACEF_IDENTIFIER_GROUP; + /* FALLTHROUGH */ + case ACL_USER: + aid = acl_get_qualifier(sysace); /* ugh, this malloc()s */ + if (aid == NULL) + return (ENOMEM); + uid = *(uid_t *)aid; + free(aid); + aid = &uid; + break; + default: + return (EINVAL); /* can't happen */ + } + + switch (entry_type) { + + case ACL_ENTRY_TYPE_ALLOW: + ace->ace_type = L9P_ACET_ACCESS_ALLOWED; + break; + + case ACL_ENTRY_TYPE_DENY: + ace->ace_type = L9P_ACET_ACCESS_DENIED; + break; + + case ACL_ENTRY_TYPE_AUDIT: + ace->ace_type = L9P_ACET_SYSTEM_AUDIT; + break; + + case ACL_ENTRY_TYPE_ALARM: + ace->ace_type = L9P_ACET_SYSTEM_ALARM; + break; + + default: + return (EINVAL); /* can't happen */ + } + + /* transform remaining BSD flags to internal NFS-y form */ + bsdflag = *absdflag; + if (bsdflag & ACL_ENTRY_FILE_INHERIT) + flags |= L9P_ACEF_FILE_INHERIT_ACE; + if (bsdflag & ACL_ENTRY_DIRECTORY_INHERIT) + flags |= L9P_ACEF_DIRECTORY_INHERIT_ACE; + if (bsdflag & ACL_ENTRY_NO_PROPAGATE_INHERIT) + flags |= L9P_ACEF_NO_PROPAGATE_INHERIT_ACE; + if (bsdflag & ACL_ENTRY_INHERIT_ONLY) + flags |= L9P_ACEF_INHERIT_ONLY_ACE; + if (bsdflag & ACL_ENTRY_SUCCESSFUL_ACCESS) + flags |= L9P_ACEF_SUCCESSFUL_ACCESS_ACE_FLAG; + if (bsdflag & ACL_ENTRY_FAILED_ACCESS) + flags |= 
L9P_ACEF_FAILED_ACCESS_ACE_FLAG; + ace->ace_flags = flags; + + /* + * Transform BSD permissions to ace_mask. Note that directory + * vs file bits are the same in both sets, so we don't need + * to worry about that, at least. + * + * There seem to be no BSD equivalents for WRITE_RETENTION + * and WRITE_RETENTION_HOLD. + */ + mask = 0; + bsdperm = *absdperm; + if (bsdperm & ACL_READ_DATA) + mask |= L9P_ACE_READ_DATA; + if (bsdperm & ACL_WRITE_DATA) + mask |= L9P_ACE_WRITE_DATA; + if (bsdperm & ACL_APPEND_DATA) + mask |= L9P_ACE_APPEND_DATA; + if (bsdperm & ACL_READ_NAMED_ATTRS) + mask |= L9P_ACE_READ_NAMED_ATTRS; + if (bsdperm & ACL_WRITE_NAMED_ATTRS) + mask |= L9P_ACE_WRITE_NAMED_ATTRS; + if (bsdperm & ACL_EXECUTE) + mask |= L9P_ACE_EXECUTE; + if (bsdperm & ACL_DELETE_CHILD) + mask |= L9P_ACE_DELETE_CHILD; + if (bsdperm & ACL_READ_ATTRIBUTES) + mask |= L9P_ACE_READ_ATTRIBUTES; + if (bsdperm & ACL_WRITE_ATTRIBUTES) + mask |= L9P_ACE_WRITE_ATTRIBUTES; + /* L9P_ACE_WRITE_RETENTION */ + /* L9P_ACE_WRITE_RETENTION_HOLD */ + /* 0x00800 */ + if (bsdperm & ACL_DELETE) + mask |= L9P_ACE_DELETE; + if (bsdperm & ACL_READ_ACL) + mask |= L9P_ACE_READ_ACL; + if (bsdperm & ACL_WRITE_ACL) + mask |= L9P_ACE_WRITE_ACL; + if (bsdperm & ACL_WRITE_OWNER) + mask |= L9P_ACE_WRITE_OWNER; + if (bsdperm & ACL_SYNCHRONIZE) + mask |= L9P_ACE_SYNCHRONIZE; + ace->ace_mask = mask; + + /* fill in variable-size user or group ID bytes */ + if (aid == NULL) + ace->ace_idsize = 0; + else { + ace->ace_idsize = sizeof(uid); + memcpy(&ace->ace_idbytes[0], aid, sizeof(uid)); + } + + return (0); +} + +struct l9p_acl * +l9p_freebsd_nfsv4acl_to_acl(acl_t sysacl) +{ + + return (l9p_sysacl_to_acl(L9P_ACLTYPE_NFSv4, sysacl, l9p_frombsdnfs4)); +} +#endif + +#if defined(HAVE_DARWIN_ACLS) && 0 /* not yet */ +struct l9p_acl * +l9p_darwin_nfsv4acl_to_acl(acl_t sysacl) +{ +} +#endif Index: lib/lib9p/hashtable.h =================================================================== --- /dev/null +++ lib/lib9p/hashtable.h 
/*
 * Hash table keyed by a 32-bit hash.  An item lives in slot
 * (hash % ht_nentries); each slot is a tail queue of items.
 */
struct ht {
	struct ht_entry *	ht_entries;	/* array of ht_nentries slots */
	ssize_t			ht_nentries;	/* number of slots, set by ht_init() */
	pthread_rwlock_t	ht_rwlock;	/* taken by ht_rdlock()/ht_wrlock() */
};

/* One slot: the list of items whose hash maps here. */
struct ht_entry {
	TAILQ_HEAD(, ht_item) hte_items;
};

/* One stored item. */
struct ht_item {
	uint32_t		hti_hash;	/* lookup key; ht_add() rejects duplicates */
	void *			hti_data;	/* caller's value, returned by ht_find() */
	TAILQ_ENTRY(ht_item)	hti_link;	/* linkage within the slot's list */
};

/* Iteration state, set up by ht_iter() and advanced by ht_next(). */
struct ht_iter {
	struct ht *		htit_parent;	/* table being walked */
	struct ht_item *	htit_curr;	/* item last returned, or NULL */
	struct ht_item *	htit_next;	/* successor pre-loaded by ht_remove_at_iter() */
	ssize_t			htit_slot;	/* slot of htit_curr; -1 before the first item */
};

#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wthread-safety-analysis"
#endif
+ */ +static inline int +ht_rdlock(struct ht *h) +{ + + return (pthread_rwlock_rdlock(&h->ht_rwlock)); +} + +/* + * Obtain write-lock on hash table. + */ +static inline int +ht_wrlock(struct ht *h) +{ + + return (pthread_rwlock_wrlock(&h->ht_rwlock)); +} + +/* + * Release lock on hash table. + */ +static inline int +ht_unlock(struct ht *h) +{ + + return (pthread_rwlock_unlock(&h->ht_rwlock)); +} + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +void ht_init(struct ht *h, ssize_t size); +void ht_destroy(struct ht *h); +void *ht_find(struct ht *h, uint32_t hash); +void *ht_find_locked(struct ht *h, uint32_t hash); +int ht_add(struct ht *h, uint32_t hash, void *value); +int ht_remove(struct ht *h, uint32_t hash); +int ht_remove_locked(struct ht *h, uint32_t hash); +int ht_remove_at_iter(struct ht_iter *iter); +void ht_iter(struct ht *h, struct ht_iter *iter); +void *ht_next(struct ht_iter *iter); + +#endif /* LIB9P_HASHTABLE_H */ Index: lib/lib9p/hashtable.c =================================================================== --- /dev/null +++ lib/lib9p/hashtable.c @@ -0,0 +1,267 @@ +/* + * Copyright 2016 Jakub Klama + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include "lib9p_impl.h" +#include "hashtable.h" + +static struct ht_item *ht_iter_advance(struct ht_iter *, struct ht_item *); + +void +ht_init(struct ht *h, ssize_t size) +{ + ssize_t i; + + memset(h, 0, sizeof(struct ht)); + h->ht_nentries = size; + h->ht_entries = l9p_calloc((size_t)size, sizeof(struct ht_entry)); + pthread_rwlock_init(&h->ht_rwlock, NULL); + + for (i = 0; i < size; i++) + TAILQ_INIT(&h->ht_entries[i].hte_items); +} + +void +ht_destroy(struct ht *h) +{ + struct ht_entry *he; + struct ht_item *item, *tmp; + ssize_t i; + + for (i = 0; i < h->ht_nentries; i++) { + he = &h->ht_entries[i]; + TAILQ_FOREACH_SAFE(item, &he->hte_items, hti_link, tmp) { + free(item); + } + } + + pthread_rwlock_destroy(&h->ht_rwlock); + free(h->ht_entries); + h->ht_entries = NULL; +} + +void * +ht_find(struct ht *h, uint32_t hash) +{ + void *result; + + ht_rdlock(h); + result = ht_find_locked(h, hash); + ht_unlock(h); + return (result); +} + +void * +ht_find_locked(struct ht *h, uint32_t hash) +{ + struct ht_entry *entry; + struct ht_item *item; + + entry = &h->ht_entries[hash % h->ht_nentries]; + + TAILQ_FOREACH(item, &entry->hte_items, hti_link) { + if (item->hti_hash == hash) + return (item->hti_data); + } + + return (NULL); +} + +int +ht_add(struct ht *h, uint32_t hash, void *value) +{ + struct ht_entry *entry; + struct ht_item *item; + + ht_wrlock(h); + entry = &h->ht_entries[hash 
% h->ht_nentries]; + + TAILQ_FOREACH(item, &entry->hte_items, hti_link) { + if (item->hti_hash == hash) { + errno = EEXIST; + ht_unlock(h); + return (-1); + } + } + + item = l9p_calloc(1, sizeof(struct ht_item)); + item->hti_hash = hash; + item->hti_data = value; + TAILQ_INSERT_TAIL(&entry->hte_items, item, hti_link); + ht_unlock(h); + + return (0); +} + +int +ht_remove(struct ht *h, uint32_t hash) +{ + int result; + + ht_wrlock(h); + result = ht_remove_locked(h, hash); + ht_unlock(h); + return (result); +} + +int +ht_remove_locked(struct ht *h, uint32_t hash) +{ + struct ht_entry *entry; + struct ht_item *item, *tmp; + ssize_t slot = hash % h->ht_nentries; + + entry = &h->ht_entries[slot]; + + TAILQ_FOREACH_SAFE(item, &entry->hte_items, hti_link, tmp) { + if (item->hti_hash == hash) { + TAILQ_REMOVE(&entry->hte_items, item, hti_link); + free(item); + return (0); + } + } + + errno = ENOENT; + return (-1); +} + +/* + * Inner workings for advancing the iterator. + * + * If we have a current item, that tells us how to find the + * next item. If not, we get the first item from the next + * slot (well, the next slot with an item); in any case, we + * record the new slot and return the next item. + * + * For bootstrapping, iter->htit_slot can be -1 to start + * searching at slot 0. + * + * Caller must hold a lock on the table. + */ +static struct ht_item * +ht_iter_advance(struct ht_iter *iter, struct ht_item *cur) +{ + struct ht_item *next; + struct ht *h; + ssize_t slot; + + h = iter->htit_parent; + + if (cur == NULL) + next = NULL; + else + next = TAILQ_NEXT(cur, hti_link); + + if (next == NULL) { + slot = iter->htit_slot; + while (++slot < h->ht_nentries) { + next = TAILQ_FIRST(&h->ht_entries[slot].hte_items); + if (next != NULL) + break; + } + iter->htit_slot = slot; + } + return (next); +} + +/* + * Remove the current item - there must be one, or this is an + * error. 
This (necessarily) pre-locates the next item, so callers + * must not use it on an actively-changing table. + */ +int +ht_remove_at_iter(struct ht_iter *iter) +{ + struct ht_item *item; + struct ht *h; + ssize_t slot; + + assert(iter != NULL); + + if ((item = iter->htit_curr) == NULL) { + errno = EINVAL; + return (-1); + } + + /* remove the item from the table, saving the NEXT one */ + h = iter->htit_parent; + ht_wrlock(h); + slot = iter->htit_slot; + iter->htit_next = ht_iter_advance(iter, item); + TAILQ_REMOVE(&h->ht_entries[slot].hte_items, item, hti_link); + ht_unlock(h); + + /* mark us as no longer on an item, then free it */ + iter->htit_curr = NULL; + free(item); + + return (0); +} + +/* + * Initialize iterator. Subsequent ht_next calls will find the + * first item, then the next, and so on. Callers should in general + * not use this on actively-changing tables, though we do our best + * to make it semi-sensible. + */ +void +ht_iter(struct ht *h, struct ht_iter *iter) +{ + + iter->htit_parent = h; + iter->htit_curr = NULL; + iter->htit_next = NULL; + iter->htit_slot = -1; /* which will increment to 0 */ +} + +/* + * Return the next item, which is the first item if we have not + * yet been called on this iterator, or the next item if we have. + */ +void * +ht_next(struct ht_iter *iter) +{ + struct ht_item *item; + struct ht *h; + + if ((item = iter->htit_next) == NULL) { + /* no pre-loaded next; find next from current */ + h = iter->htit_parent; + ht_rdlock(h); + item = ht_iter_advance(iter, iter->htit_curr); + ht_unlock(h); + } else + iter->htit_next = NULL; + iter->htit_curr = item; + return (item == NULL ? 
NULL : item->hti_data); +} Index: lib/lib9p/lib9p.h =================================================================== --- /dev/null +++ lib/lib9p/lib9p.h @@ -0,0 +1,249 @@ +/* + * Copyright 2016 Jakub Klama + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + + +#ifndef LIB9P_LIB9P_H +#define LIB9P_LIB9P_H + +#include +#include +#include +#include +#include +#include + +#if defined(__FreeBSD__) +#include +#else +#include "sbuf/sbuf.h" +#endif + +#include "fcall.h" +#include "threadpool.h" +#include "hashtable.h" + +#define L9P_DEFAULT_MSIZE 8192 +#define L9P_MAX_IOV 128 +#define L9P_NUMTHREADS 8 + +struct l9p_request; +struct l9p_backend; +struct l9p_fid; + +/* + * Functions to implement underlying transport for lib9p. 
+ * + * The transport is responsible for: + * + * - allocating a response buffer (filling in the iovec and niov) + * (gets req, pointer to base of iov array of size L9P_MAX_IOV, + * pointer to niov, lt_aux) + * + * - sending a response, when a request has a reply ready + * (gets req, pointer to iov, niov, actual response length, lt_aux) + * + * - dropping the response buffer, when a request has been + * flushed or otherwise dropped without a response + * (gets req, pointer to iov, niov, lt_aux) + * + * The transport is of course also responsible for feeding in + * request-buffers, but that happens by the transport calling + * l9p_connection_recv(). + */ +struct l9p_transport { + void *lt_aux; + int (*lt_get_response_buffer)(struct l9p_request *, struct iovec *, + size_t *, void *); + int (*lt_send_response)(struct l9p_request *, const struct iovec *, + size_t, size_t, void *); + void (*lt_drop_response)(struct l9p_request *, const struct iovec *, + size_t, void *); +}; + +enum l9p_pack_mode { + L9P_PACK, + L9P_UNPACK +}; + +enum l9p_integer_type { + L9P_BYTE = 1, + L9P_WORD = 2, + L9P_DWORD = 4, + L9P_QWORD = 8 +}; + +enum l9p_version { + L9P_INVALID_VERSION = 0, + L9P_2000 = 1, + L9P_2000U = 2, + L9P_2000L = 3 +}; + +/* + * This structure is used for unpacking (decoding) incoming + * requests and packing (encoding) outgoing results. It has its + * own copy of the iov array, with its own counters for working + * through that array, but it borrows the actual DATA from the + * original iov array associated with the original request (see + * below). + */ +struct l9p_message { + enum l9p_pack_mode lm_mode; + struct iovec lm_iov[L9P_MAX_IOV]; + size_t lm_niov; + size_t lm_cursor_iov; + size_t lm_cursor_offset; + size_t lm_size; +}; + +/* + * Data structure for a request/response pair (Tfoo/Rfoo). 
+ * + * Note that the response is not formatted out into raw data + * (overwriting the request raw data) until we are really + * responding, with the exception of read operations Tread + * and Treaddir, which overlay their result-data into the + * iov array in the process of reading. + * + * We have room for two incoming fids, in case we are + * using 9P2000.L protocol. Note that nothing that uses two + * fids also has an output fid (newfid), so we could have a + * union of lr_fid2 and lr_newfid, but keeping them separate + * is probably a bit less error-prone. (If we want to shave + * memory requirements there are more places to look.) + * + * (The fid, fid2, and newfid fields should be removed via + * reorganization, as they are only used for smuggling data + * between request.c and the backend and should just be + * parameters to backend ops.) + */ +struct l9p_request { + struct l9p_message lr_req_msg; /* for unpacking the request */ + struct l9p_message lr_resp_msg; /* for packing the response */ + union l9p_fcall lr_req; /* the request, decoded/unpacked */ + union l9p_fcall lr_resp; /* the response, not yet packed */ + + struct l9p_fid *lr_fid; + struct l9p_fid *lr_fid2; + struct l9p_fid *lr_newfid; + + struct l9p_connection *lr_conn; /* containing connection */ + void *lr_aux; /* reserved for transport layer */ + + struct iovec lr_data_iov[L9P_MAX_IOV]; /* iovecs for req + resp */ + size_t lr_data_niov; /* actual size of data_iov */ + + int lr_error; /* result from l9p_dispatch_request */ + + /* protected by threadpool mutex */ + enum l9p_workstate lr_workstate; /* threadpool: work state */ + enum l9p_flushstate lr_flushstate; /* flush state if flushee */ + struct l9p_worker *lr_worker; /* threadpool: worker */ + STAILQ_ENTRY(l9p_request) lr_worklink; /* reserved to threadpool */ + + /* protected by tag hash table lock */ + struct l9p_request_queue lr_flushq; /* q of flushers */ + STAILQ_ENTRY(l9p_request) lr_flushlink; /* link w/in flush queue */ +}; + +/*
N.B.: these dirents are variable length and for .L only */ +struct l9p_dirent { + struct l9p_qid qid; + uint64_t offset; + uint8_t type; + char *name; +}; + +/* + * The 9pfs protocol has the notion of a "session", which is + * traffic between any two "Tversion" requests. All fids + * (lc_files, below) are specific to one particular session. + * + * We need a data structure per connection (client/server + * pair). This data structure lasts longer than these 9pfs + * sessions, but contains the request/response pairs and fids. + * Logically, the per-session data should be separate, but + * most of the time that would just require an extra + * indirection. Instead, a new session simply clunks all + * fids, and otherwise keeps using this same connection. + */ +struct l9p_connection { + struct l9p_server *lc_server; + struct l9p_transport lc_lt; + struct l9p_threadpool lc_tp; + enum l9p_version lc_version; + uint32_t lc_msize; + uint32_t lc_max_io_size; + struct ht lc_files; + struct ht lc_requests; + LIST_ENTRY(l9p_connection) lc_link; +}; + +struct l9p_server { + struct l9p_backend *ls_backend; + enum l9p_version ls_max_version; + LIST_HEAD(, l9p_connection) ls_conns; +}; + +int l9p_pufcall(struct l9p_message *msg, union l9p_fcall *fcall, + enum l9p_version version); +ssize_t l9p_pustat(struct l9p_message *msg, struct l9p_stat *s, + enum l9p_version version); +uint16_t l9p_sizeof_stat(struct l9p_stat *stat, enum l9p_version version); +int l9p_pack_stat(struct l9p_message *msg, struct l9p_request *req, + struct l9p_stat *s); +ssize_t l9p_pudirent(struct l9p_message *msg, struct l9p_dirent *de); + +int l9p_server_init(struct l9p_server **serverp, struct l9p_backend *backend); + +int l9p_connection_init(struct l9p_server *server, + struct l9p_connection **connp); +void l9p_connection_free(struct l9p_connection *conn); +void l9p_connection_recv(struct l9p_connection *conn, const struct iovec *iov, + size_t niov, void *aux); +void l9p_connection_close(struct l9p_connection 
*conn); +struct l9p_fid *l9p_connection_alloc_fid(struct l9p_connection *conn, + uint32_t fid); +void l9p_connection_remove_fid(struct l9p_connection *conn, + struct l9p_fid *fid); + +int l9p_dispatch_request(struct l9p_request *req); +void l9p_respond(struct l9p_request *req, bool drop, bool rmtag); + +void l9p_init_msg(struct l9p_message *msg, struct l9p_request *req, + enum l9p_pack_mode mode); +void l9p_seek_iov(struct iovec *iov1, size_t niov1, struct iovec *iov2, + size_t *niov2, size_t seek); +size_t l9p_truncate_iov(struct iovec *iov, size_t niov, size_t length); +void l9p_describe_fcall(union l9p_fcall *fcall, enum l9p_version version, + struct sbuf *sb); +void l9p_freefcall(union l9p_fcall *fcall); +void l9p_freestat(struct l9p_stat *stat); + +gid_t *l9p_getgrlist(const char *, gid_t, int *); + +#endif /* LIB9P_LIB9P_H */ Index: lib/lib9p/lib9p_impl.h =================================================================== --- /dev/null +++ lib/lib9p/lib9p_impl.h @@ -0,0 +1,78 @@ +/* + * Copyright 2016 Jakub Klama + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef LIB9P_LIB9P_IMPL_H +#define LIB9P_LIB9P_IMPL_H + +#include <stdio.h> +#include <stdlib.h> + +#ifndef _KERNEL +static inline void * +l9p_malloc(size_t size) +{ + void *r = malloc(size); + + if (r == NULL) { + fprintf(stderr, "cannot allocate %zu bytes: out of memory\n", + size); + abort(); + } + + return (r); +} + +static inline void * +l9p_calloc(size_t n, size_t size) +{ + void *r = calloc(n, size); + + if (r == NULL) { + fprintf(stderr, "cannot allocate %zu bytes: out of memory\n", + n * size); + abort(); + } + + return (r); +} + +static inline void * +l9p_realloc(void *ptr, size_t newsize) +{ + void *r = realloc(ptr, newsize); + + if (r == NULL) { + fprintf(stderr, "cannot allocate %zu bytes: out of memory\n", + newsize); + abort(); + } + + return (r); +} +#endif /* _KERNEL */ + +#endif /* LIB9P_LIB9P_IMPL_H */ Index: lib/lib9p/linux_errno.h =================================================================== --- /dev/null +++ lib/lib9p/linux_errno.h @@ -0,0 +1,247 @@ +/* + * Copyright 2016 Chris Torek + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef LIB9P_LINUX_ERRNO_H +#define LIB9P_LINUX_ERRNO_H + +/* + * Linux error numbers that are outside of the original base range + * (which ends with ERANGE). + * + * This is pretty much the same as Linux's errno.h except that the + * names are prefixed with "LINUX_", and we add _STR with the + * string name. + * + * The string expansions were obtained with a little program to + * print every strerror(). + * + * Note that BSD EDEADLK is 11 and BSD EAGAIN is 35, vs + * Linux / Plan9 EAGAIN at 11. So one value in the ERANGE + * range still needs translation too. 
+ */ + +#define LINUX_EAGAIN 11 +#define LINUX_EAGAIN_STR "Resource temporarily unavailable" + +#define LINUX_EDEADLK 35 +#define LINUX_EDEADLK_STR "Resource deadlock avoided" +#define LINUX_ENAMETOOLONG 36 +#define LINUX_ENAMETOOLONG_STR "File name too long" +#define LINUX_ENOLCK 37 +#define LINUX_ENOLCK_STR "No locks available" +#define LINUX_ENOSYS 38 +#define LINUX_ENOSYS_STR "Function not implemented" +#define LINUX_ENOTEMPTY 39 +#define LINUX_ENOTEMPTY_STR "Directory not empty" +#define LINUX_ELOOP 40 +#define LINUX_ELOOP_STR "Too many levels of symbolic links" +/* 41 unused */ +#define LINUX_ENOMSG 42 +#define LINUX_ENOMSG_STR "No message of desired type" +#define LINUX_EIDRM 43 +#define LINUX_EIDRM_STR "Identifier removed" +#define LINUX_ECHRNG 44 +#define LINUX_ECHRNG_STR "Channel number out of range" +#define LINUX_EL2NSYNC 45 +#define LINUX_EL2NSYNC_STR "Level 2 not synchronized" +#define LINUX_EL3HLT 46 +#define LINUX_EL3HLT_STR "Level 3 halted" +#define LINUX_EL3RST 47 +#define LINUX_EL3RST_STR "Level 3 reset" +#define LINUX_ELNRNG 48 +#define LINUX_ELNRNG_STR "Link number out of range" +#define LINUX_EUNATCH 49 +#define LINUX_EUNATCH_STR "Protocol driver not attached" +#define LINUX_ENOCSI 50 +#define LINUX_ENOCSI_STR "No CSI structure available" +#define LINUX_EL2HLT 51 +#define LINUX_EL2HLT_STR "Level 2 halted" +#define LINUX_EBADE 52 +#define LINUX_EBADE_STR "Invalid exchange" +#define LINUX_EBADR 53 +#define LINUX_EBADR_STR "Invalid request descriptor" +#define LINUX_EXFULL 54 +#define LINUX_EXFULL_STR "Exchange full" +#define LINUX_ENOANO 55 +#define LINUX_ENOANO_STR "No anode" +#define LINUX_EBADRQC 56 +#define LINUX_EBADRQC_STR "Invalid request code" +#define LINUX_EBADSLT 57 +#define LINUX_EBADSLT_STR "Invalid slot" +/* 58 unused */ +#define LINUX_EBFONT 59 +#define LINUX_EBFONT_STR "Bad font file format" +#define LINUX_ENOSTR 60 +#define LINUX_ENOSTR_STR "Device not a stream" +#define LINUX_ENODATA 61 +#define LINUX_ENODATA_STR "No data 
available" +#define LINUX_ETIME 62 +#define LINUX_ETIME_STR "Timer expired" +#define LINUX_ENOSR 63 +#define LINUX_ENOSR_STR "Out of streams resources" +#define LINUX_ENONET 64 +#define LINUX_ENONET_STR "Machine is not on the network" +#define LINUX_ENOPKG 65 +#define LINUX_ENOPKG_STR "Package not installed" +#define LINUX_EREMOTE 66 +#define LINUX_EREMOTE_STR "Object is remote" +#define LINUX_ENOLINK 67 +#define LINUX_ENOLINK_STR "Link has been severed" +#define LINUX_EADV 68 +#define LINUX_EADV_STR "Advertise error" +#define LINUX_ESRMNT 69 +#define LINUX_ESRMNT_STR "Srmount error" +#define LINUX_ECOMM 70 +#define LINUX_ECOMM_STR "Communication error on send" +#define LINUX_EPROTO 71 +#define LINUX_EPROTO_STR "Protocol error" +#define LINUX_EMULTIHOP 72 +#define LINUX_EMULTIHOP_STR "Multihop attempted" +#define LINUX_EDOTDOT 73 +#define LINUX_EDOTDOT_STR "RFS specific error" +#define LINUX_EBADMSG 74 +#define LINUX_EBADMSG_STR "Bad message" +#define LINUX_EOVERFLOW 75 +#define LINUX_EOVERFLOW_STR "Value too large for defined data type" +#define LINUX_ENOTUNIQ 76 +#define LINUX_ENOTUNIQ_STR "Name not unique on network" +#define LINUX_EBADFD 77 +#define LINUX_EBADFD_STR "File descriptor in bad state" +#define LINUX_EREMCHG 78 +#define LINUX_EREMCHG_STR "Remote address changed" +#define LINUX_ELIBACC 79 +#define LINUX_ELIBACC_STR "Can not access a needed shared library" +#define LINUX_ELIBBAD 80 +#define LINUX_ELIBBAD_STR "Accessing a corrupted shared library" +#define LINUX_ELIBSCN 81 +#define LINUX_ELIBSCN_STR ".lib section in a.out corrupted" +#define LINUX_ELIBMAX 82 +#define LINUX_ELIBMAX_STR "Attempting to link in too many shared libraries" +#define LINUX_ELIBEXEC 83 +#define LINUX_ELIBEXEC_STR "Cannot exec a shared library directly" +#define LINUX_EILSEQ 84 +#define LINUX_EILSEQ_STR "Invalid or incomplete multibyte or wide character" +#define LINUX_ERESTART 85 +#define LINUX_ERESTART_STR "Interrupted system call should be restarted" +#define LINUX_ESTRPIPE 86 
+#define LINUX_ESTRPIPE_STR "Streams pipe error" +#define LINUX_EUSERS 87 +#define LINUX_EUSERS_STR "Too many users" +#define LINUX_ENOTSOCK 88 +#define LINUX_ENOTSOCK_STR "Socket operation on non-socket" +#define LINUX_EDESTADDRREQ 89 +#define LINUX_EDESTADDRREQ_STR "Destination address required" +#define LINUX_EMSGSIZE 90 +#define LINUX_EMSGSIZE_STR "Message too long" +#define LINUX_EPROTOTYPE 91 +#define LINUX_EPROTOTYPE_STR "Protocol wrong type for socket" +#define LINUX_ENOPROTOOPT 92 +#define LINUX_ENOPROTOOPT_STR "Protocol not available" +#define LINUX_EPROTONOSUPPORT 93 +#define LINUX_EPROTONOSUPPORT_STR "Protocol not supported" +#define LINUX_ESOCKTNOSUPPORT 94 +#define LINUX_ESOCKTNOSUPPORT_STR "Socket type not supported" +#define LINUX_EOPNOTSUPP 95 +#define LINUX_EOPNOTSUPP_STR "Operation not supported" +#define LINUX_EPFNOSUPPORT 96 +#define LINUX_EPFNOSUPPORT_STR "Protocol family not supported" +#define LINUX_EAFNOSUPPORT 97 +#define LINUX_EAFNOSUPPORT_STR "Address family not supported by protocol" +#define LINUX_EADDRINUSE 98 +#define LINUX_EADDRINUSE_STR "Address already in use" +#define LINUX_EADDRNOTAVAIL 99 +#define LINUX_EADDRNOTAVAIL_STR "Cannot assign requested address" +#define LINUX_ENETDOWN 100 +#define LINUX_ENETDOWN_STR "Network is down" +#define LINUX_ENETUNREACH 101 +#define LINUX_ENETUNREACH_STR "Network is unreachable" +#define LINUX_ENETRESET 102 +#define LINUX_ENETRESET_STR "Network dropped connection on reset" +#define LINUX_ECONNABORTED 103 +#define LINUX_ECONNABORTED_STR "Software caused connection abort" +#define LINUX_ECONNRESET 104 +#define LINUX_ECONNRESET_STR "Connection reset by peer" +#define LINUX_ENOBUFS 105 +#define LINUX_ENOBUFS_STR "No buffer space available" +#define LINUX_EISCONN 106 +#define LINUX_EISCONN_STR "Transport endpoint is already connected" +#define LINUX_ENOTCONN 107 +#define LINUX_ENOTCONN_STR "Transport endpoint is not connected" +#define LINUX_ESHUTDOWN 108 +#define LINUX_ESHUTDOWN_STR "Cannot send 
after transport endpoint shutdown" +#define LINUX_ETOOMANYREFS 109 +#define LINUX_ETOOMANYREFS_STR "Too many references: cannot splice" +#define LINUX_ETIMEDOUT 110 +#define LINUX_ETIMEDOUT_STR "Connection timed out" +#define LINUX_ECONNREFUSED 111 +#define LINUX_ECONNREFUSED_STR "Connection refused" +#define LINUX_EHOSTDOWN 112 +#define LINUX_EHOSTDOWN_STR "Host is down" +#define LINUX_EHOSTUNREACH 113 +#define LINUX_EHOSTUNREACH_STR "No route to host" +#define LINUX_EALREADY 114 +#define LINUX_EALREADY_STR "Operation already in progress" +#define LINUX_EINPROGRESS 115 +#define LINUX_EINPROGRESS_STR "Operation now in progress" +#define LINUX_ESTALE 116 +#define LINUX_ESTALE_STR "Stale file handle" +#define LINUX_EUCLEAN 117 +#define LINUX_EUCLEAN_STR "Structure needs cleaning" +#define LINUX_ENOTNAM 118 +#define LINUX_ENOTNAM_STR "Not a XENIX named type file" +#define LINUX_ENAVAIL 119 +#define LINUX_ENAVAIL_STR "No XENIX semaphores available" +#define LINUX_EISNAM 120 +#define LINUX_EISNAM_STR "Is a named type file" +#define LINUX_EREMOTEIO 121 +#define LINUX_EREMOTEIO_STR "Remote I/O error" +#define LINUX_EDQUOT 122 +#define LINUX_EDQUOT_STR "Quota exceeded" +#define LINUX_ENOMEDIUM 123 +#define LINUX_ENOMEDIUM_STR "No medium found" +#define LINUX_EMEDIUMTYPE 124 +#define LINUX_EMEDIUMTYPE_STR "Wrong medium type" +#define LINUX_ECANCELED 125 +#define LINUX_ECANCELED_STR "Operation canceled" +#define LINUX_ENOKEY 126 +#define LINUX_ENOKEY_STR "Required key not available" +#define LINUX_EKEYEXPIRED 127 +#define LINUX_EKEYEXPIRED_STR "Key has expired" +#define LINUX_EKEYREVOKED 128 +#define LINUX_EKEYREVOKED_STR "Key has been revoked" +#define LINUX_EKEYREJECTED 129 +#define LINUX_EKEYREJECTED_STR "Key was rejected by service" +#define LINUX_EOWNERDEAD 130 +#define LINUX_EOWNERDEAD_STR "Owner died" +#define LINUX_ENOTRECOVERABLE 131 +#define LINUX_ENOTRECOVERABLE_STR "State not recoverable" +#define LINUX_ERFKILL 132 +#define LINUX_ERFKILL_STR "Operation not 
possible due to RF-kill" +#define LINUX_EHWPOISON 133 +#define LINUX_EHWPOISON_STR "Memory page has hardware error" + +#endif /* LIB9P_LINUX_ERRNO_H */ Index: lib/lib9p/log.h =================================================================== --- /dev/null +++ lib/lib9p/log.h @@ -0,0 +1,46 @@ +/* + * Copyright 2016 Jakub Klama + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef LIB9P_LOG_H +#define LIB9P_LOG_H + +enum l9p_log_level { + L9P_DEBUG, + L9P_INFO, + L9P_WARNING, + L9P_ERROR +}; + +void l9p_logf(enum l9p_log_level level, const char *func, const char *fmt, ...); + +#if defined(L9P_DEBUG) +#define L9P_LOG(level, fmt, ...) l9p_logf(level, __func__, fmt, ##__VA_ARGS__) +#else +#define L9P_LOG(level, fmt, ...) 
+#endif + +#endif /* LIB9P_LOG_H */ Index: lib/lib9p/log.c =================================================================== --- /dev/null +++ lib/lib9p/log.c @@ -0,0 +1,67 @@ +/* + * Copyright 2016 Jakub Klama + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include +#include +#include +#include "log.h" + +static const char *l9p_log_level_names[] = { + "DEBUG", + "INFO", + "WARN", + "ERROR" +}; + +void +l9p_logf(enum l9p_log_level level, const char *func, const char *fmt, ...) 
+{ + const char *dest = NULL; + static FILE *stream = NULL; + va_list ap; + + if (stream == NULL) { + dest = getenv("LIB9P_LOGGING"); + if (dest == NULL) + return; + else if (!strcmp(dest, "stderr")) + stream = stderr; + else { + stream = fopen(dest, "a"); + if (stream == NULL) + return; + } + } + + va_start(ap, fmt); + fprintf(stream, "[%s]\t %s: ", l9p_log_level_names[level], func); + vfprintf(stream, fmt, ap); + fprintf(stream, "\n"); + fflush(stream); + va_end(ap); +} Index: lib/lib9p/pack.c =================================================================== --- /dev/null +++ lib/lib9p/pack.c @@ -0,0 +1,993 @@ +/* + * Copyright 2016 Jakub Klama + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +/* + * Based on libixp code: ©2007-2010 Kris Maglione + */ + +#include +#include +#include +#include +#include +#ifdef __APPLE__ +# include "apple_endian.h" +#else +# include +#endif +#include +#include "lib9p.h" +#include "lib9p_impl.h" +#include "log.h" + +#define N(ary) (sizeof(ary) / sizeof(*ary)) +#define STRING_SIZE(s) (L9P_WORD + (s != NULL ? (uint16_t)strlen(s) : 0)) +#define QID_SIZE (L9P_BYTE + L9P_DWORD + L9P_QWORD) + +static ssize_t l9p_iov_io(struct l9p_message *, void *, size_t); +static inline ssize_t l9p_pu8(struct l9p_message *, uint8_t *); +static inline ssize_t l9p_pu16(struct l9p_message *, uint16_t *); +static inline ssize_t l9p_pu32(struct l9p_message *, uint32_t *); +static inline ssize_t l9p_pu64(struct l9p_message *, uint64_t *); +static ssize_t l9p_pustring(struct l9p_message *, char **s); +static ssize_t l9p_pustrings(struct l9p_message *, uint16_t *, char **, size_t); +static ssize_t l9p_puqid(struct l9p_message *, struct l9p_qid *); +static ssize_t l9p_puqids(struct l9p_message *, uint16_t *, struct l9p_qid *q); + +/* + * Transfer data from incoming request, or to outgoing response, + * using msg to track position and direction within request/response. + * + * Returns the number of bytes actually transferred (which is always + * just len itself, converted to signed), or -1 if we ran out of space. + * + * Note that if we return -1, subsequent l9p_iov_io() calls with + * the same (and not-reset) msg and len > 0 will also return -1. + * This means most users can just check the *last* call for failure. 
+ */ +static ssize_t +l9p_iov_io(struct l9p_message *msg, void *buffer, size_t len) +{ + size_t done = 0; + size_t left = len; + + assert(msg != NULL); + + if (len == 0) + return (0); + + if (msg->lm_cursor_iov >= msg->lm_niov) + return (-1); + + assert(buffer != NULL); + + while (left > 0) { + size_t idx = msg->lm_cursor_iov; + size_t space = msg->lm_iov[idx].iov_len - msg->lm_cursor_offset; + size_t towrite = MIN(space, left); + + if (msg->lm_mode == L9P_PACK) { + memcpy((char *)msg->lm_iov[idx].iov_base + + msg->lm_cursor_offset, (char *)buffer + done, + towrite); + } + + if (msg->lm_mode == L9P_UNPACK) { + memcpy((char *)buffer + done, + (char *)msg->lm_iov[idx].iov_base + + msg->lm_cursor_offset, towrite); + } + + msg->lm_cursor_offset += towrite; + + done += towrite; + left -= towrite; + + if (space - towrite == 0) { + /* Advance to next iov */ + msg->lm_cursor_iov++; + msg->lm_cursor_offset = 0; + + if (msg->lm_cursor_iov >= msg->lm_niov && left > 0) + return (-1); + } + } + + msg->lm_size += done; + return ((ssize_t)done); +} + +/* + * Pack or unpack a byte (8 bits). + * + * Returns 1 (success, 1 byte) or -1 (error). + */ +static inline ssize_t +l9p_pu8(struct l9p_message *msg, uint8_t *val) +{ + + return (l9p_iov_io(msg, val, sizeof (uint8_t))); +} + +/* + * Pack or unpack 16-bit value. + * + * Returns 2 or -1. + */ +static inline ssize_t +l9p_pu16(struct l9p_message *msg, uint16_t *val) +{ +#if _BYTE_ORDER != _LITTLE_ENDIAN + /* + * The ifdefs are annoying, but there is no need + * for all of this foolery on little-endian hosts, + * and I don't expect the compiler to optimize it + * all away. + */ + uint16_t copy; + ssize_t ret; + + if (msg->lm_mode == L9P_PACK) { + copy = htole16(*val); + return (l9p_iov_io(msg, ©, sizeof (uint16_t))); + } + ret = l9p_iov_io(msg, val, sizeof (uint16_t)); + *val = le16toh(*val); + return (ret); +#else + return (l9p_iov_io(msg, val, sizeof (uint16_t))); +#endif +} + +/* + * Pack or unpack 32-bit value. 
+ * + * Returns 4 or -1. + */ +static inline ssize_t +l9p_pu32(struct l9p_message *msg, uint32_t *val) +{ +#if _BYTE_ORDER != _LITTLE_ENDIAN + uint32_t copy; + ssize_t ret; + + if (msg->lm_mode == L9P_PACK) { + copy = htole32(*val); + return (l9p_iov_io(msg, ©, sizeof (uint32_t))); + } + ret = l9p_iov_io(msg, val, sizeof (uint32_t)); + *val = le32toh(*val); + return (ret); +#else + return (l9p_iov_io(msg, val, sizeof (uint32_t))); +#endif +} + +/* + * Pack or unpack 64-bit value. + * + * Returns 8 or -1. + */ +static inline ssize_t +l9p_pu64(struct l9p_message *msg, uint64_t *val) +{ +#if _BYTE_ORDER != _LITTLE_ENDIAN + uint64_t copy; + ssize_t ret; + + if (msg->lm_mode == L9P_PACK) { + copy = htole64(*val); + return (l9p_iov_io(msg, ©, sizeof (uint64_t))); + } + ret = l9p_iov_io(msg, val, sizeof (uint32_t)); + *val = le64toh(*val); + return (ret); +#else + return (l9p_iov_io(msg, val, sizeof (uint64_t))); +#endif +} + +/* + * Pack or unpack a string, encoded as 2-byte length followed by + * string bytes. The returned length is 2 greater than the + * length of the string itself. + * + * When unpacking, this allocates a new string (NUL-terminated). + * + * Return -1 on error (not space, or failed to allocate string, + * or illegal string). + * + * Note that pustring (and hence pustrings) can return an error + * even when l9p_iov_io succeeds. + */ +static ssize_t +l9p_pustring(struct l9p_message *msg, char **s) +{ + uint16_t len; + + if (msg->lm_mode == L9P_PACK) + len = *s != NULL ? (uint16_t)strlen(*s) : 0; + + if (l9p_pu16(msg, &len) < 0) + return (-1); + + if (msg->lm_mode == L9P_UNPACK) { + *s = l9p_calloc(1, len + 1); + if (*s == NULL) + return (-1); + } + + if (l9p_iov_io(msg, *s, len) < 0) + return (-1); + + if (msg->lm_mode == L9P_UNPACK) { + /* + * An embedded NUL byte in a string is illegal. + * We don't necessarily have to check (we'll just + * treat it as a shorter string), but checking + * seems like a good idea. 
+ */ + if (memchr(*s, '\0', len) != NULL) + return (-1); + } + + return ((ssize_t)len + 2); +} + +/* + * Pack or unpack a number (*num) of strings (but at most max of + * them). + * + * Returns the number of bytes transferred, including the packed + * number of strings. If packing and the packed number of strings + * was reduced, the original *num value is unchanged; only the + * wire-format number is reduced. If unpacking and the input + * number of strings exceeds the max, the incoming *num is reduced + * to lim, if needed. (NOTE ASYMMETRY HERE!) + * + * Returns -1 on error. + */ +static ssize_t +l9p_pustrings(struct l9p_message *msg, uint16_t *num, char **strings, + size_t max) +{ + size_t i, lim; + ssize_t r, ret; + uint16_t adjusted; + + if (msg->lm_mode == L9P_PACK) { + lim = *num; + if (lim > max) + lim = max; + adjusted = (uint16_t)lim; + r = l9p_pu16(msg, &adjusted); + } else { + r = l9p_pu16(msg, num); + lim = *num; + if (lim > max) + *num = (uint16_t)(lim = max); + } + if (r < 0) + return (-1); + + for (i = 0; i < lim; i++) { + ret = l9p_pustring(msg, &strings[i]); + if (ret < 1) + return (-1); + + r += ret; + } + + return (r); +} + +/* + * Pack or unpack a qid. + * + * Returns 13 (success) or -1 (error). + */ +static ssize_t +l9p_puqid(struct l9p_message *msg, struct l9p_qid *qid) +{ + ssize_t r; + uint8_t type; + + if (msg->lm_mode == L9P_PACK) { + type = qid->type; + r = l9p_pu8(msg, &type); + } else { + r = l9p_pu8(msg, &type); + qid->type = type; + } + if (r > 0) + r = l9p_pu32(msg, &qid->version); + if (r > 0) + r = l9p_pu64(msg, &qid->path); + + return (r > 0 ? QID_SIZE : r); +} + +/* + * Pack or unpack *num qids. + * + * Returns 2 + 13 * *num (after possibly setting *num), or -1 on error. 
+ */ +static ssize_t +l9p_puqids(struct l9p_message *msg, uint16_t *num, struct l9p_qid *qids) +{ + size_t i, lim; + ssize_t ret, r; + + r = l9p_pu16(msg, num); + if (r > 0) { + for (i = 0, lim = *num; i < lim; i++) { + ret = l9p_puqid(msg, &qids[i]); + if (ret < 0) + return (-1); + r += ret; + } + } + return (r); +} + +/* + * Pack or unpack a l9p_stat. + * + * These have variable size, and the size further depends on + * the protocol version. + * + * Returns the number of bytes packed/unpacked, or -1 on error. + */ +ssize_t +l9p_pustat(struct l9p_message *msg, struct l9p_stat *stat, + enum l9p_version version) +{ + ssize_t r = 0; + uint16_t size; + + /* The on-wire size field excludes the size of the size field. */ + if (msg->lm_mode == L9P_PACK) + size = l9p_sizeof_stat(stat, version) - 2; + + r += l9p_pu16(msg, &size); + r += l9p_pu16(msg, &stat->type); + r += l9p_pu32(msg, &stat->dev); + r += l9p_puqid(msg, &stat->qid); + r += l9p_pu32(msg, &stat->mode); + r += l9p_pu32(msg, &stat->atime); + r += l9p_pu32(msg, &stat->mtime); + r += l9p_pu64(msg, &stat->length); + r += l9p_pustring(msg, &stat->name); + r += l9p_pustring(msg, &stat->uid); + r += l9p_pustring(msg, &stat->gid); + r += l9p_pustring(msg, &stat->muid); + + if (version >= L9P_2000U) { + r += l9p_pustring(msg, &stat->extension); + r += l9p_pu32(msg, &stat->n_uid); + r += l9p_pu32(msg, &stat->n_gid); + r += l9p_pu32(msg, &stat->n_muid); + } + + if (r < size + 2) + return (-1); + + return (r); +} + +/* + * Pack or unpack a variable-length dirent. + * + * If unpacking, the name field is malloc()ed and the caller must + * free it. + * + * Returns the wire-format length, or -1 if we ran out of room. 
+ */ +ssize_t +l9p_pudirent(struct l9p_message *msg, struct l9p_dirent *de) +{ + ssize_t r, s; + + r = l9p_puqid(msg, &de->qid); + r += l9p_pu64(msg, &de->offset); + r += l9p_pu8(msg, &de->type); + s = l9p_pustring(msg, &de->name); + if (r < QID_SIZE + 8 + 1 || s < 0) + return (-1); + return (r + s); +} + +/* + * Pack or unpack a request or response (fcall). + * + * Returns 0 on success, -1 on error. (It's up to the caller + * to call l9p_freefcall on our failure.) + */ +int +l9p_pufcall(struct l9p_message *msg, union l9p_fcall *fcall, + enum l9p_version version) +{ + uint32_t length = 0; + ssize_t r; + + /* + * Get overall length, type, and tag, which should appear + * in all messages. If not even that works, abort immediately. + */ + l9p_pu32(msg, &length); + l9p_pu8(msg, &fcall->hdr.type); + r = l9p_pu16(msg, &fcall->hdr.tag); + if (r < 0) + return (-1); + + /* + * Decode remainder of message. When unpacking, this may + * allocate memory, even if we fail during the decode. + * Note that the initial fcall is zeroed out, though, so + * we can just freefcall() to release whatever might have + * gotten allocated, if the unpack fails due to a short + * packet. 
+ */ + switch (fcall->hdr.type) { + case L9P_TVERSION: + case L9P_RVERSION: + l9p_pu32(msg, &fcall->version.msize); + r = l9p_pustring(msg, &fcall->version.version); + break; + + case L9P_TAUTH: + l9p_pu32(msg, &fcall->tauth.afid); + r = l9p_pustring(msg, &fcall->tauth.uname); + if (r < 0) + break; + r = l9p_pustring(msg, &fcall->tauth.aname); + if (r < 0) + break; + if (version >= L9P_2000U) + r = l9p_pu32(msg, &fcall->tauth.n_uname); + break; + + case L9P_RAUTH: + r = l9p_puqid(msg, &fcall->rauth.aqid); + break; + + case L9P_TATTACH: + l9p_pu32(msg, &fcall->hdr.fid); + l9p_pu32(msg, &fcall->tattach.afid); + r = l9p_pustring(msg, &fcall->tattach.uname); + if (r < 0) + break; + r = l9p_pustring(msg, &fcall->tattach.aname); + if (r < 0) + break; + if (version >= L9P_2000U) + r = l9p_pu32(msg, &fcall->tattach.n_uname); + break; + + case L9P_RATTACH: + r = l9p_puqid(msg, &fcall->rattach.qid); + break; + + case L9P_RERROR: + r = l9p_pustring(msg, &fcall->error.ename); + if (r < 0) + break; + if (version >= L9P_2000U) + r = l9p_pu32(msg, &fcall->error.errnum); + break; + + case L9P_RLERROR: + r = l9p_pu32(msg, &fcall->error.errnum); + break; + + case L9P_TFLUSH: + r = l9p_pu16(msg, &fcall->tflush.oldtag); + break; + + case L9P_RFLUSH: + break; + + case L9P_TWALK: + l9p_pu32(msg, &fcall->hdr.fid); + l9p_pu32(msg, &fcall->twalk.newfid); + r = l9p_pustrings(msg, &fcall->twalk.nwname, + fcall->twalk.wname, N(fcall->twalk.wname)); + break; + + case L9P_RWALK: + r = l9p_puqids(msg, &fcall->rwalk.nwqid, fcall->rwalk.wqid); + break; + + case L9P_TOPEN: + l9p_pu32(msg, &fcall->hdr.fid); + r = l9p_pu8(msg, &fcall->topen.mode); + break; + + case L9P_ROPEN: + l9p_puqid(msg, &fcall->ropen.qid); + r = l9p_pu32(msg, &fcall->ropen.iounit); + break; + + case L9P_TCREATE: + l9p_pu32(msg, &fcall->hdr.fid); + r = l9p_pustring(msg, &fcall->tcreate.name); + if (r < 0) + break; + l9p_pu32(msg, &fcall->tcreate.perm); + r = l9p_pu8(msg, &fcall->tcreate.mode); + if (version >= L9P_2000U) + r = 
l9p_pustring(msg, &fcall->tcreate.extension); + break; + + case L9P_RCREATE: + l9p_puqid(msg, &fcall->rcreate.qid); + r = l9p_pu32(msg, &fcall->rcreate.iounit); + break; + + case L9P_TREAD: + case L9P_TREADDIR: + l9p_pu32(msg, &fcall->hdr.fid); + l9p_pu64(msg, &fcall->io.offset); + r = l9p_pu32(msg, &fcall->io.count); + break; + + case L9P_RREAD: + case L9P_RREADDIR: + r = l9p_pu32(msg, &fcall->io.count); + break; + + case L9P_TWRITE: + l9p_pu32(msg, &fcall->hdr.fid); + l9p_pu64(msg, &fcall->io.offset); + r = l9p_pu32(msg, &fcall->io.count); + break; + + case L9P_RWRITE: + r = l9p_pu32(msg, &fcall->io.count); + break; + + case L9P_TCLUNK: + case L9P_TSTAT: + case L9P_TREMOVE: + case L9P_TSTATFS: + r = l9p_pu32(msg, &fcall->hdr.fid); + break; + + case L9P_RCLUNK: + case L9P_RREMOVE: + break; + + case L9P_RSTAT: + { + uint16_t size = l9p_sizeof_stat(&fcall->rstat.stat, + version); + l9p_pu16(msg, &size); + r = l9p_pustat(msg, &fcall->rstat.stat, version); + } + break; + + case L9P_TWSTAT: + { + uint16_t size; + l9p_pu32(msg, &fcall->hdr.fid); + l9p_pu16(msg, &size); + r = l9p_pustat(msg, &fcall->twstat.stat, version); + } + break; + + case L9P_RWSTAT: + break; + + case L9P_RSTATFS: + l9p_pu32(msg, &fcall->rstatfs.statfs.type); + l9p_pu32(msg, &fcall->rstatfs.statfs.bsize); + l9p_pu64(msg, &fcall->rstatfs.statfs.blocks); + l9p_pu64(msg, &fcall->rstatfs.statfs.bfree); + l9p_pu64(msg, &fcall->rstatfs.statfs.bavail); + l9p_pu64(msg, &fcall->rstatfs.statfs.files); + l9p_pu64(msg, &fcall->rstatfs.statfs.ffree); + l9p_pu64(msg, &fcall->rstatfs.statfs.fsid); + r = l9p_pu32(msg, &fcall->rstatfs.statfs.namelen); + break; + + case L9P_TLOPEN: + l9p_pu32(msg, &fcall->hdr.fid); + r = l9p_pu32(msg, &fcall->tlopen.flags); + break; + + case L9P_RLOPEN: + l9p_puqid(msg, &fcall->rlopen.qid); + r = l9p_pu32(msg, &fcall->rlopen.iounit); + break; + + case L9P_TLCREATE: + l9p_pu32(msg, &fcall->hdr.fid); + r = l9p_pustring(msg, &fcall->tlcreate.name); + if (r < 0) + break; + l9p_pu32(msg, 
&fcall->tlcreate.flags); + l9p_pu32(msg, &fcall->tlcreate.mode); + r = l9p_pu32(msg, &fcall->tlcreate.gid); + break; + + case L9P_RLCREATE: + l9p_puqid(msg, &fcall->rlcreate.qid); + r = l9p_pu32(msg, &fcall->rlcreate.iounit); + break; + + case L9P_TSYMLINK: + l9p_pu32(msg, &fcall->hdr.fid); + r = l9p_pustring(msg, &fcall->tsymlink.name); + if (r < 0) + break; + r = l9p_pustring(msg, &fcall->tsymlink.symtgt); + if (r < 0) + break; + r = l9p_pu32(msg, &fcall->tlcreate.gid); + break; + + case L9P_RSYMLINK: + r = l9p_puqid(msg, &fcall->rsymlink.qid); + break; + + case L9P_TMKNOD: + l9p_pu32(msg, &fcall->hdr.fid); + r = l9p_pustring(msg, &fcall->tmknod.name); + if (r < 0) + break; + l9p_pu32(msg, &fcall->tmknod.mode); + l9p_pu32(msg, &fcall->tmknod.major); + l9p_pu32(msg, &fcall->tmknod.minor); + r = l9p_pu32(msg, &fcall->tmknod.gid); + break; + + case L9P_RMKNOD: + r = l9p_puqid(msg, &fcall->rmknod.qid); + break; + + case L9P_TRENAME: + l9p_pu32(msg, &fcall->hdr.fid); + l9p_pu32(msg, &fcall->trename.dfid); + r = l9p_pustring(msg, &fcall->trename.name); + break; + + case L9P_RRENAME: + break; + + case L9P_TREADLINK: + r = l9p_pu32(msg, &fcall->hdr.fid); + break; + + case L9P_RREADLINK: + r = l9p_pustring(msg, &fcall->rreadlink.target); + break; + + case L9P_TGETATTR: + l9p_pu32(msg, &fcall->hdr.fid); + r = l9p_pu64(msg, &fcall->tgetattr.request_mask); + break; + + case L9P_RGETATTR: + l9p_pu64(msg, &fcall->rgetattr.valid); + l9p_puqid(msg, &fcall->rgetattr.qid); + l9p_pu32(msg, &fcall->rgetattr.mode); + l9p_pu32(msg, &fcall->rgetattr.uid); + l9p_pu32(msg, &fcall->rgetattr.gid); + l9p_pu64(msg, &fcall->rgetattr.nlink); + l9p_pu64(msg, &fcall->rgetattr.rdev); + l9p_pu64(msg, &fcall->rgetattr.size); + l9p_pu64(msg, &fcall->rgetattr.blksize); + l9p_pu64(msg, &fcall->rgetattr.blocks); + l9p_pu64(msg, &fcall->rgetattr.atime_sec); + l9p_pu64(msg, &fcall->rgetattr.atime_nsec); + l9p_pu64(msg, &fcall->rgetattr.mtime_sec); + l9p_pu64(msg, &fcall->rgetattr.mtime_nsec); + 
l9p_pu64(msg, &fcall->rgetattr.ctime_sec); + l9p_pu64(msg, &fcall->rgetattr.ctime_nsec); + l9p_pu64(msg, &fcall->rgetattr.btime_sec); + l9p_pu64(msg, &fcall->rgetattr.btime_nsec); + l9p_pu64(msg, &fcall->rgetattr.gen); + r = l9p_pu64(msg, &fcall->rgetattr.data_version); + break; + + case L9P_TSETATTR: + l9p_pu32(msg, &fcall->hdr.fid); + l9p_pu32(msg, &fcall->tsetattr.valid); + l9p_pu32(msg, &fcall->tsetattr.mode); + l9p_pu32(msg, &fcall->tsetattr.uid); + l9p_pu32(msg, &fcall->tsetattr.gid); + l9p_pu64(msg, &fcall->tsetattr.size); + l9p_pu64(msg, &fcall->tsetattr.atime_sec); + l9p_pu64(msg, &fcall->tsetattr.atime_nsec); + l9p_pu64(msg, &fcall->tsetattr.mtime_sec); + r = l9p_pu64(msg, &fcall->tsetattr.mtime_nsec); + break; + + case L9P_RSETATTR: + break; + + case L9P_TXATTRWALK: + l9p_pu32(msg, &fcall->hdr.fid); + l9p_pu32(msg, &fcall->txattrwalk.newfid); + r = l9p_pustring(msg, &fcall->txattrwalk.name); + break; + + case L9P_RXATTRWALK: + r = l9p_pu64(msg, &fcall->rxattrwalk.size); + break; + + case L9P_TXATTRCREATE: + l9p_pu32(msg, &fcall->hdr.fid); + r = l9p_pustring(msg, &fcall->txattrcreate.name); + if (r < 0) + break; + l9p_pu64(msg, &fcall->txattrcreate.attr_size); + r = l9p_pu32(msg, &fcall->txattrcreate.flags); + break; + + case L9P_RXATTRCREATE: + break; + + case L9P_TFSYNC: + r = l9p_pu32(msg, &fcall->hdr.fid); + break; + + case L9P_RFSYNC: + break; + + case L9P_TLOCK: + l9p_pu32(msg, &fcall->hdr.fid); + l9p_pu8(msg, &fcall->tlock.type); + l9p_pu32(msg, &fcall->tlock.flags); + l9p_pu64(msg, &fcall->tlock.start); + l9p_pu64(msg, &fcall->tlock.length); + l9p_pu32(msg, &fcall->tlock.proc_id); + r = l9p_pustring(msg, &fcall->tlock.client_id); + break; + + case L9P_RLOCK: + r = l9p_pu8(msg, &fcall->rlock.status); + break; + + case L9P_TGETLOCK: + l9p_pu32(msg, &fcall->hdr.fid); + /* FALLTHROUGH */ + + case L9P_RGETLOCK: + l9p_pu8(msg, &fcall->getlock.type); + l9p_pu64(msg, &fcall->getlock.start); + l9p_pu64(msg, &fcall->getlock.length); + l9p_pu32(msg, 
&fcall->getlock.proc_id); + r = l9p_pustring(msg, &fcall->getlock.client_id); + break; + + case L9P_TLINK: + l9p_pu32(msg, &fcall->tlink.dfid); + l9p_pu32(msg, &fcall->hdr.fid); + r = l9p_pustring(msg, &fcall->tlink.name); + break; + + case L9P_RLINK: + break; + + case L9P_TMKDIR: + l9p_pu32(msg, &fcall->hdr.fid); + r = l9p_pustring(msg, &fcall->tmkdir.name); + if (r < 0) + break; + l9p_pu32(msg, &fcall->tmkdir.mode); + r = l9p_pu32(msg, &fcall->tmkdir.gid); + break; + + case L9P_RMKDIR: + r = l9p_puqid(msg, &fcall->rmkdir.qid); + break; + + case L9P_TRENAMEAT: + l9p_pu32(msg, &fcall->hdr.fid); + r = l9p_pustring(msg, &fcall->trenameat.oldname); + if (r < 0) + break; + l9p_pu32(msg, &fcall->trenameat.newdirfid); + r = l9p_pustring(msg, &fcall->trenameat.newname); + break; + + case L9P_RRENAMEAT: + break; + + case L9P_TUNLINKAT: + l9p_pu32(msg, &fcall->hdr.fid); + r = l9p_pustring(msg, &fcall->tunlinkat.name); + if (r < 0) + break; + r = l9p_pu32(msg, &fcall->tunlinkat.flags); + break; + + case L9P_RUNLINKAT: + break; + + default: + L9P_LOG(L9P_ERROR, "%s(): missing case for type %d", + __func__, fcall->hdr.type); + break; + } + + /* Check for over- or under-run, or pustring error. */ + if (r < 0) + return (-1); + + if (msg->lm_mode == L9P_PACK) { + /* Rewind to the beginning and install size at front. */ + uint32_t len = (uint32_t)msg->lm_size; + msg->lm_cursor_offset = 0; + msg->lm_cursor_iov = 0; + + /* + * Subtract 4 bytes from current size, becase we're + * overwriting size (rewinding message to the beginning) + * and writing again, which will increase it 4 more. + */ + msg->lm_size -= sizeof(uint32_t); + + if (fcall->hdr.type == L9P_RREAD || + fcall->hdr.type == L9P_RREADDIR) + len += fcall->io.count; + + l9p_pu32(msg, &len); + } + + return (0); +} + +/* + * Free any strings or other data malloc'ed in the process of + * packing or unpacking an fcall. 
+ */ +void +l9p_freefcall(union l9p_fcall *fcall) +{ + uint16_t i; + + switch (fcall->hdr.type) { + + case L9P_TVERSION: + case L9P_RVERSION: + free(fcall->version.version); + return; + + case L9P_TATTACH: + free(fcall->tattach.aname); + free(fcall->tattach.uname); + return; + + case L9P_TWALK: + for (i = 0; i < fcall->twalk.nwname; i++) + free(fcall->twalk.wname[i]); + return; + + case L9P_TCREATE: + case L9P_TOPEN: + free(fcall->tcreate.name); + free(fcall->tcreate.extension); + return; + + case L9P_RSTAT: + l9p_freestat(&fcall->rstat.stat); + return; + + case L9P_TWSTAT: + l9p_freestat(&fcall->twstat.stat); + return; + + case L9P_TLCREATE: + free(fcall->tlcreate.name); + return; + + case L9P_TSYMLINK: + free(fcall->tsymlink.name); + free(fcall->tsymlink.symtgt); + return; + + case L9P_TMKNOD: + free(fcall->tmknod.name); + return; + + case L9P_TRENAME: + free(fcall->trename.name); + return; + + case L9P_RREADLINK: + free(fcall->rreadlink.target); + return; + + case L9P_TXATTRWALK: + free(fcall->txattrwalk.name); + return; + + case L9P_TXATTRCREATE: + free(fcall->txattrcreate.name); + return; + + case L9P_TLOCK: + free(fcall->tlock.client_id); + return; + + case L9P_TGETLOCK: + case L9P_RGETLOCK: + free(fcall->getlock.client_id); + return; + + case L9P_TLINK: + free(fcall->tlink.name); + return; + + case L9P_TMKDIR: + free(fcall->tmkdir.name); + return; + + case L9P_TRENAMEAT: + free(fcall->trenameat.oldname); + free(fcall->trenameat.newname); + return; + + case L9P_TUNLINKAT: + free(fcall->tunlinkat.name); + return; + } +} + +void +l9p_freestat(struct l9p_stat *stat) +{ + free(stat->name); + free(stat->extension); + free(stat->uid); + free(stat->gid); + free(stat->muid); +} + +uint16_t +l9p_sizeof_stat(struct l9p_stat *stat, enum l9p_version version) +{ + uint16_t size = L9P_WORD /* size */ + + L9P_WORD /* type */ + + L9P_DWORD /* dev */ + + QID_SIZE /* qid */ + + 3 * L9P_DWORD /* mode, atime, mtime */ + + L9P_QWORD /* length */ + + STRING_SIZE(stat->name) + + 
STRING_SIZE(stat->uid) + + STRING_SIZE(stat->gid) + + STRING_SIZE(stat->muid); + + if (version >= L9P_2000U) { + size += STRING_SIZE(stat->extension) + + 3 * L9P_DWORD; + } + + return (size); +} Index: lib/lib9p/pytest/.gitignore =================================================================== --- /dev/null +++ lib/lib9p/pytest/.gitignore @@ -0,0 +1,3 @@ +*.pyc +__pycache__ +testconf.ini Index: lib/lib9p/pytest/Makefile =================================================================== --- /dev/null +++ lib/lib9p/pytest/Makefile @@ -0,0 +1,9 @@ +PYTHON?=python + +selftest: + for f in lerrno p9err pfod protocol sequencer; do \ + ${PYTHON} $$f.py; \ + done + +clean cleandir: + rm -rf *.pyc __pycache__ *.log Index: lib/lib9p/pytest/README =================================================================== --- /dev/null +++ lib/lib9p/pytest/README @@ -0,0 +1,32 @@ +Here are some very skeletal instructions for using +the client test code. + +on server (assumes BSD style LD_LIBRARY_PATH): + +mkdir /tmp/foo +cd lib9p +env LD_LIBRARY_PATH=. LIB9P_LOGGING=stderr example/server -h localhost -p 12345 /tmp/foo + +(this can be run as a non-root user for now, but some things +only work when run as root) + +on client (same machine as server, but can always be run as +non-root user): + +cd lib9p/pytest +ONE TIME ONLY: copy testconf.ini.sample to testconf.ini, adjust to taste +./client.py + +TODO: rework ./client so it can locate the .ini file better + +######## + +IF USING diod (http://github.com/chaos/diod) AS THE SERVER ON +A LINUX MACHINE: + + - The instructions for running the server are (or were): + sudo ./diod -f -d 1 -n -e /tmp/9 + - You must mkdir the exported 9pfs file system (e.g., mkdir /tmp/9). + - While uname is not really used, aname (the attach name) IS used + and must match the exported file system, e.g., testconf.ini + must have "aname = /tmp/9". 
class TestState(object):
    """
    Shared state for a whole test run: configuration, logger,
    result counters, and the table of client connections.

    clnt_tab maps a client ID to a two-element list
    [client, session], where session is None until ccs() has
    attached, and an (aname, uname, n_uname) tuple afterwards.
    """
    def __init__(self):
        # set by main() before any test runs
        self.config = None
        self.logger = None
        self.mkclient = None
        # result counters, updated as each test case exits
        self.successes = 0
        self.skips = 0
        self.failures = 0
        self.exceptions = 0
        # client ID -> [client, session-or-None]
        self.clnt_tab = {}
        # set True to abandon the remaining tests
        self.stop = False
        self.gid = 0

    def ccc(self, cid=None):
        """
        Check and connect client.

        Looks up the client with the given ID ('base' when no ID
        is supplied), creating it through self.mkclient() on first
        use, and connects it if it is not currently connected.

        Returns the [client, session] pair for that ID; session
        is None if no attach has been done yet.
        """
        key = 'base' if cid is None else cid
        pair = self.clnt_tab.get(key)
        if pair is None:
            pair = [self.mkclient(), None]
            self.clnt_tab[key] = pair
        client = pair[0]
        if not client.is_connected():
            client.connect()
        return pair

    def dcc(self, cid=None):
        """
        Disconnect a checked client and forget about it.

        With no client ID, every client in the table is
        disconnected and removed.
        """
        if cid is None:
            # iterate over a snapshot: each call removes its entry
            for key in list(self.clnt_tab.keys()):
                self.dcc(key)
            return
        pair = self.clnt_tab.get(cid)
        if pair is None:
            return
        client = pair[0]
        if client.is_connected():
            client.shutdown()
        del self.clnt_tab[cid]

    def ccs(self, cid=None):
        """
        Check and connect, with session: like ccc(), but also
        attach if this client has no session yet.

        The aname/uname (and, for protocols above plain, n_uname)
        come from the [client-<cid>] configuration section, or
        from [client] for the default client.

        Returns the client instance only.
        """
        pair = self.ccc(cid)
        client = pair[0]
        if pair[1] is not None:
            return client
        # No session yet - establish one.  Note, this may fail.
        if cid is None:
            section = None
        else:
            section = 'client-' + cid
        aname = getconf(self.config, section, 'aname', '')
        uname = getconf(self.config, section, 'uname', '')
        n_uname = None
        if client.proto > protocol.plain:
            n_uname = getint(self.config, section, 'n_uname', 1001)
        client.attach(afid=None, aname=aname, uname=uname, n_uname=n_uname)
        pair[1] = (aname, uname, n_uname)
        return client
def getconf(conf, section, name, default=None, rtype=str):
    """
    Look up a configuration item.

    The item is first looked for in the given section; if that
    section or option does not exist (or section is None), the
    [client] section is consulted instead.  This lets specific
    tests override general settings.

    rtype selects the type of the result: str (the default),
    int, float, or bool; see also getint, getfloat, and getbool.
    Booleans accept 1/t/true/y/yes and 0/f/false/n/no in any case.

    If the item is found nowhere, a default that is not None is
    returned as-is (without conversion); otherwise LocalError is
    raised.  A configuration with no [client] section at all
    terminates the program.
    """
    where = section
    try:
        # note: conf.get(None, 'foo') raises NoSectionError
        result = conf.get(section, name)
    except (configparser.NoSectionError, configparser.NoOptionError):
        where = 'client'
        try:
            result = conf.get(where, name)
        except configparser.NoSectionError:
            sys.exit('no [{0}] section in configuration!'.format(where))
        except configparser.NoOptionError:
            if default is not None:
                return default
            if section is None:
                places = '[{0}]'.format(where)
            else:
                places = '[{0}] or [{1}]'.format(section, where)
            raise LocalError('need {0}=value in {1}'.format(name, places))

    label = '[{0}]'.format(where)
    if rtype is str:
        return result
    if rtype is int or rtype is float:
        return rtype(result)
    if rtype is bool:
        lowered = result.lower()
        if lowered in ('1', 't', 'true', 'y', 'yes'):
            return True
        if lowered in ('0', 'f', 'false', 'n', 'no'):
            return False
        raise ValueError('{0} {1}={2}: invalid boolean'.format(label, name,
                                                               result))
    raise ValueError('{0} {1}={2}: internal error: bad result type '
                     '{3!r}'.format(label, name, result, rtype))

def getint(conf, section, name, default=None):
    "getconf, with the value converted to int"
    return getconf(conf, section, name, default, rtype=int)

def getfloat(conf, section, name, default=None):
    "getconf, with the value converted to float"
    return getconf(conf, section, name, default, rtype=float)

def getbool(conf, section, name, default=None):
    "getconf, with the value interpreted as a boolean"
    return getconf(conf, section, name, default, rtype=bool)

def pluralize(n, singular, plural):
    "pick singular when n is exactly 1, plural otherwise"
    if n == 1:
        return singular
    return plural

class TCDone(Exception):
    "raised by succ/fail/skip to abandon the rest of a test case"
    pass
class TestCase(object):
    """
    Context manager wrapping one test case.

    Use as "with TestCase(name, tstate) as tc:" and finish the
    body with tc.succ(), tc.fail(), or tc.skip(); a body that
    simply falls off the end counts as a success.  Most test
    bodies start with tc.ccs() to get a connected client.

    On exit the outcome is logged, the counters in tstate are
    updated, fids registered with autoclunk() are clunked, and a
    connection registered with auto_disconnect() is shut down.
    """
    def __init__(self, name, tstate):
        self.name = name
        self.tstate = tstate
        # outcome, filled in by succ/fail/skip or by __exit__
        self.status = None
        self.detail = None
        # cleanup bookkeeping
        self._shutdown = None
        self._autoclunk = None
        self._acconn = None

    def auto_disconnect(self, conn):
        "arrange for conn.shutdown() to be called on test exit"
        self._shutdown = conn

    def _finish(self, status, detail):
        "record the outcome and unwind out of the test body"
        self.status = status
        self.detail = detail
        raise TCDone()

    def succ(self, detail=None):
        "end the test case as a success"
        self._finish('SUCC', detail)

    def fail(self, detail):
        "end the test case as a failure"
        self._finish('FAIL', detail)

    def skip(self, detail=None):
        "end the test case as skipped"
        self._finish('SKIP', detail)

    def autoclunk(self, fid):
        "register fid to be clunked on test exit (requires prior ccs)"
        if self._acconn is None:
            raise ValueError('autoclunk: no _acconn')
        self._autoclunk.append(fid)

    def trace(self, msg, *args, **kwargs):
        "log an indented message; level= keyword overrides INFO"
        level = kwargs.pop('level', logging.INFO)
        self.tstate.logger.log(level, ' ' + msg, *args, **kwargs)

    def ccs(self):
        "tstate.ccs(), with a socket.error turned into a test failure"
        self.detail = 'connecting'
        try:
            client = self.tstate.ccs()
        except socket.error as err:
            self.fail(str(err))
        self.detail = None
        self._acconn = client
        return client

    def __enter__(self):
        self.tstate.logger.log(logging.DEBUG, 'ENTER: %s', self.name)
        self._autoclunk = []
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        state = self.tstate
        # TCDone just means we exited with succ, fail, or skip
        swallow = exc_type is TCDone
        trace_lines = None

        if swallow or exc_type is None:
            if self.status is None:
                self.status = 'SUCC'
            if self.status == 'SUCC':
                severity = logging.INFO
                state.successes += 1
            elif self.status == 'SKIP':
                severity = logging.INFO
                state.skips += 1
            else:
                severity = logging.ERROR
                state.failures += 1
        else:
            if self.status is None:
                self.status = 'EXCP'
            else:
                self.status = self.status + ' EXC'
            if exc_type == TEError:
                # timeout/eof - best guess is that we crashed the server!
                swallow = True
                trace_lines = ['timeout or EOF']
            elif exc_type in (socket.error, RemoteError, LocalError):
                swallow = True
                trace_lines = traceback.format_exception(exc_type, exc_val,
                                                         exc_tb)
            # any other exception type is counted but re-raised
            severity = logging.ERROR
            state.failures += 1
            state.exceptions += 1

        log = state.logger.log
        log(severity, '%s: %s', self.status, self.name)
        if self.detail:
            log(severity, ' detail: %s', self.detail)
        for text in trace_lines or ():
            log(severity, ' %s', text.rstrip())

        for fid in self._autoclunk:
            self._acconn.clunk(fid, ignore_error=True)
        if self._shutdown:
            self._shutdown.shutdown()
        return swallow
may_downgrade = config.getboolean('client', 'may_downgrade') + timeout = config.getfloat('client', 'timeout') + + tstate.stop = True # unless overwritten below + with TestCase('send bad packet', tstate) as tc: + tc.detail = 'connecting to {0}:{1}'.format(server, port) + try: + conn = p9conn.P9SockIO(logger, server=server, port=port) + except socket.error as err: + tc.fail('cannot connect at all (server down?)') + tc.auto_disconnect(conn) + tc.detail = None + pkt = struct.pack(' protocol.plain: + n_uname = getint(tstate.config, section, 'n_uname', 1001) + else: + n_uname = None + try: + clnt.attach(afid=42, aname=aname, uname=uname, n_uname=n_uname) + except RemoteError as err: + tc.succ(err.args[0]) + tc.dcc() + tc.fail('bad attach afid not rejected') + + try: + if not tstate.stop: + # Various Linux tests need gids. Just get them for everyone. + tstate.gid = getint(tstate.config, 'client', 'gid', 0) + more_test_cases(tstate) + finally: + tstate.dcc() + + n_tests = tstate.successes + tstate.failures + print('summary:') + if tstate.successes: + print('{0}/{1} tests succeeded'.format(tstate.successes, n_tests)) + if tstate.failures: + print('{0}/{1} tests failed'.format(tstate.failures, n_tests)) + if tstate.skips: + print('{0} {1} skipped'.format(tstate.skips, + pluralize(tstate.skips, + 'test', 'tests'))) + if tstate.exceptions: + print('{0} {1} occurred'.format(tstate.exceptions, + pluralize(tstate.exceptions, + 'exception', 'exceptions'))) + if tstate.stop: + print('tests stopped early') + return 1 if tstate.stop or tstate.exceptions or tstate.failures else 0 + +def more_test_cases(tstate): + "run cases that can only proceed if connecting works at all" + with TestCase('attach normally', tstate) as tc: + tc.ccs() + tc.succ() + if tstate.stop: + return + + # Empty string is not technically illegal. It's not clear + # whether it should be accepted or rejected. However, it + # used to crash the server entirely, so it's a desirable + # test case. 
+ with TestCase('empty string in Twalk request', tstate) as tc: + clnt = tc.ccs() + try: + fid, qid = clnt.lookup(clnt.rootfid, [b'']) + except RemoteError as err: + tc.succ(err.args[0]) + clnt.clunk(fid) + tc.succ('note: empty Twalk component name not rejected') + + # Name components may not contain / + with TestCase('embedded / in lookup component name', tstate) as tc: + clnt = tc.ccs() + try: + fid, qid = clnt.lookup(clnt.rootfid, [b'/']) + tc.autoclunk(fid) + except RemoteError as err: + tc.succ(err.args[0]) + tc.fail('/ in lookup component name not rejected') + + # Proceed from a clean tree. As a side effect, this also tests + # either the old style readdir (read() on a directory fid) or + # the dot-L readdir(). + # + # The test case will fail if we don't have permission to remove + # some file(s). + with TestCase('clean up tree (readdir+remove)', tstate) as tc: + clnt = tc.ccs() + fset = clnt.uxreaddir(b'/') + fset = [i for i in fset if i != '.' and i != '..'] + tc.trace("what's there initially: {0!r}".format(fset)) + try: + clnt.uxremove(b'/', force=False, recurse=True) + except RemoteError as err: + tc.trace('failed to read or clean up tree', level=logging.ERROR) + tc.trace('this might be a permissions error', level=logging.ERROR) + tstate.stop = True + tc.fail(str(err)) + fset = clnt.uxreaddir(b'/') + fset = [i for i in fset if i != '.' and i != '..'] + tc.trace("what's left after removing everything: {0!r}".format(fset)) + if fset: + tstate.stop = True + tc.trace('note: could be a permissions error', level=logging.ERROR) + tc.fail('/ not empty after removing all: {0!r}'.format(fset)) + tc.succ() + if tstate.stop: + return + + # Name supplied to create, mkdir, etc, may not contain /. + # Note that this test may fail for the wrong reason if /dir + # itself does not already exist, so first let's make /dir. 
+ only_dotl = getbool(tstate.config, 'client', 'only_dotl', False) + with TestCase('mkdir', tstate) as tc: + clnt = tc.ccs() + if only_dotl and not clnt.supports(protocol.td.Tmkdir): + tc.skip('cannot test dot-L mkdir on {0}'.format(clnt.proto)) + try: + fid, qid = clnt.uxlookup(b'/dir', None) + tc.autoclunk(fid) + tstate.stop = True + tc.fail('found existing /dir after cleaning tree') + except RemoteError as err: + # we'll just assume it's "no such file or directory" + pass + if only_dotl: + qid = clnt.mkdir(clnt.rootfid, b'dir', 0o777, tstate.gid) + else: + qid, _ = clnt.create(clnt.rootfid, b'dir', + protocol.td.DMDIR | 0o777, + protocol.td.OREAD) + if qid.type != protocol.td.QTDIR: + tstate.stop = True + tc.fail('creating /dir: result is not a directory') + tc.trace('now attempting to create /dir/sub the wrong way') + try: + if only_dotl: + qid = clnt.mkdir(clnt.rootfid, b'dir/sub', 0o777, tstate.gid) + else: + qid, _ = clnt.create(clnt.rootfid, b'dir/sub', + protocol.td.DMDIR | 0o777, + protocol.td.OREAD) + # it's not clear what happened on the server at this point! + tc.trace("creating dir/sub (with embedded '/') should have " + 'failed but did not') + tstate.stop = True + fset = clnt.uxreaddir(b'/dir') + if 'sub' in fset: + tc.trace('(found our dir/sub detritus)') + clnt.uxremove(b'dir/sub', force=True) + fset = clnt.uxreaddir(b'/dir') + if 'sub' not in fset: + tc.trace('(successfully removed our dir/sub detritus)') + tstate.stop = False + tc.fail('created dir/sub as single directory with embedded slash') + except RemoteError as err: + # we'll just assume it's the right kind of error + tc.trace('invalid path dir/sub failed with: %s', str(err)) + tc.succ('embedded slash in mkdir correctly refused') + if tstate.stop: + return + + with TestCase('getattr/setattr', tstate) as tc: + # This test is not really thorough enough, need to test + # all combinations of settings. 
Should also test that + # old values are restored on failure, although it is not + # clear how to trigger failures. + clnt = tc.ccs() + if not clnt.supports(protocol.td.Tgetattr): + tc.skip('%s does not support Tgetattr', clnt) + fid, _, _, _ = clnt.uxopen(b'/dir/file', os.O_CREAT | os.O_RDWR, 0o666, + gid=tstate.gid) + tc.autoclunk(fid) + written = clnt.write(fid, 0, 'bytes\n') + if written != 6: + tc.trace('expected to write 6 bytes, actually wrote %d', written, + level=logging.WARN) + attrs = clnt.Tgetattr(fid) + #tc.trace('getattr: after write, before setattr: got %s', attrs) + if attrs.size != written: + tc.fail('getattr: expected size=%d, got size=%d', + written, attrs.size) + # now truncate, set mtime to (3,14), and check result + set_time_to = p9conn.Timespec(sec=0, nsec=140000000) + clnt.Tsetattr(fid, size=0, mtime=set_time_to) + attrs = clnt.Tgetattr(fid) + #tc.trace('getattr: after setattr: got %s', attrs) + if attrs.mtime.sec != set_time_to.sec or attrs.size != 0: + tc.fail('setattr: expected to get back mtime.sec={0}, size=0; ' + 'got mtime.sec={1}, size=' + '{1}'.format(set_time_to.sec, attrs.mtime.sec, attrs.size)) + # nsec is not as stable but let's check + if attrs.mtime.nsec != set_time_to.nsec: + tc.trace('setattr: expected to get back mtime_nsec=%d; ' + 'got %d', set_time_to.nsec, mtime_nsec) + tc.succ('able to set and see size and mtime') + + # this test should be much later, but we know the current + # server is broken... 
+ with TestCase('rename adjusts other fids', tstate) as tc: + clnt = tc.ccs() + dirfid, _ = clnt.uxlookup(b'/dir') + tc.autoclunk(dirfid) + clnt.uxmkdir(b'd1', 0o777, tstate.gid, startdir=dirfid) + clnt.uxmkdir(b'd1/sub', 0o777, tstate.gid, startdir=dirfid) + d1fid, _ = clnt.uxlookup(b'd1', dirfid) + tc.autoclunk(d1fid) + subfid, _ = clnt.uxlookup(b'sub', d1fid) + tc.autoclunk(subfid) + fid, _, _, _ = clnt.uxopen(b'file', os.O_CREAT | os.O_RDWR, + 0o666, startdir=subfid, gid=tstate.gid) + tc.autoclunk(fid) + written = clnt.write(fid, 0, 'filedata\n') + if written != 9: + tc.trace('expected to write 9 bytes, actually wrote %d', written, + level=logging.WARN) + # Now if we rename /dir/d1 to /dir/d2, the fids for both + # sub/file and sub itself should still be usable. This + # holds for both Trename (Linux only) and Twstat based + # rename ops. + # + # Note that some servers may cache some number of files and/or + # diretories held open, so we should open many fids to wipe + # out the cache (XXX notyet). + if clnt.supports(protocol.td.Trename): + clnt.rename(d1fid, dirfid, name=b'd2') + else: + clnt.wstat(d1fid, name=b'd2') + try: + rofid, _, _, _ = clnt.uxopen(b'file', os.O_RDONLY, startdir=subfid) + clnt.clunk(rofid) + except RemoteError as err: + tc.fail('open file in renamed dir/d2/sub: {0}'.format(err)) + tc.succ() + + # Even if xattrwalk is supported by the protocol, it's optional + # on the server. + with TestCase('xattrwalk', tstate) as tc: + clnt = tc.ccs() + if not clnt.supports(protocol.td.Txattrwalk): + tc.skip('{0} does not support Txattrwalk'.format(clnt)) + dirfid, _ = clnt.uxlookup(b'/dir') + tc.autoclunk(dirfid) + try: + # need better tests... 
# Error number definitions for Linux.
#
# The numeric values are the Linux errno values; they are spelled out
# here (rather than taken from the host's errno module) so that a
# Linux-style error number can be interpreted on any host.

EPERM = 1
ENOENT = 2
ESRCH = 3
EINTR = 4
EIO = 5
ENXIO = 6
E2BIG = 7
ENOEXEC = 8
EBADF = 9
ECHILD = 10
EAGAIN = 11
ENOMEM = 12
EACCES = 13
EFAULT = 14
ENOTBLK = 15
EBUSY = 16
EEXIST = 17
EXDEV = 18
ENODEV = 19
ENOTDIR = 20
EISDIR = 21
EINVAL = 22
ENFILE = 23
EMFILE = 24
ENOTTY = 25
ETXTBSY = 26
EFBIG = 27
ENOSPC = 28
ESPIPE = 29
EROFS = 30
EMLINK = 31
EPIPE = 32
EDOM = 33
ERANGE = 34
EDEADLK = 35
ENAMETOOLONG = 36
ENOLCK = 37
ENOSYS = 38
ENOTEMPTY = 39
ELOOP = 40
# 41 unused
ENOMSG = 42
EIDRM = 43
ECHRNG = 44
EL2NSYNC = 45
EL3HLT = 46
EL3RST = 47
ELNRNG = 48
EUNATCH = 49
ENOCSI = 50
EL2HLT = 51
EBADE = 52
EBADR = 53
EXFULL = 54
ENOANO = 55
EBADRQC = 56
EBADSLT = 57
# 58 unused
EBFONT = 59
ENOSTR = 60
ENODATA = 61
ETIME = 62
ENOSR = 63
ENONET = 64
ENOPKG = 65
EREMOTE = 66
ENOLINK = 67
EADV = 68
ESRMNT = 69
ECOMM = 70
EPROTO = 71
EMULTIHOP = 72
EDOTDOT = 73
EBADMSG = 74
EOVERFLOW = 75
ENOTUNIQ = 76
EBADFD = 77
EREMCHG = 78
ELIBACC = 79
ELIBBAD = 80
ELIBSCN = 81
ELIBMAX = 82
ELIBEXEC = 83
EILSEQ = 84
ERESTART = 85
ESTRPIPE = 86
EUSERS = 87
ENOTSOCK = 88
EDESTADDRREQ = 89
EMSGSIZE = 90
EPROTOTYPE = 91
ENOPROTOOPT = 92
EPROTONOSUPPORT = 93
ESOCKTNOSUPPORT = 94
EOPNOTSUPP = 95
EPFNOSUPPORT = 96
EAFNOSUPPORT = 97
EADDRINUSE = 98
EADDRNOTAVAIL = 99
ENETDOWN = 100
ENETUNREACH = 101
ENETRESET = 102
ECONNABORTED = 103
ECONNRESET = 104
ENOBUFS = 105
EISCONN = 106
ENOTCONN = 107
ESHUTDOWN = 108
ETOOMANYREFS = 109
ETIMEDOUT = 110
ECONNREFUSED = 111
EHOSTDOWN = 112
EHOSTUNREACH = 113
EALREADY = 114
EINPROGRESS = 115
ESTALE = 116
EUCLEAN = 117
ENOTNAM = 118
ENAVAIL = 119
EISNAM = 120
EREMOTEIO = 121
EDQUOT = 122
ENOMEDIUM = 123
EMEDIUMTYPE = 124
ECANCELED = 125
ENOKEY = 126
EKEYEXPIRED = 127
EKEYREVOKED = 128
EKEYREJECTED = 129
EOWNERDEAD = 130
ENOTRECOVERABLE = 131
ERFKILL = 132
EHWPOISON = 133

# Error number -> message text.  Numbers with no entry (41 and 58 are
# unused) are reported by strerror() as 'Unknown error N'.
_strerror = {
    # EPERM used to carry the EACCES text, which made the two errors
    # indistinguishable in messages.
    EPERM: 'Operation not permitted',
    ENOENT: 'No such file or directory',
    ESRCH: 'No such process',
    EINTR: 'Interrupted system call',
    EIO: 'Input/output error',
    ENXIO: 'Device not configured',
    E2BIG: 'Argument list too long',
    ENOEXEC: 'Exec format error',
    EBADF: 'Bad file descriptor',
    ECHILD: 'No child processes',
    EAGAIN: 'Resource temporarily unavailable',
    ENOMEM: 'Cannot allocate memory',
    EACCES: 'Permission denied',
    EFAULT: 'Bad address',
    ENOTBLK: 'Block device required',
    EBUSY: 'Device busy',
    EEXIST: 'File exists',
    EXDEV: 'Cross-device link',
    ENODEV: 'Operation not supported by device',
    ENOTDIR: 'Not a directory',
    EISDIR: 'Is a directory',
    EINVAL: 'Invalid argument',
    ENFILE: 'Too many open files in system',
    EMFILE: 'Too many open files',
    ENOTTY: 'Inappropriate ioctl for device',
    ETXTBSY: 'Text file busy',
    EFBIG: 'File too large',
    ENOSPC: 'No space left on device',
    ESPIPE: 'Illegal seek',
    EROFS: 'Read-only filesystem',
    EMLINK: 'Too many links',
    EPIPE: 'Broken pipe',
    EDOM: 'Numerical argument out of domain',
    ERANGE: 'Result too large',
    EDEADLK: 'Resource deadlock avoided',
    ENAMETOOLONG: 'File name too long',
    ENOLCK: 'No locks available',
    ENOSYS: 'Function not implemented',
    ENOTEMPTY: 'Directory not empty',
    ELOOP: 'Too many levels of symbolic links',
    ENOMSG: 'No message of desired type',
    EIDRM: 'Identifier removed',
    ECHRNG: 'Channel number out of range',
    EL2NSYNC: 'Level 2 not synchronized',
    EL3HLT: 'Level 3 halted',
    EL3RST: 'Level 3 reset',
    ELNRNG: 'Link number out of range',
    EUNATCH: 'Protocol driver not attached',
    ENOCSI: 'No CSI structure available',
    EL2HLT: 'Level 2 halted',
    EBADE: 'Invalid exchange',
    EBADR: 'Invalid request descriptor',
    EXFULL: 'Exchange full',
    ENOANO: 'No anode',
    EBADRQC: 'Invalid request code',
    EBADSLT: 'Invalid slot',
    EBFONT: 'Bad font file format',
    ENOSTR: 'Device not a stream',
    ENODATA: 'No data available',
    ETIME: 'Timer expired',
    ENOSR: 'Out of streams resources',
    ENONET: 'Machine is not on the network',
    ENOPKG: 'Package not installed',
    EREMOTE: 'Object is remote',
    ENOLINK: 'Link has been severed',
    EADV: 'Advertise error',
    ESRMNT: 'Srmount error',
    ECOMM: 'Communication error on send',
    EPROTO: 'Protocol error',
    EMULTIHOP: 'Multihop attempted',
    EDOTDOT: 'RFS specific error',
    EBADMSG: 'Bad message',
    EOVERFLOW: 'Value too large for defined data type',
    ENOTUNIQ: 'Name not unique on network',
    EBADFD: 'File descriptor in bad state',
    EREMCHG: 'Remote address changed',
    ELIBACC: 'Can not access a needed shared library',
    ELIBBAD: 'Accessing a corrupted shared library',
    ELIBSCN: '.lib section in a.out corrupted',
    ELIBMAX: 'Attempting to link in too many shared libraries',
    ELIBEXEC: 'Cannot exec a shared library directly',
    EILSEQ: 'Invalid or incomplete multibyte or wide character',
    ERESTART: 'Interrupted system call should be restarted',
    ESTRPIPE: 'Streams pipe error',
    EUSERS: 'Too many users',
    ENOTSOCK: 'Socket operation on non-socket',
    EDESTADDRREQ: 'Destination address required',
    EMSGSIZE: 'Message too long',
    EPROTOTYPE: 'Protocol wrong type for socket',
    ENOPROTOOPT: 'Protocol not available',
    EPROTONOSUPPORT: 'Protocol not supported',
    ESOCKTNOSUPPORT: 'Socket type not supported',
    EOPNOTSUPP: 'Operation not supported',
    EPFNOSUPPORT: 'Protocol family not supported',
    EAFNOSUPPORT: 'Address family not supported by protocol',
    EADDRINUSE: 'Address already in use',
    EADDRNOTAVAIL: 'Cannot assign requested address',
    ENETDOWN: 'Network is down',
    ENETUNREACH: 'Network is unreachable',
    ENETRESET: 'Network dropped connection on reset',
    ECONNABORTED: 'Software caused connection abort',
    ECONNRESET: 'Connection reset by peer',
    ENOBUFS: 'No buffer space available',
    EISCONN: 'Transport endpoint is already connected',
    ENOTCONN: 'Transport endpoint is not connected',
    ESHUTDOWN: 'Cannot send after transport endpoint shutdown',
    ETOOMANYREFS: 'Too many references: cannot splice',
    ETIMEDOUT: 'Connection timed out',
    ECONNREFUSED: 'Connection refused',
    EHOSTDOWN: 'Host is down',
    EHOSTUNREACH: 'No route to host',
    EALREADY: 'Operation already in progress',
    EINPROGRESS: 'Operation now in progress',
    ESTALE: 'Stale file handle',
    EUCLEAN: 'Structure needs cleaning',
    ENOTNAM: 'Not a XENIX named type file',
    ENAVAIL: 'No XENIX semaphores available',
    EISNAM: 'Is a named type file',
    EREMOTEIO: 'Remote I/O error',
    EDQUOT: 'Quota exceeded',
    ENOMEDIUM: 'No medium found',
    EMEDIUMTYPE: 'Wrong medium type',
    ECANCELED: 'Operation canceled',
    ENOKEY: 'Required key not available',
    EKEYEXPIRED: 'Key has expired',
    EKEYREVOKED: 'Key has been revoked',
    EKEYREJECTED: 'Key was rejected by service',
    EOWNERDEAD: 'Owner died',
    ENOTRECOVERABLE: 'State not recoverable',
    ERFKILL: 'Operation not possible due to RF-kill',
    EHWPOISON: 'Memory page has hardware error',
}

def strerror(errnum):
    """
    Translate Linux errno to string.

    errnum is looked up in the table above; a number with no entry
    yields 'Unknown error <errnum>' rather than raising.

    >>> strerror(EPERM)
    'Operation not permitted'
    >>> strerror(ENOKEY)
    'Required key not available'
    >>> strerror(41)
    'Unknown error 41'
    """
    ret = _strerror.get(errnum)
    if ret:
        return ret
    return 'Unknown error {0}'.format(errnum)

if __name__ == '__main__':
    import doctest
    doctest.testmod()
class NumAlloc(object):
    """
    Number allocator object.

    The free values are kept in self.avail as a sorted list of closed
    [low, high] intervals.  self.last remembers the most recently
    allocated value so that alloc() with no argument continues from
    there.  alloc() and the free operations take self.lock.
    """
    def __init__(self, min_val, max_val, autoextend=True):
        self.min_val = min_val
        self.max_val = max_val
        # min_val > max_val describes an empty range: nothing is
        # available until values are freed into it.
        if min_val <= max_val:
            self.avail = [[min_val, max_val]]
        else:
            self.avail = []
        self.autoextend = autoextend
        self.last = None
        self.lock = threading.Lock()

    def __repr__(self):
        myname = self.__class__.__name__
        if self.autoextend:
            ae = ''
        else:
            ae = ', autoextend=False'
        return '{0}({1}, {2}{3})'.format(myname, self.min_val, self.max_val, ae)

    def _find_block(self, val):
        """
        Find the block that contains val, or that should contain val.
        Remember that self.avail is a list of available ranges of
        the form [[min1, max1], [min2, max2], ..., [minN, maxN]]
        where max1 < min2, max2 < min3, ..., < minN.

        The input value either falls into one of the available
        blocks, or falls into a gap between two available blocks.
        We want to know which block it goes in, or if it goes
        between two, which block it comes before.

        We can do a binary search to find this block.  When we
        find it, return its index and its values.

        If we find that val is not in a block, return the position
        where the value should go, were it to be put into a new
        block by itself.  E.g., suppose val is 17, and there is a
        block [14,16] and a block [18,20].  We would make this
        [14,16],[17,17],[18,20] by inserting [17,17] between them.
        (Afterward, we will want to fuse all three blocks to make
        [14,20].  However, if we insert as block 0, e.g., if the
        list starts with [18,20] and we insert to get
        [17,17][18,20], we really end up just modifying block 0 to
        [17,20].  Or, if we insert as the new final block, we
        might end up modifying the last block.)

        Returns (index, pair) when val is inside block "index", or
        (index, None) when it is not; in the latter case "index" is
        that of the first block lying entirely above val, which is
        len(self.avail) if there is no such block.
        """
        low = 0
        high = len(self.avail) - 1
        while low <= high:
            mid = low + ((high - low) // 2)
            pair = self.avail[mid]
            if val < pair[0]:
                # must go before block mid
                high = mid - 1
            elif val > pair[1]:
                # must go after block mid
                low = mid + 1
            else:
                # val >= first and val <= last, so we found it
                return mid, pair
        # Low > high: no block actually contains val, or
        # there are no blocks at all.  If there are no blocks,
        # return block #0 and None.  Otherwise return the index
        # of the block val would be inserted in front of (one past
        # the last block if val is above everything), and None.
        return low, None

    def alloc(self, val=None):
        """
        Get new available value.

        If val is None, we start from the most recently
        allocated value, plus 1.

        If val is a numeric value, we start from that value.
        Hence, since the range is min_val..max_val, you can
        provide min_val to take the first available value.

        If the starting value is not available, the next higher
        available value is used, wrapping around to the lowest
        available value if there is nothing higher.

        This may return None, if no values are still available.
        """
        with self.lock:
            if val is None:
                val = self.last + 1 if self.last is not None else self.min_val
            if val is None or val > self.max_val or val < self.min_val:
                val = self.min_val
            i, pair = self._find_block(val)
            if pair is None:
                # Value is not available.  _find_block gave us the
                # index of the first block above val, so that block
                # (if it exists) holds the next available value.
                # If there is no such block, wrap around to block 0.
                # If there is no block 0, there are no available
                # values.
                nblocks = len(self.avail)
                if nblocks == 0:
                    return None
                if i >= nblocks:
                    i = 0
                pair = self.avail[i]
                val = pair[0]
            # Value val is available - take it.
            #
            # There are four special cases to handle.
            #
            # 1. pair[0] < val < pair[1]: split the pair.
            # 2. pair[0] == val < pair[1]: increase pair[0].
            # 3. pair[0] == val == pair[1]: delete the pair
            # 4. pair[0] < val == pair[1]: decrease pair[1].
            assert pair[0] <= val <= pair[1]
            if pair[0] == val:
                # case 2 or 3: Take the left edge or delete the pair.
                if val == pair[1]:
                    del self.avail[i]
                else:
                    pair[0] = val + 1
            else:
                # case 1 or 4: split the pair or take the right edge.
                if val == pair[1]:
                    pair[1] = val - 1
                else:
                    newpair = [val + 1, pair[1]]
                    pair[1] = val - 1
                    self.avail.insert(i + 1, newpair)
            self.last = val
            return val

    def free(self, val):
        "Free one value"
        self._free_multi('free', [val])

    def free_multi(self, values):
        "Free many values (provide any iterable)"
        values = list(values)
        values.sort()
        self._free_multi('free_multi', values)

    def _free_multi(self, how, values):
        """
        Free a (sorted) list of values.

        "how" is the name of the public entry point; it is only
        used as the prefix of ValueError messages.  The list is
        consumed (popped from the end).
        """
        if len(values) == 0:
            return
        with self.lock:
            while values:
                # Take highest value, and any contiguous lower values.
                # Note that it can be significantly faster this way
                # since coalesced ranges make for shorter copies.
                highval = values.pop()
                val = highval
                while len(values) and values[-1] == val - 1:
                    val = values.pop()
                self._free_range(how, val, highval)

    def _maybe_increase_max(self, how, val):
        """
        If needed, widen our range to include new high val -- i.e.,
        possibly increase self.max_val.  Do nothing if this is not a
        new all time high; fail if we have autoextend disabled.
        """
        if val <= self.max_val:
            return
        if self.autoextend:
            self.max_val = val
            return
        raise ValueError('{0}: {1} is outside range limit'.format(how, val))

    def _maybe_decrease_min(self, how, val):
        """
        If needed, widen our range to include new low val -- i.e.,
        possibly decrease self.min_val.  Do nothing if this is not a
        new all time low; fail if we have autoextend disabled.
        """
        if val >= self.min_val:
            return
        if self.autoextend:
            self.min_val = val
            return
        raise ValueError('{0}: {1} is outside range limit'.format(how, val))

    def _free_range(self, how, val, highval):
        """
        Free the range [val..highval].  Note, val==highval it's just
        a one-element range.

        Raises ValueError if val is already available, if the range
        runs into the next available block, or if the range lies
        outside min_val..max_val while autoextend is off.

        The lock is already held.
        """
        # Find the place to store the lower value.
        # We should never find an actual pair here.
        i, pair = self._find_block(val)
        if pair:
            raise ValueError('{0}: {1} already available'.format(how, val))
        # If we're freeing a range, check that the high val
        # does not span into the *next* range, either.
        if highval > val and i < len(self.avail):
            if self.avail[i][0] <= highval:
                raise ValueError('{0}: {2} (from {1}..{2}) already '
                                 'available'.format(how, val, highval))

        # We'll need to insert a block and perhaps fuse it
        # with blocks before and/or after.  First, check
        # whether there *is* a before and/or after, and find
        # their corresponding edges and whether we abut them.
        if i > 0:
            abuts_below = self.avail[i - 1][1] + 1 == val
        else:
            abuts_below = False
        if i < len(self.avail):
            abuts_above = self.avail[i][0] - 1 == highval
        else:
            abuts_above = False
        # Now there are these four cases:
        # 1. abuts below and above: fuse the two blocks.
        # 2. abuts below only: adjust previous (i-1'th) block
        # 3. abuts above only: adjust next (i'th) block
        # 4. doesn't abut: insert new block
        if abuts_below:
            if abuts_above:
                # case 1
                self.avail[i - 1][1] = self.avail[i][1]
                del self.avail[i]
            else:
                # case 2
                self._maybe_increase_max(how, highval)
                self.avail[i - 1][1] = highval
        else:
            if abuts_above:
                # case 3
                self._maybe_decrease_min(how, val)
                self.avail[i][0] = val
            else:
                # case 4
                self._maybe_decrease_min(how, val)
                self._maybe_increase_max(how, highval)
                newblock = [val, highval]
                self.avail.insert(i, newblock)
+""" + +import collections +import errno +import logging +import math +import os +import socket +import stat +import struct +import sys +import threading +import time + +import lerrno +import numalloc +import p9err +import pfod +import protocol + +# Timespec based timestamps, if present, have +# both seconds and nanoseconds. +Timespec = collections.namedtuple('Timespec', 'sec nsec') + +# File attributes from Tgetattr, or given to Tsetattr. +# (move to protocol.py?) We use pfod here instead of +# namedtuple so that we can create instances with all-None +# fields easily. +Fileattrs = pfod.pfod('Fileattrs', + 'ino mode uid gid nlink rdev size blksize blocks ' + 'atime mtime ctime btime gen data_version') + +qt2n = protocol.qid_type2name + +STD_P9_PORT=564 + +class P9Error(Exception): + pass + +class RemoteError(P9Error): + """ + Used when the remote returns an error. We track the client + (connection instance), the operation being attempted, the + message, and an error number and type. The message may be + from the Rerror reply, or from converting the errno in a dot-L + or dot-u Rerror reply. The error number may be None if the + type is 'Rerror' rather than 'Rlerror'. The message may be + None or empty string if a non-None errno supplies the error + instead. + """ + def __init__(self, client, op, msg, etype, errno): + self.client = str(client) + self.op = op + self.msg = msg + self.etype = etype # 'Rerror' or 'Rlerror' + self.errno = errno # may be None + self.message = self._get_message() + super(RemoteError, self).__init__(self, self.message) + + def __repr__(self): + return ('{0!r}({1}, {2}, {3}, {4}, ' + '{5})'.format(self.__class__.__name__, self.client, self.op, + self.msg, self.errno, self.etype)) + def __str__(self): + prefix = '{0}: {1}: '.format(self.client, self.op) + if self.errno: # check for "is not None", or just non-false-y? 
+ name = {'Rerror': '.u', 'Rlerror': 'Linux'}[self.etype] + middle = '[{0} error {1}] '.format(name, self.errno) + else: + middle = '' + return '{0}{1}{2}'.format(prefix, middle, self.message) + + def is_ENOTSUP(self): + if self.etype == 'Rlerror': + return self.errno == lerrno.EOPNOTSUPP + return self.errno == errno.EOPNOTSUPP + + def _get_message(self): + "get message based on self.msg or self.errno" + if self.errno is not None: + return { + 'Rlerror': p9err.dotl_strerror, + 'Rerror' : p9err.dotu_strerror, + }[self.etype](self.errno) + return self.msg + +class LocalError(P9Error): + pass + +class TEError(LocalError): + pass + +class P9SockIO(object): + """ + Common base for server and client, handle send and + receive to communications channel. Note that this + need not set up the channel initially, only the logger. + The channel is typically connected later. However, you + can provide one initially. + """ + def __init__(self, logger, name=None, server=None, port=STD_P9_PORT): + self.logger = logger + self.channel = None + self.name = name + self.maxio = None + self.size_coder = struct.Struct(' self.maxio: + raise LocalError('new maxio={0} > current {1}'.format(maxio, + self.maxio)) + self.maxio = maxio + self.max_payload = maxio - self.size_coder.size + + def declare_disconnected(self): + "Declare comm channel dead (note: leaves self.name set!)" + self.channel = None + self.maxio = None + + def shutwrite(self): + "Do a SHUT_WR on the outbound channel - can't send more" + chan = self.channel + # we're racing other threads here + try: + chan.shutdown(socket.SHUT_WR) + except (OSError, AttributeError): + pass + + def shutdown(self): + "Shut down comm channel" + if self.channel: + try: + self.channel.shutdown(socket.SHUT_RDWR) + except socket.error: + pass + self.channel.close() + self.declare_disconnected() + + def read(self): + """ + Try to read a complete packet. + + Returns '' for EOF, as read() usually does. 
def xread(self, nbytes):
    """
    Read exactly nbytes bytes from self.channel, calling recv()
    repeatedly if the data arrives in pieces.

    Returns b'' if EOF is seen before any data arrives.  If EOF is
    seen after some data has arrived, returns what was received,
    which is then shorter than nbytes.
    """
    assert nbytes > 0
    # The common case: one recv() delivers everything (or EOF).
    first = self.channel.recv(nbytes)
    if first == b'' or len(first) == nbytes:
        return first

    # Short read: keep asking for whatever is still missing and
    # glue the pieces together once we are done or hit EOF.
    pieces = [first]
    missing = nbytes - len(first)
    while missing > 0:
        chunk = self.channel.recv(missing)
        if chunk == b'':
            break
        pieces.append(chunk)
        missing -= len(chunk)
    return b''.join(pieces)
+ + Raises IOError if we can't write everything. + + Raises LocalError if len(data) exceeds max_payload. + """ + size = len(data) + assert size >= 4 + if size > self.max_payload: + raise LocalError('data length {0} exceeds ' + 'maximum {1}'.format(size, self.max_payload)) + self.channel.sendall(data) + +def _pathcat(prefix, suffix): + """ + Concatenate paths we are using on the server side. This is + basically just prefix + / + suffix, with two complications: + + It's possible we don't have a prefix path, in which case + we want the suffix without a leading slash. + + It's possible that the prefix is just b'/', in which case we + want prefix + suffix. + """ + if prefix: + if prefix == b'/': # or prefix.endswith(b'/')? + return prefix + suffix + return prefix + b'/' + suffix + return suffix + +class P9Client(P9SockIO): + """ + Act as client. + + We need the a logger (see logging), a timeout, and a protocol + version to request. By default, we will downgrade to a lower + version if asked. + + If server and port are supplied, they are remembered and become + the default for .connect() (which is still deferred). + + Note that we keep a table of fid-to-path in self.live_fids, + but at any time (except while holding the lock) a fid can + be deleted entirely, and the table entry may just be True + if we have no path name. In general, we update the name + when we can. + """ + def __init__(self, logger, timeout, version, may_downgrade=True, + server=None, port=None): + super(P9Client, self).__init__(logger) + self.timeout = timeout + self.iproto = protocol.p9_version(version) + self.may_downgrade = may_downgrade + self.tagalloc = numalloc.NumAlloc(0, 65534) + self.tagstate = {} + # The next bit is slighlty dirty: perhaps we should just + # allocate NOFID out of the 2**32-1 range, so as to avoid + # "knowing" that it's 2**32-1. 
+ self.fidalloc = numalloc.NumAlloc(0, protocol.td.NOFID - 1) + self.live_fids = {} + self.rootfid = None + self.rootqid = None + self.rthread = None + self.lock = threading.Lock() + self.new_replies = threading.Condition(self.lock) + self._monkeywrench = {} + self._server = server + self._port = port + self._unsup = {} + + def get_monkey(self, what): + "check for a monkey-wrench" + with self.lock: + wrench = self._monkeywrench.get(what) + if wrench is None: + return None + if isinstance(wrench, list): + # repeats wrench[0] times, or forever if that's 0 + ret = wrench[1] + if wrench[0] > 0: + wrench[0] -= 1 + if wrench[0] == 0: + del self._monkeywrench[what] + else: + ret = wrench + del self._monkeywrench[what] + return ret + + def set_monkey(self, what, how, repeat=None): + """ + Set a monkey-wrench. If repeat is not None it is the number of + times the wrench is applied (0 means forever, or until you call + set again with how=None). What is what to monkey-wrench, which + depends on the op. How is generally a replacement value. + """ + if how is None: + with self.lock: + try: + del self._monkeywrench[what] + except KeyError: + pass + return + if repeat is not None: + how = [repeat, how] + with self.lock: + self._monkeywrench[what] = how + + def get_tag(self, for_Tversion=False): + "get next available tag ID" + with self.lock: + if for_Tversion: + tag = 65535 + else: + tag = self.tagalloc.alloc() + if tag is None: + raise LocalError('all tags in use') + self.tagstate[tag] = True # ie, in use, still waiting + return tag + + def set_tag(self, tag, reply): + "set the reply info for the given tag" + assert tag >= 0 and tag < 65536 + with self.lock: + # check whether we're still waiting for the tag + state = self.tagstate.get(tag) + if state is True: + self.tagstate[tag] = reply # i.e., here's the answer + self.new_replies.notify_all() + return + # state must be one of these... + if state is False: + # We gave up on this tag. Reply came anyway. 
+ self.logger.log(logging.INFO, + '%s: got tag %d = %r after timing out on it', + self, tag, reply) + self.retire_tag_locked(tag) + return + if state is None: + # We got a tag back from the server that was not + # outstanding! + self.logger.log(logging.WARNING, + '%s: got tag %d = %r when tag %d not in use!', + self, tag, reply, tag) + return + # We got a second reply before handling the first reply! + self.logger.log(logging.WARNING, + '%s: got tag %d = %r when tag %d = %r!', + self, tag, reply, tag, state) + return + + def retire_tag(self, tag): + "retire the given tag - only used by the thread that handled the result" + if tag == 65535: + return + assert tag >= 0 and tag < 65535 + with self.lock: + self.retire_tag_locked(tag) + + def retire_tag_locked(self, tag): + "retire the given tag while holding self.lock" + # must check "in tagstate" because we can race + # with retire_all_tags. + if tag in self.tagstate: + del self.tagstate[tag] + self.tagalloc.free(tag) + + def retire_all_tags(self): + "retire all tags, after connection drop" + with self.lock: + # release all tags in any state (waiting, answered, timedout) + self.tagalloc.free_multi(self.tagstate.keys()) + self.tagstate = {} + self.new_replies.notify_all() + + def alloc_fid(self): + "allocate new fid" + with self.lock: + fid = self.fidalloc.alloc() + self.live_fids[fid] = True + return fid + + def getpath(self, fid): + "get path from fid, or return None if no path known, or not valid" + with self.lock: + path = self.live_fids.get(fid) + if path is True: + path = None + return path + + def getpathX(self, fid): + """ + Much like getpath, but return if necessary. + If we do have a path, return its repr(). 
def did_rename(self, fid, ncomp, newdir=None):
    """
    Announce that we renamed using a fid - we'll try to update
    other fids based on this (we can't really do it perfectly).

    NOTE: caller must provide a final-component.
    The caller can supply the new path (and should
    do so if the rename is not based on the retained path
    for the supplied fid, i.e., for rename ops where fid
    can move across directories).  The rules:

     - If newdir is None (default), we use stored path.
     - Otherwise, newdir provides the best approximation
       we have to the path that needs ncomp appended.

    (This is based on the fact that renames happen via Twstat
    or Trename, or Trenameat, which change just one tail component,
    but the path names vary.)

    Paths (stored ones, ncomp and newdir) are byte strings.
    Nothing is returned; self.live_fids is updated in place.
    """
    if ncomp is None:
        return
    opath = self.getpath(fid)
    if newdir is None:
        if opath is None:
            return
        ocomps = opath.split(b'/')
        ncomps = ocomps[0:-1]
    else:
        ocomps = None           # well, none yet anyway
        ncomps = newdir.split(b'/')
    ncomps.append(ncomp)
    # Use startswith() rather than opath[0]: indexing a byte string
    # gives an int on Python 3, which never compares equal to '/',
    # and an empty path has no element 0 at all.
    if opath is None or not opath.startswith(b'/'):
        # We don't have enough information to fix anything else.
        # Just store the new path and return.  We have at least
        # a partial path now, which is no worse than before.
        npath = b'/'.join(ncomps)
        with self.lock:
            if fid in self.live_fids:
                self.live_fids[fid] = npath
        return
    if ocomps is None:
        ocomps = opath.split(b'/')
    olen = len(ocomps)
    ofinal = ocomps[olen - 1]
    # Old paths is full path.  Find any other fids that start
    # with some or all the components in ocomps.  Note that if
    # we renamed /one/two/three to /four/five this winds up
    # renaming files /one/a to /four/a, /one/two/b to /four/five/b,
    # and so on.
    with self.lock:
        # items() exists on both Python 2 and 3 (iteritems() is
        # Python 2 only); the list() snapshot lets us assign into
        # the dictionary while walking it.
        for fid2, path2 in list(self.live_fids.items()):
            # Skip fids without byte-string paths
            if not isinstance(path2, bytes):
                continue
            # Before splitting (which is a bit expensive), try
            # a straightforward prefix match.  This might give
            # some false hits, e.g., prefix /one/two/threepenny
            # starts with /one/two/three, but it quickly eliminates
            # /raz/baz/mataz and the like.
            if not path2.startswith(opath):
                continue
            # Split up the path, and use that to make sure that
            # the final component is a full match.
            parts2 = path2.split(b'/')
            if parts2[olen - 1] != ofinal:
                continue
            # OK, path2 starts with the old (renamed) sequence.
            # Replace the old components with the new ones.
            # This updates the renamed fid when we come across
            # it!  It also handles a change in the number of
            # components, thanks to Python's slice assignment.
            parts2[0:olen] = ncomps
            self.live_fids[fid2] = b'/'.join(parts2)
+ while self.is_connected(): + pkt, is_full = super(P9Client, self).read() + if pkt == b'': + self.shutwrite() + self.retire_all_tags() + return + if not is_full: + self.logger.log(logging.WARNING, '%s: got short packet', self) + try: + # We have one special case: if we're not yet connected + # with a version, we must unpack *as if* it's a plain + # 9P2000 response. + if self.have_version: + resp = self.proto.unpack(pkt) + else: + resp = protocol.plain.unpack(pkt) + except protocol.SequenceError as err: + self.logger.log(logging.ERROR, '%s: bad response: %s', + self, err) + try: + resp = self.proto.unpack(pkt, noerror=True) + except protocol.SequenceError: + header = self.proto.unpack_header(pkt, noerror=True) + self.logger.log(logging.ERROR, + '%s: (not even raw-decodable)', self) + self.logger.log(logging.ERROR, + '%s: header decode produced %r', + self, header) + else: + self.logger.log(logging.ERROR, + '%s: raw decode produced %r', + self, resp) + # after this kind of problem, probably need to + # shut down, but let's leave that out for a bit + else: + # NB: all protocol responses have a "tag", + # so resp['tag'] always exists. + self.logger.log(logging.DEBUG, "read_resp: tag %d resp %r", resp.tag, resp) + self.set_tag(resp.tag, resp) + + def wait_for(self, tag): + """ + Wait for a response to the given tag. Return the response, + releasing the tag. If self.timeout is not None, wait at most + that long (and release the tag even if there's no reply), else + wait forever. + + If this returns None, either the tag was bad initially, or + a timeout occurred, or the connection got shut down. + """ + self.logger.log(logging.DEBUG, "wait_for: tag %d", tag) + if self.timeout is None: + deadline = None + else: + deadline = time.time() + self.timeout + with self.lock: + while True: + # tagstate is True (waiting) or False (timedout) or + # a valid response, or None if we've reset the tag + # states (retire_all_tags, after connection drop). 
+ resp = self.tagstate.get(tag, None) + if resp is None: + # out of sync, exit loop + break + if resp is True: + # still waiting for a response - wait some more + self.new_replies.wait(self.timeout) + if deadline and time.time() > deadline: + # Halt the waiting, but go around once more. + # Note we may have killed the tag by now though. + if tag in self.tagstate: + self.tagstate[tag] = False + continue + # resp is either False (timeout) or a reply. + # If resp is False, change it to None; the tag + # is now dead until we get a reply (then we + # just toss the reply). + # Otherwise, we're done with the tag: free it. + # In either case, stop now. + if resp is False: + resp = None + else: + self.tagalloc.free(tag) + del self.tagstate[tag] + break + return resp + + def badresp(self, req, resp): + """ + Complain that a response was not something expected. + """ + if resp is None: + self.shutdown() + raise TEError('{0}: {1}: timeout or EOF'.format(self, req)) + if isinstance(resp, protocol.rrd.Rlerror): + raise RemoteError(self, req, None, 'Rlerror', resp.ecode) + if isinstance(resp, protocol.rrd.Rerror): + if resp.errnum is None: + raise RemoteError(self, req, resp.errstr, 'Rerror', None) + raise RemoteError(self, req, None, 'Rerror', resp.errnum) + raise LocalError('{0}: {1} got response {2!r}'.format(self, req, resp)) + + def supports(self, req_code): + """ + Test self.proto.support(req_code) unless we've recorded that + while the protocol supports it, the client does not. + """ + return req_code not in self._unsup and self.proto.supports(req_code) + + def supports_all(self, *req_codes): + "basically just all(supports(...))" + return all(self.supports(code) for code in req_codes) + + def unsupported(self, req_code): + """ + Record an ENOTSUP (RemoteError was ENOTSUP) for a request. + Must be called from the op, this does not happen automatically. + (It's just an optimization.) 
+ """ + self._unsup[req_code] = True + + def connect(self, server=None, port=None): + """ + Connect to given server/port pair. + + The server and port are remembered. If given as None, + the last remembered values are used. The initial + remembered values are from the creation of this client + instance. + + New values are only remembered here on a *successful* + connect, however. + """ + if server is None: + server = self._server + if server is None: + raise LocalError('connect: no server specified and no default') + if port is None: + port = self._port + if port is None: + port = STD_P9_PORT + self.name = None # wipe out previous name, if any + super(P9Client, self).connect(server, port) + maxio = self.get_recommended_maxio() + self.declare_connected(None, None, maxio) + self.proto = self.iproto # revert to initial protocol + self.have_version = False + self.rthread = threading.Thread(target=self.read_responses) + self.rthread.start() + tag = self.get_tag(for_Tversion=True) + req = protocol.rrd.Tversion(tag=tag, msize=maxio, + version=self.get_monkey('version')) + super(P9Client, self).write(self.proto.pack_from(req)) + resp = self.wait_for(tag) + if not isinstance(resp, protocol.rrd.Rversion): + self.shutdown() + if isinstance(resp, protocol.rrd.Rerror): + version = req.version or self.proto.get_version() + # for python3, we need to convert version to string + if not isinstance(version, str): + version = version.decode('utf-8', 'surrogateescape') + raise RemoteError(self, 'version ' + version, + resp.errstr, 'Rerror', None) + self.badresp('version', resp) + their_maxio = resp.msize + try: + self.reduce_maxio(their_maxio) + except LocalError as err: + raise LocalError('{0}: sent maxio={1}, they tried {2}: ' + '{3}'.format(self, maxio, their_maxio, + err.args[0])) + if resp.version != self.proto.get_version(): + if not self.may_downgrade: + self.shutdown() + raise LocalError('{0}: they only support ' + 'version {1!r}'.format(self, resp.version)) + # raises 
LocalError if the version is bad + # (should we wrap it with a connect-to-{0} msg?) + self.proto = self.proto.downgrade_to(resp.version) + self._server = server + self._port = port + self.have_version = True + + def attach(self, afid, uname, aname, n_uname): + """ + Attach. + + Currently we don't know how to do authentication, + but we'll pass any provided afid through. + """ + if afid is None: + afid = protocol.td.NOFID + if uname is None: + uname = '' + if aname is None: + aname = '' + if n_uname is None: + n_uname = protocol.td.NONUNAME + tag = self.get_tag() + fid = self.alloc_fid() + pkt = self.proto.Tattach(tag=tag, fid=fid, afid=afid, + uname=uname, aname=aname, + n_uname=n_uname) + super(P9Client, self).write(pkt) + resp = self.wait_for(tag) + if not isinstance(resp, protocol.rrd.Rattach): + self.retire_fid(fid) + self.badresp('attach', resp) + # probably should check resp.qid + self.rootfid = fid + self.rootqid = resp.qid + self.setpath(fid, b'/') + + def shutdown(self): + "disconnect from server" + if self.rootfid is not None: + self.clunk(self.rootfid, ignore_error=True) + self.retire_all_tags() + self.retire_all_fids() + self.rootfid = None + self.rootqid = None + super(P9Client, self).shutdown() + if self.rthread: + self.rthread.join() + self.rthread = None + + def dupfid(self, fid): + """ + Copy existing fid to a new fid. + """ + tag = self.get_tag() + newfid = self.alloc_fid() + pkt = self.proto.Twalk(tag=tag, fid=fid, newfid=newfid, nwname=0, + wname=[]) + super(P9Client, self).write(pkt) + resp = self.wait_for(tag) + if not isinstance(resp, protocol.rrd.Rwalk): + self.retire_fid(newfid) + self.badresp('walk {0}'.format(self.getpathX(fid)), resp) + # Copy path too + self.setpath(newfid, fid) + return newfid + + def lookup(self, fid, components): + """ + Do Twalk. Caller must provide a starting fid, which should + be rootfid to look up from '/' - we do not do / vs . here. 
+ Caller must also provide a component-ized path (on purpose, + so that caller can provide invalid components like '' or '/'). + The components must be byte-strings as well, for the same + reason. + + We do allocate the new fid ourselves here, though. + + There's no logic here to split up long walks (yet?). + """ + # these are too easy to screw up, so check + if self.rootfid is None: + raise LocalError('{0}: not attached'.format(self)) + if (isinstance(components, (str, bytes) or + not all(isinstance(i, bytes) for i in components))): + raise LocalError('{0}: lookup: invalid ' + 'components {1!r}'.format(self, components)) + tag = self.get_tag() + newfid = self.alloc_fid() + startpath = self.getpath(fid) + pkt = self.proto.Twalk(tag=tag, fid=fid, newfid=newfid, + nwname=len(components), wname=components) + super(P9Client, self).write(pkt) + resp = self.wait_for(tag) + if not isinstance(resp, protocol.rrd.Rwalk): + self.retire_fid(newfid) + self.badresp('walk {0} in ' + '{1}'.format(components, self.getpathX(fid)), + resp) + # Just because we got Rwalk does not mean we got ALL the + # way down the path. Raise OSError(ENOENT) if we're short. + if resp.nwqid > len(components): + # ??? this should be impossible. Local error? Remote error? + # OS Error? + self.clunk(newfid, ignore_error=True) + raise LocalError('{0}: walk {1} in {2} returned {3} ' + 'items'.format(self, components, + self.getpathX(fid), resp.nwqid)) + if resp.nwqid < len(components): + self.clunk(newfid, ignore_error=True) + # Looking up a/b/c and got just a/b, c is what's missing. + # Looking up a/b/c and got just a, b is what's missing. 
+ missing = components[resp.nwqid] + within = _pathcat(startpath, b'/'.join(components[:resp.nwqid])) + raise OSError(errno.ENOENT, + '{0}: {1} in {2}'.format(os.strerror(errno.ENOENT), + missing, within)) + self.setpath(newfid, _pathcat(startpath, b'/'.join(components))) + return newfid, resp.wqid + + def lookup_last(self, fid, components): + """ + Like lookup, but return only the last component's qid. + As a special case, if components is an empty list, we + handle that. + """ + rfid, wqid = self.lookup(fid, components) + if len(wqid): + return rfid, wqid[-1] + if fid == self.rootfid: # usually true, if we get here at all + return rfid, self.rootqid + tag = self.get_tag() + pkt = self.proto.Tstat(tag=tag, fid=rfid) + super(P9Client, self).write(pkt) + resp = self.wait_for(tag) + if not isinstance(resp, protocol.rrd.Rstat): + self.badresp('stat {0}'.format(self.getpathX(fid)), resp) + statval = self.proto.unpack_wirestat(resp.data) + return rfid, statval.qid + + def clunk(self, fid, ignore_error=False): + "issue clunk(fid)" + tag = self.get_tag() + pkt = self.proto.Tclunk(tag=tag, fid=fid) + super(P9Client, self).write(pkt) + resp = self.wait_for(tag) + if not isinstance(resp, protocol.rrd.Rclunk): + if ignore_error: + return + self.badresp('clunk {0}'.format(self.getpathX(fid)), resp) + self.retire_fid(fid) + + def remove(self, fid, ignore_error=False): + "issue remove (old style), which also clunks fid" + tag = self.get_tag() + pkt = self.proto.Tremove(tag=tag, fid=fid) + super(P9Client, self).write(pkt) + resp = self.wait_for(tag) + if not isinstance(resp, protocol.rrd.Rremove): + if ignore_error: + # remove failed: still need to clunk the fid + self.clunk(fid, True) + return + self.badresp('remove {0}'.format(self.getpathX(fid)), resp) + self.retire_fid(fid) + + def create(self, fid, name, perm, mode, filetype=None, extension=b''): + """ + Issue create op (note that this may be mkdir, symlink, etc). 
+ fid is the directory in which the create happens, and for + regular files, it becomes, on success, a fid referring to + the now-open file. perm is, e.g., 0644, 0755, etc., + optionally with additional high bits. mode is a mode + byte (e.g., protocol.td.ORDWR, or OWRONLY|OTRUNC, etc.). + + As a service to callers, we take two optional arguments + specifying the file type ('dir', 'symlink', 'device', + 'fifo', or 'socket') and additional info if needed. + The additional info for a symlink is the target of the + link (a byte string), and the additional info for a device + is a byte string with "b " or "c ". + + Otherwise, callers can leave filetype=None and encode the bits + into the mode (caller must still provide extension if needed). + + We do NOT check whether the extension matches extra DM bits, + or that there's only one DM bit set, or whatever, since this + is a testing setup. + """ + tag = self.get_tag() + if filetype is not None: + perm |= { + 'dir': protocol.td.DMDIR, + 'symlink': protocol.td.DMSYMLINK, + 'device': protocol.td.DMDEVICE, + 'fifo': protocol.td.DMNAMEDPIPE, + 'socket': protocol.td.DMSOCKET, + }[filetype] + pkt = self.proto.Tcreate(tag=tag, fid=fid, name=name, + perm=perm, mode=mode, extension=extension) + super(P9Client, self).write(pkt) + resp = self.wait_for(tag) + if not isinstance(resp, protocol.rrd.Rcreate): + self.badresp('create {0} in {1}'.format(name, self.getpathX(fid)), + resp) + if resp.qid.type == protocol.td.QTFILE: + # Creating a regular file opens the file, + # thus changing the fid's path. 
+ self.setpath(fid, _pathcat(self.getpath(fid), name)) + return resp.qid, resp.iounit + + def open(self, fid, mode): + "use Topen to open file or directory fid (mode is 1 byte)" + tag = self.get_tag() + pkt = self.proto.Topen(tag=tag, fid=fid, mode=mode) + super(P9Client, self).write(pkt) + resp = self.wait_for(tag) + if not isinstance(resp, protocol.rrd.Ropen): + self.badresp('open {0}'.format(self.getpathX(fid)), resp) + return resp.qid, resp.iounit + + def lopen(self, fid, flags): + "use Tlopen to open file or directory fid (flags from L_O_*)" + tag = self.get_tag() + pkt = self.proto.Tlopen(tag=tag, fid=fid, flags=flags) + super(P9Client, self).write(pkt) + resp = self.wait_for(tag) + if not isinstance(resp, protocol.rrd.Rlopen): + self.badresp('lopen {0}'.format(self.getpathX(fid)), resp) + return resp.qid, resp.iounit + + def read(self, fid, offset, count): + "read (up to) count bytes from offset, given open fid" + tag = self.get_tag() + pkt = self.proto.Tread(tag=tag, fid=fid, offset=offset, count=count) + super(P9Client, self).write(pkt) + resp = self.wait_for(tag) + if not isinstance(resp, protocol.rrd.Rread): + self.badresp('read {0} bytes at offset {1} in ' + '{2}'.format(count, offset, self.getpathX(fid)), + resp) + return resp.data + + def write(self, fid, offset, data): + "write (up to) count bytes to offset, given open fid" + tag = self.get_tag() + pkt = self.proto.Twrite(tag=tag, fid=fid, offset=offset, + count=len(data), data=data) + super(P9Client, self).write(pkt) + resp = self.wait_for(tag) + if not isinstance(resp, protocol.rrd.Rwrite): + self.badresp('write {0} bytes at offset {1} in ' + '{2}'.format(len(data), offset, self.getpathX(fid)), + resp) + return resp.count + + # Caller may + # - pass an actual stat object, or + # - pass in all the individual to-set items by keyword, or + # - mix and match a bit: get an existing stat, then use + # keywords to override fields. 
# We convert "None"s to the internal "do not change" values,
# and for diagnostic purposes, can turn "do not change" back
# to None at the end, too.
def wstat(self, fid, statobj=None, **kwargs):
    """
    Issue Twstat on fid.

    statobj, if given, is copied (the caller's object is not
    modified); keyword arguments override individual stat fields.
    Fields that are None, or that cannot be set through wstat,
    are sent as their "do not change" values.

    Raises ValueError for a keyword naming a forbidden field and
    TypeError for a keyword that is not a stat field at all.
    On success, a changed name is propagated via did_rename().
    """
    if statobj is None:
        statobj = protocol.td.stat()
    else:
        statobj = statobj._copy()
    # Fields in stat that you can't send as a wstat: the
    # type and qid are informative.  Similarly, the
    # 'extension' is an input when creating a file but
    # read-only when stat-ing.
    #
    # It's not clear what it means to set dev, but we'll leave
    # it in as an optional parameter here.  fs/backend.c just
    # errors out on an attempt to change it.
    if self.proto == protocol.plain:
        forbid = ('type', 'qid', 'extension',
                  'n_uid', 'n_gid', 'n_muid')
    else:
        forbid = ('type', 'qid', 'extension')
    nochange = {
        'type': 0,
        'qid': protocol.td.qid(0, 0, 0),
        'dev': 2**32 - 1,
        'mode': 2**32 - 1,
        'atime': 2**32 - 1,
        'mtime': 2**32 - 1,
        'length': 2**64 - 1,
        'name': b'',
        'uid': b'',
        'gid': b'',
        'muid': b'',
        'extension': b'',
        'n_uid': 2**32 - 1,
        'n_gid': 2**32 - 1,
        'n_muid': 2**32 - 1,
    }
    for field in statobj._fields:
        if field in kwargs:
            if field in forbid:
                raise ValueError('cannot wstat a stat.{0}'.format(field))
            statobj[field] = kwargs.pop(field)
        else:
            if field in forbid or statobj[field] is None:
                statobj[field] = nochange[field]
    if kwargs:
        # Report the offending keyword *name*; popitem() would put
        # a (name, value) tuple into the message.
        raise TypeError('wstat() got an unexpected keyword argument '
                        '{0!r}'.format(next(iter(kwargs))))

    data = self.proto.pack_wirestat(statobj)
    tag = self.get_tag()
    pkt = self.proto.Twstat(tag=tag, fid=fid, data=data)
    super(P9Client, self).write(pkt)
    resp = self.wait_for(tag)
    if not isinstance(resp, protocol.rrd.Rwstat):
        # For error viewing, switch all the do-not-change
        # and can't-change fields to None.
        statobj.qid = None
        for field in statobj._fields:
            if field in forbid:
                statobj[field] = None
            elif field in nochange and statobj[field] == nochange[field]:
                statobj[field] = None
        self.badresp('wstat {0}={1}'.format(self.getpathX(fid), statobj),
                     resp)
    # wstat worked - change path names if needed
    if statobj.name != b'':
        self.did_rename(fid, statobj.name)

def readdir(self, fid, offset, count):
    "read (up to) count bytes of dir data from offset, given open fid"
    tag = self.get_tag()
    pkt = self.proto.Treaddir(tag=tag, fid=fid, offset=offset, count=count)
    super(P9Client, self).write(pkt)
    resp = self.wait_for(tag)
    if not isinstance(resp, protocol.rrd.Rreaddir):
        self.badresp('readdir {0} bytes at offset {1} in '
                     '{2}'.format(count, offset, self.getpathX(fid)),
                     resp)
    return resp.data

def rename(self, fid, dfid, name):
    "invoke Trename: rename file <fid> to <dfid>/name"
    tag = self.get_tag()
    pkt = self.proto.Trename(tag=tag, fid=fid, dfid=dfid, name=name)
    super(P9Client, self).write(pkt)
    resp = self.wait_for(tag)
    if not isinstance(resp, protocol.rrd.Rrename):
        self.badresp('rename {0} to {2} in '
                     '{1}'.format(self.getpathX(fid),
                                  self.getpathX(dfid), name),
                     resp)
    self.did_rename(fid, name, self.getpath(dfid))

def renameat(self, olddirfid, oldname, newdirfid, newname):
    "invoke Trenameat: rename <olddirfid>/oldname to <newdirfid>/newname"
    tag = self.get_tag()
    pkt = self.proto.Trenameat(tag=tag,
                               olddirfid=olddirfid, oldname=oldname,
                               newdirfid=newdirfid, newname=newname)
    super(P9Client, self).write(pkt)
    resp = self.wait_for(tag)
    if not isinstance(resp, protocol.rrd.Rrenameat):
        # Message reads "rename <name> in <dir> to <name> in <dir>";
        # the arguments are listed in exactly that order.
        self.badresp('rename {0} in {1} to {2} in '
                     '{3}'.format(oldname, self.getpathX(olddirfid),
                                  newname, self.getpathX(newdirfid)),
                     resp)
    # There's no renamed *fid*, just a renamed file!  So no
    # call to self.did_rename().

def unlinkat(self, dirfd, name, flags):
    "invoke Tunlinkat - flags should be 0 or protocol.td.AT_REMOVEDIR"
    tag = self.get_tag()
    request = self.proto.Tunlinkat(tag=tag, dirfd=dirfd,
                                   name=name, flags=flags)
    super(P9Client, self).write(request)
    reply = self.wait_for(tag)
    if isinstance(reply, protocol.rrd.Runlinkat):
        return
    # anything other than Runlinkat is reported via badresp
    self.badresp('unlinkat {0} in '
                 '{1}'.format(name, self.getpathX(dirfd)), reply)

def decode_stat_objects(self, bstring, noerror=False):
    """
    Decode the byte string returned by a read on a directory
    into a list of stat objects (for .u these carry extra data).

    A SequenceError is possible if the data are incorrect,
    unless noerror=True is passed through to the unpacker.
    """
    entries = []
    pos = 0
    end = len(bstring)
    while pos < end:
        entry, pos = self.proto.unpack_wirestat(bstring, pos, noerror)
        entries.append(entry)
    return entries

def decode_readdir_dirents(self, bstring, noerror=False):
    """
    Decode the byte string returned by readdir on a directory
    into a list of dirent objects.

    A SequenceError is possible if the data are incorrect,
    unless noerror=True is passed through to the unpacker.
    """
    entries = []
    pos = 0
    end = len(bstring)
    while pos < end:
        entry, pos = self.proto.unpack_dirent(bstring, pos, noerror)
        entries.append(entry)
    return entries

def lcreate(self, fid, name, lflags, mode, gid):
    "issue lcreate (.L)"
    tag = self.get_tag()
    request = self.proto.Tlcreate(tag=tag, fid=fid, name=name,
                                  flags=lflags, mode=mode, gid=gid)
    super(P9Client, self).write(request)
    reply = self.wait_for(tag)
    if not isinstance(reply, protocol.rrd.Rlcreate):
        self.badresp('create {0} in '
                     '{1}'.format(name, self.getpathX(fid)), reply)
    # The fid now refers to the freshly created (and opened)
    # file, so its remembered path gains the new name.
    parent = self.getpath(fid)
    self.setpath(fid, _pathcat(parent, name))
    return reply.qid, reply.iounit

def mkdir(self, dfid, name, mode, gid):
    "issue mkdir (.L)"
    tag = self.get_tag()
    request = self.proto.Tmkdir(tag=tag, dfid=dfid, name=name,
                                mode=mode, gid=gid)
    super(P9Client, self).write(request)
    reply = self.wait_for(tag)
    if not isinstance(reply, protocol.rrd.Rmkdir):
        self.badresp('mkdir {0} in '
                     '{1}'.format(name, self.getpathX(dfid)), reply)
    return reply.qid

# We don't call this getattr(), for the obvious reason.
def Tgetattr(self, fid, request_mask=protocol.td.GETATTR_ALL):
    "issue Tgetattr.L - get what you ask for, or everything by default"
    tag = self.get_tag()
    request = self.proto.Tgetattr(tag=tag, fid=fid,
                                  request_mask=request_mask)
    super(P9Client, self).write(request)
    reply = self.wait_for(tag)
    if not isinstance(reply, protocol.rrd.Rgetattr):
        self.badresp('Tgetattr {0} of '
                     '{1}'.format(request_mask, self.getpathX(fid)), reply)

    plain_fields = ('mode', 'nlink', 'uid', 'gid', 'rdev',
                    'size', 'blocks', 'gen', 'data_version')
    time_fields = ('atime', 'mtime', 'ctime', 'btime')

    result = Fileattrs()
    # Scalar attributes: copy each one whose valid-bit is set.
    for key in plain_fields:
        mask = getattr(protocol.td, 'GETATTR_' + key.upper())
        if reply.valid & mask:
            result[key] = reply[key]
    # Timestamps arrive as separate <name>_sec / <name>_nsec fields.
    for key in time_fields:
        mask = getattr(protocol.td, 'GETATTR_' + key.upper())
        if reply.valid & mask:
            result[key] = Timespec(sec=reply[key + '_sec'],
                                   nsec=reply[key + '_nsec'])
    # blksize has no control bit; it is copied unconditionally.
    result.blksize = reply.blksize
    # The inode number is carried in qid.path.
    if reply.valid & protocol.td.GETATTR_INO:
        result.ino = reply.qid.path
    return result

# We don't call this setattr(), for the obvious reason.
# See wstat for usage.  Note that time fields can be set
# with either second or nanosecond resolutions, and some
# can be set without supplying an actual timestamp, so
# this is all pretty ad-hoc.
+ # + # There's also one keyword-only argument, ctime=, + # which means "set SETATTR_CTIME". This has the same effect + # as supplying valid=protocol.td.SETATTR_CTIME. + def Tsetattr(self, fid, valid=0, attrs=None, **kwargs): + if attrs is None: + attrs = Fileattrs() + else: + attrs = attrs._copy() + + # Start with an empty (all-zero) Tsetattr instance. We + # don't really need to zero out tag and fid, but it doesn't + # hurt. Note that if caller says, e.g., valid=SETATTR_SIZE + # but does not supply an incoming size (via "attrs" or a size= + # argument), we'll ask to set that field to 0. + attrobj = protocol.rrd.Tsetattr() + for field in attrobj._fields: + attrobj[field] = 0 + + # In this case, forbid means "only as kwargs": these values + # in an incoming attrs object are merely ignored. + forbid = ('ino', 'nlink', 'rdev', 'blksize', 'blocks', 'btime', + 'gen', 'data_version') + for field in attrs._fields: + if field in kwargs: + if field in forbid: + raise ValueError('cannot Tsetattr {0}'.format(field)) + attrs[field] = kwargs.pop(field) + elif attrs[field] is None: + continue + # OK, we're setting this attribute. Many are just + # numeric - if that's the case, we're good, set the + # field and the appropriate bit. + bitname = 'SETATTR_' + field.upper() + bit = getattr(protocol.td, bitname) + if field in ('mode', 'uid', 'gid', 'size'): + valid |= bit + attrobj[field] = attrs[field] + continue + # Timestamps are special: The value may be given as + # an integer (seconds), or as a float (we convert to + # (we convert to sec+nsec), or as a timespec (sec+nsec). + # If specified as 0, we mean "we are not providing the + # actual time, use the server's time." + # + # The ctime field's value, if any, is *ignored*. 
+ if field in ('atime', 'mtime'): + value = attrs[field] + if hasattr(value, '__len__'): + if len(value) != 2: + raise ValueError('invalid {0}={1!r}'.format(field, + value)) + sec = value[0] + nsec = value[1] + else: + sec = value + if isinstance(sec, float): + nsec, sec = math.modf(sec) + nsec = int(round(nsec * 1000000000)) + else: + nsec = 0 + valid |= bit + attrobj[field + '_sec'] = sec + attrobj[field + '_nsec'] = nsec + if sec != 0 or nsec != 0: + # Add SETATTR_ATIME_SET or SETATTR_MTIME_SET + # as appropriate, to tell the server to *this + # specific* time, instead of just "server now". + bit = getattr(protocol.td, bitname + '_SET') + valid |= bit + if 'ctime' in kwargs: + kwargs.pop('ctime') + valid |= protocol.td.SETATTR_CTIME + if kwargs: + raise TypeError('Tsetattr() got an unexpected keyword argument ' + '{0!r}'.format(kwargs.popitem())) + + tag = self.get_tag() + attrobj.valid = valid + attrobj.tag = tag + attrobj.fid = fid + pkt = self.proto.pack(attrobj) + super(P9Client, self).write(pkt) + resp = self.wait_for(tag) + if not isinstance(resp, protocol.rrd.Rsetattr): + self.badresp('Tsetattr {0} {1} of ' + '{2}'.format(valid, attrs, self.getpathX(fid)), resp) + + def xattrwalk(self, fid, name=None): + "walk one name or all names: caller should read() the returned fid" + tag = self.get_tag() + newfid = self.alloc_fid() + pkt = self.proto.Txattrwalk(tag=tag, fid=fid, newfid=newfid, + name=name or '') + super(P9Client, self).write(pkt) + resp = self.wait_for(tag) + if not isinstance(resp, protocol.rrd.Rxattrwalk): + self.retire_fid(newfid) + self.badresp('Txattrwalk {0} of ' + '{1}'.format(name, self.getpathX(fid)), resp) + if name: + self.setpath(newfid, 'xattr:' + name) + else: + self.setpath(newfid, 'xattr') + return newfid, resp.size + + def _pathsplit(self, path, startdir, allow_empty=False): + "common code for uxlookup and uxopen" + if self.rootfid is None: + raise LocalError('{0}: not attached'.format(self)) + if path.startswith(b'/') or startdir 
is None: + startdir = self.rootfid + components = [i for i in path.split(b'/') if i != b''] + if len(components) == 0 and not allow_empty: + raise LocalError('{0}: {1!r}: empty path'.format(self, path)) + return components, startdir + + def uxlookup(self, path, startdir=None): + """ + Unix-style lookup. That is, lookup('/foo/bar') or + lookup('foo/bar'). If startdir is not None and the + path does not start with '/' we look up from there. + """ + components, startdir = self._pathsplit(path, startdir, allow_empty=True) + return self.lookup_last(startdir, components) + + def uxopen(self, path, oflags=0, perm=None, gid=None, + startdir=None, filetype=None): + """ + Unix-style open()-with-option-to-create, or mkdir(). + oflags is 0/1/2 with optional os.O_CREAT, perm defaults + to 0o666 (files) or 0o777 (directories). If we use + a Linux create or mkdir op, we will need a gid, but it's + not required if you are opening an existing file. + + Adds a final boolean value for "did we actually create". + Raises OSError if you ask for a directory but it's a file, + or vice versa. (??? reconsider this later) + + Note that this does not handle other file types, only + directories. + """ + needtype = { + 'dir': protocol.td.QTDIR, + None: protocol.td.QTFILE, + }[filetype] + omode_byte = oflags & 3 # cheating + # allow looking up /, but not creating / + allow_empty = (oflags & os.O_CREAT) == 0 + components, startdir = self._pathsplit(path, startdir, + allow_empty=allow_empty) + if not (oflags & os.O_CREAT): + # Not creating, i.e., just look up and open existing file/dir. + fid, qid = self.lookup_last(startdir, components) + # If we got this far, use Topen on the fid; we did not + # create the file. + return self._uxopen2(path, needtype, fid, qid, omode_byte, False) + + # Only used if using dot-L, but make sure it's always provided + # since this is generic. 
+ if gid is None: + raise ValueError('gid is required when creating file or dir') + + if len(components) > 1: + # Look up all but last component; this part must succeed. + fid, _ = self.lookup(startdir, components[:-1]) + + # Now proceed with the final component, using fid + # as the start dir. Remember to clunk it! + startdir = fid + clunk_startdir = True + components = components[-1:] + else: + # Use startdir as the start dir, and get a new fid. + # Do not clunk startdir! + clunk_startdir = False + fid = self.alloc_fid() + + # Now look up the (single) component. If this fails, + # assume the file or directory needs to be created. + tag = self.get_tag() + pkt = self.proto.Twalk(tag=tag, fid=startdir, newfid=fid, + nwname=1, wname=components) + super(P9Client, self).write(pkt) + resp = self.wait_for(tag) + if isinstance(resp, protocol.rrd.Rwalk): + if clunk_startdir: + self.clunk(startdir, ignore_error=True) + # fid successfully walked to refer to final component. + # Just need to actually open the file. + self.setpath(fid, _pathcat(self.getpath(startdir), components[0])) + qid = resp.wqid[0] + return self._uxopen2(needtype, fid, qid, omode_byte, False) + + # Walk failed. If we allocated a fid, retire it. Then set + # up a fid that points to the parent directory in which to + # create the file or directory. Note that if we're creating + # a file, this fid will get changed so that it points to the + # file instead of the directory, but if we're creating a + # directory, it will be unchanged. + if fid != startdir: + self.retire_fid(fid) + fid = self.dupfid(startdir) + + try: + qid, iounit = self._uxcreate(filetype, fid, components[0], + oflags, omode_byte, perm, gid) + + # Success. If we created an ordinary file, we have everything + # now as create alters the incoming (dir) fid to open the file. + # Otherwise (mkdir), we need to open the file, as with + # a successful lookup. + # + # Note that qid type should match "needtype". 
def _uxcreate(self, filetype, fid, name, oflags, omode_byte, perm, gid):
    """
    Helper for uxopen (P9Client method): create a directory or a
    file called `name` inside the directory that `fid` refers to.

    filetype   -- 'dir' to make a directory; anything else makes a file.
    fid        -- parent directory fid on input.  When a file is
                  created, the create op re-points this fid at the
                  new file.
    oflags     -- OS-style open flags; only used for the dot-L file
                  create, where flags_to_linux_flags() translates them.
    omode_byte -- plan9 open mode; only used by the plan9 create op.
    perm       -- permission bits, or None for the default (0o777
                  for a directory, 0o666 for a file).
    gid        -- group id; only passed to the dot-L operations.

    Returns a (qid, iounit) pair.  The dot-L mkdir has no iounit,
    so the second element is None in that case.
    """
    if self.supports_all(protocol.td.Tlcreate, protocol.td.Tmkdir):
        # Use Linux style create / mkdir.
        if filetype == 'dir':
            if perm is None:
                perm = 0o777
            # The parent directory is `fid`; there is no `startdir`
            # in this helper (the old code raised NameError here).
            return self.mkdir(fid, name, perm, gid), None
        if perm is None:
            perm = 0o666
        lflags = flags_to_linux_flags(oflags)
        return self.lcreate(fid, name, lflags, perm, gid)

    # Plan9 style create: a directory is requested via the DMDIR bit.
    if filetype == 'dir':
        if perm is None:
            perm = protocol.td.DMDIR | 0o777
        else:
            perm |= protocol.td.DMDIR
    else:
        if perm is None:
            perm = 0o666
    return self.create(fid, name, perm, omode_byte)

def _uxopen2(self, needtype, fid, qid, omode_byte, didcreate, path=None):
    """
    Common code for finishing up uxopen (P9Client method): check
    that the qid has the wanted type, then open the fid.

    needtype   -- the qid type the caller requires.
    fid, qid   -- the walked-to fid and the qid reported for it.
    omode_byte -- plan9 open mode handed to self.open().
    didcreate  -- passed through unchanged as the last result item.
    path       -- optional name used in the error message; when
                  omitted it is taken from self.getpathX(fid).

    Returns (fid, qid, iounit, didcreate) with the qid and iounit
    that the open reported.

    Raises OSError (errno from _wrong_file_type) when the type does
    not match; the fid is clunked first.
    """
    if qid.type != needtype:
        # Get the printable name before clunking the fid.
        # NOTE(review): assumes getpathX() yields a printable path for
        # any fid, as it is used for messages elsewhere -- confirm.
        if path is None:
            path = self.getpathX(fid)
        self.clunk(fid, ignore_error=True)
        raise OSError(_wrong_file_type(qid),
                      '{0}: is {1}, expected '
                      '{2}'.format(path, qt2n(qid.type), qt2n(needtype)))
    qid, iounit = self.open(fid, omode_byte)
    # ? should we re-check qid? it should not have changed
    return fid, qid, iounit, didcreate

def uxmkdir(self, path, perm, gid, startdir=None):
    """
    Unix-style mkdir (P9Client method).

    All but the last path component are looked up first; the last
    component is then created in that directory.  The gid is only
    applied if we are using .L style mkdir.

    Returns the qid reported for the new directory.
    """
    components, startdir = self._pathsplit(path, startdir)
    clunkme = None
    if len(components) > 1:
        fid, _ = self.lookup(startdir, components[:-1])
        startdir = fid
        clunkme = fid
        components = components[-1:]
    try:
        if self.supports(protocol.td.Tmkdir):
            qid = self.mkdir(startdir, components[0], perm, gid)
        else:
            qid, _ = self.create(startdir, components[0],
                                 protocol.td.DMDIR | perm,
                                 protocol.td.OREAD)
            # Should we chown/chgrp the dir?
    finally:
        # Compare against None: a fid numbered 0 is still a fid
        # that we looked up and must release.
        if clunkme is not None:
            self.clunk(clunkme, ignore_error=True)
    return qid
def uxreaddir_stat(self, path, startdir=None):
    """
    Use directory read to get plan9 style stat data (plain or .u
    readdir) for the directory named by `path` (P9Client method).

    This looks up a fid for the path, opens it, reads it, and then
    clunks the fid.  If you already have a fid, you may want to use
    uxreaddir_stat_fid (but note that it opens, yet does not clunk,
    the fid).

    Returns (qid, statvals): the directory's qid plus the list of
    decoded stat objects for its contents.

    Raises OSError(ENOTDIR) if the path is not a directory.
    """
    components, startdir = self._pathsplit(path, startdir)
    fid, qid = self.lookup_last(startdir, components)
    try:
        if qid.type != protocol.td.QTDIR:
            raise OSError(errno.ENOTDIR,
                          '{0}: {1}'.format(self.getpathX(fid),
                                            os.strerror(errno.ENOTDIR)))
        # The helper is spelled uxreaddir_stat_fid (no underscore
        # after "ux"); the old spelling raised AttributeError.
        statvals = self.uxreaddir_stat_fid(fid)
        return qid, statvals
    finally:
        # The fid was obtained here, so release it here on every path.
        self.clunk(fid, ignore_error=True)

def uxreaddir_stat_fid(self, fid):
    """
    Implement the readdir loop that extracts stat values
    (P9Client method).  This opens, but does not clunk, the
    given fid.

    Unlike uxreaddir_stat(), if this is applied to a file,
    rather than a directory, it just returns no entries.

    Returns the list of decoded stat objects.
    """
    statvals = []
    qid, iounit = self.open(fid, protocol.td.OREAD)
    # ?? is a zero iounit allowed? if so, what do we use here?
    if qid.type == protocol.td.QTDIR:
        if iounit <= 0:
            iounit = 512 # probably good enough
        offset = 0
        while True:
            bstring = self.read(fid, offset, iounit)
            if bstring == b'':
                # An empty read ends the directory.
                break
            statvals.extend(self.decode_stat_objects(bstring))
            # Plain reads advance by the number of bytes returned.
            offset += len(bstring)
    return statvals

def uxreaddir_dotl_fid(self, fid):
    """
    Implement the readdir loop that uses dot-L style dirents
    (P9Client method).  This opens, but does not clunk, the
    given fid.

    If applied to a file, the lopen should fail, because of the
    L_O_DIRECTORY flag.

    Returns the list of decoded dirent objects.
    """
    dirents = []
    qid, iounit = self.lopen(fid, protocol.td.OREAD |
                                  protocol.td.L_O_DIRECTORY)
    # ?? is a zero iounit allowed? if so, what do we use here?
    # but, we want a minimum of over 256 anyway, let's go for 512
    if iounit < 512:
        iounit = 512
    offset = 0
    while True:
        bstring = self.readdir(fid, offset, iounit)
        if bstring == b'':
            break
        ents = self.decode_readdir_dirents(bstring)
        if len(ents) == 0:
            break               # ???
        dirents.extend(ents)
        # dot-L readdir resumes from the offset stored in the last
        # dirent, not from a byte count.
        offset = ents[-1].offset
    return dirents
def _rm_file_by_dfid(self, dfid, name, force=False):
    """
    Remove a file whose name is `name` (no path, just a component
    name) whose parent directory is `dfid` (P9Client method).  We
    may assume that the file really is a file (or a socket, or
    fifo, or some such, but definitely not a directory).

    If force is set, ignore failures.
    """
    # If we have unlinkat, that's the fast way.  But it may
    # return an ENOTSUP error.  If it does we shouldn't bother
    # doing this again.
    if self.supports(protocol.td.Tunlinkat):
        try:
            self.unlinkat(dfid, name, 0)
            return
        except RemoteError as err:
            if not err.is_ENOTSUP():
                # A real failure: honour the documented contract
                # and swallow it only when force is set.
                if force:
                    return
                raise
            self.unsupported(protocol.td.Tunlinkat)
            # fall through to remove() op
    # Fall back to lookup + remove.
    try:
        fid, _ = self.lookup_last(dfid, [name])
    except RemoteError:
        # If this has an errno we could tell ENOENT from EPERM,
        # and actually raise an error for the latter.  Should we?
        return
    self.remove(fid, ignore_error=force)

def _rm_recursive(self, dfid, filetype, force):
    """
    Remove the contents of the directory `dfid`, recursing into
    subdirectories via uxremove (P9Client method).

    filetype is probably None; it is only passed through to
    uxremove for subdirectories.
    NOTE(review): an earlier description said that filetype 'dir'
    makes this fail on non-directory entries, but nothing here
    checks that -- confirm the intended behavior.

    If force is set, ignore failures.

    Although we open dfid (via the readdir.*_fid calls) we
    do not clunk it here; that's the caller's job.
    """
    # first, remove contents
    if self.supports_all(protocol.td.Tlopen, protocol.td.Treaddir):
        for entry in self.uxreaddir_dotl_fid(dfid):
            if entry.name in (b'.', b'..'):
                continue
            fid, _ = self.lookup(dfid, [entry.name])
            try:
                # NOTE(review): every other operation in this file is
                # spelled in lower case (self.mkdir, self.lopen, ...);
                # confirm that Tgetattr is the right method name.
                attrs = self.Tgetattr(fid, protocol.td.GETATTR_MODE)
                if stat.S_ISDIR(attrs.mode):
                    self.uxremove(entry.name, dfid, filetype, force, True)
                else:
                    # Honour force here too, as the other remove
                    # calls in this file do.
                    self.remove(fid, ignore_error=force)
                    fid = None
            finally:
                # fid is set to None once remove() has returned;
                # otherwise it is still ours to clunk.
                if fid is not None:
                    self.clunk(fid, ignore_error=True)
    else:
        for statobj in self.uxreaddir_stat_fid(dfid):
            # skip . and ..
            name = statobj.name
            if name in (b'.', b'..'):
                continue
            if statobj.qid.type == protocol.td.QTDIR:
                self.uxremove(name, dfid, filetype, force, True)
            else:
                self._rm_file_by_dfid(dfid, name, force)

def _wrong_file_type(qid):
    """
    Return the errno to pass to OSError for a file-type mismatch:
    EISDIR if the qid is a directory, otherwise ENOTDIR.
    """
    if qid.type == protocol.td.QTDIR:
        return errno.EISDIR
    return errno.ENOTDIR

def flags_to_linux_flags(flags):
    """
    Convert OS flags (O_CREAT etc) to Linux flags (protocol.td.L_O_CREAT etc).

    The access mode (O_RDONLY / O_WRONLY / O_RDWR) is carried over
    numerically; every other supported bit is translated through a
    table.

    Raises ValueError if any bit is left untranslated.
    """
    flagmap = {
        os.O_CREAT: protocol.td.L_O_CREAT,
        os.O_EXCL: protocol.td.L_O_EXCL,
        os.O_NOCTTY: protocol.td.L_O_NOCTTY,
        os.O_TRUNC: protocol.td.L_O_TRUNC,
        os.O_APPEND: protocol.td.L_O_APPEND,
        os.O_DIRECTORY: protocol.td.L_O_DIRECTORY,
    }

    # Mask with O_ACCMODE, not O_RDWR: masking with O_RDWR drops
    # the O_WRONLY bit from the result and then reports it as an
    # untranslated bit.
    result = flags & os.O_ACCMODE
    flags &= ~os.O_ACCMODE
    # items() works on both Python 2 and 3 (iteritems() is py2 only).
    for key, value in flagmap.items():
        if flags & key:
            result |= value
            flags &= ~key
    if flags:
        raise ValueError('untranslated bits 0x{0:x} in os flags'.format(flags))
    return result
import errno as _errno
import lerrno as _lerrno
import os as _os

# Native errno -> 9P2000.u errno.
_native_to_dotu = {
    # These are in the "standard" range(1, errno.ERANGE)
    # but do not map to themselves, so map them here first.
    _errno.ENOTEMPTY: _errno.EPERM,
    _errno.EDQUOT: _errno.EPERM,
    _errno.ENOSYS: _errno.EPERM,
}

# Native errno -> 9P2000.L (Linux) errno.
_native_to_dotl = {}

# Add standard errno's.  The upper bound is ERANGE + 1 so that
# ERANGE itself is included: range() excludes its end point, and
# leaving ERANGE out sent it to the "unknown error" defaults below.
for _i in range(1, _errno.ERANGE + 1):
    # setdefault keeps the special cases entered above.
    _native_to_dotu.setdefault(_i, _i)
    _native_to_dotl[_i] = _i

# Add linux errno's.  Note that Linux EAGAIN at #11 overrides BSD EDEADLK,
# but Linux has EDEADLK at #35 which overrides BSD EAGAIN, so it all
# works out.
#
# We just list every BSD error name here, since the hasattr()s do
# the real work.
for _i in (
    'EDEADLK',
    'EAGAIN',
    'EINPROGRESS',
    'EALREADY',
    'ENOTSOCK',
    'EDESTADDRREQ',
    'EMSGSIZE',
    'EPROTOTYPE',
    'ENOPROTOOPT',
    'EPROTONOSUPPORT',
    'ESOCKTNOSUPPORT',
    'EOPNOTSUPP',
    'EPFNOSUPPORT',
    'EAFNOSUPPORT',
    'EADDRINUSE',
    'EADDRNOTAVAIL',
    'ENETDOWN',
    'ENETUNREACH',
    'ENETRESET',
    'ECONNABORTED',
    'ECONNRESET',
    'ENOBUFS',
    'EISCONN',
    'ENOTCONN',
    'ESHUTDOWN',
    'ETOOMANYREFS',
    'ETIMEDOUT',
    'ECONNREFUSED',
    'ELOOP',
    'ENAMETOOLONG',
    'EHOSTDOWN',
    'EHOSTUNREACH',
    'ENOTEMPTY',
    'EPROCLIM',
    'EUSERS',
    'EDQUOT',
    'ESTALE',
    'EREMOTE',
    'EBADRPC',
    'ERPCMISMATCH',
    'EPROGUNAVAIL',
    'EPROGMISMATCH',
    'EPROCUNAVAIL',
    'ENOLCK',
    'ENOSYS',
    'EFTYPE',
    'EAUTH',
    'ENEEDAUTH',
    'EIDRM',
    'ENOMSG',
    'EOVERFLOW',
    'ECANCELED',
    'EILSEQ',
    'EDOOFUS',
    'EBADMSG',
    'EMULTIHOP',
    'ENOLINK',
    'EPROTO',
    'ENOTCAPABLE',
    'ECAPMODE',
    'ENOTRECOVERABLE',
    'EOWNERDEAD',
):
    # Only names known to both the native errno module and the
    # Linux table can be translated.
    if hasattr(_errno, _i) and hasattr(_lerrno, _i):
        _native_to_dotl[getattr(_errno, _i)] = getattr(_lerrno, _i)
del _i

def to_dotu(errnum):
    """
    Translate native errno to 9P2000.u errno.

    Any errno without a table entry is reported as EIO.

    >>> import errno
    >>> to_dotu(errno.EIO)
    5
    >>> to_dotu(errno.EDQUOT)
    1
    >>> to_dotu(errno.ELOOP)
    5

    There is a corresponding dotu_strerror() (which is really
    just os.strerror):

    >>> dotu_strerror(5)
    'Input/output error'

    """
    return _native_to_dotu.get(errnum, _errno.EIO) # default to EIO

def to_dotl(errnum):
    """
    Translate native errno to 9P2000.L errno.

    Any errno without a table entry is reported as the Linux
    ENOTRECOVERABLE value.

    >>> import errno
    >>> to_dotl(errno.ELOOP)
    40

    There is a corresponding dotl_strerror():

    >>> dotl_strerror(40)
    'Too many levels of symbolic links'
    """
    return _native_to_dotl.get(errnum, _lerrno.ENOTRECOVERABLE)

dotu_strerror = _os.strerror

dotl_strerror = _lerrno.strerror

if __name__ == '__main__':
    import doctest
    doctest.testmod()
import sys as _sys
from keyword import iskeyword as _iskeyword
from collections import OrderedDict
from collections import deque as _deque

# Source text of the generated class.  pfod() fills in the
# {typename}, {arg_list}, {field_names} and {repr_fmt} slots and
# then executes the result.
_class_template = '''\
class {typename}(OrderedDict):
    '{typename}({arg_list})'
    __slots__ = ()

    _fields = {field_names!r}

    def __init__(self, *args, **kwargs):
        'Create new instance of {typename}()'
        super({typename}, self).__init__()
        args = _deque(args)
        for field in self._fields:
            if field in kwargs:
                self[field] = kwargs.pop(field)
            elif len(args) > 0:
                self[field] = args.popleft()
            else:
                self[field] = None
        if len(kwargs):
            raise TypeError('unexpected kwargs %s' % kwargs.keys())
        if len(args):
            raise TypeError('unconsumed args %r' % (tuple(args),))

    def _copy(self):
        'copy to new instance'
        new = {typename}()
        new.update(self)
        return new

    def __getattr__(self, attr):
        if attr in self:
            return self[attr]
        raise AttributeError('%r object has no attribute %r' %
            (self.__class__.__name__, attr))

    def __setattr__(self, attr, val):
        if attr.startswith('_OrderedDict_'):
            super({typename}, self).__setattr__(attr, val)
        else:
            self[attr] = val

    def __repr__(self):
        'Return a nicely formatted representation string'
        return '{typename}({repr_fmt})'.format(**self)
'''

# One "name=<repr of value>" element of the generated __repr__.
_repr_template = '{name}={{{name}!r}}'

# Workaround for py2k exec-as-statement, vs py3k exec-as-function.
# Since the syntax differs, we have to exec the definition of _exec!
if _sys.version_info[0] < 3:
    # py2k: need a real function.  (There is a way to deal with
    # this without a function if the py2k is new enough, but this
    # works in more cases.)
    exec("""def _exec(string, gdict, ldict):
    "Python 2: exec string in gdict, ldict"
    exec string in gdict, ldict""")
else:
    # py3k: just make an alias for builtin function exec
    exec("_exec = exec")

def pfod(typename, field_names, verbose=False, rename=False):
    """
    Return a new subclass of OrderedDict with named fields.

    Fields are accessible by name.  Note that this means
    that to copy a PFOD you must use _copy() - field names
    may not start with '_' unless they are all numeric.

    When creating an instance of the new class, fields
    that are not initialized are set to None.

    field_names may be a sequence of names or one string with the
    names separated by commas and/or whitespace.  If verbose is
    true the generated class source is printed: to verbose itself
    when it is a file-like object (has a write method), otherwise
    to sys.stdout.  If rename is true, invalid or duplicate field
    names are replaced by '_<index>' instead of raising.

    Raises TypeError or ValueError for unusable names.

    >>> Point = pfod('Point', ['x', 'y'])
    >>> Point.__doc__                   # docstring for the new class
    'Point(x, y)'
    >>> p = Point(11, y=22)             # instantiate with positional args or keywords
    >>> p
    Point(x=11, y=22)
    >>> p['x'] + p['y']                 # indexable
    33
    >>> p.x + p.y                       # fields also accessible by name
    33
    >>> p._copy()
    Point(x=11, y=22)
    >>> p2 = Point()
    >>> p2.extra = 2
    >>> p2
    Point(x=None, y=None)
    >>> p2.extra
    2
    >>> p2['extra']
    2
    """

    # Validate the field names.  At the user's option, either generate
    # an error message or automatically replace the field name with a
    # valid name.
    if _sys.version_info[0] >= 3:
        string_type = str
    else:
        string_type = basestring
    if isinstance(field_names, string_type):
        field_names = field_names.replace(',', ' ').split()
    field_names = list(map(str, field_names))
    typename = str(typename)
    if rename:
        seen = set()
        for index, name in enumerate(field_names):
            if (not all(c.isalnum() or c=='_' for c in name)
                or _iskeyword(name)
                or not name
                or name[0].isdigit()
                or name.startswith('_')
                or name in seen):
                field_names[index] = '_%d' % index
            seen.add(name)
    for name in [typename] + field_names:
        if type(name) != str:
            raise TypeError('Type names and field names must be strings')
        if not name:
            # Reject the empty string here; name[0] below would
            # otherwise fail with an unhelpful IndexError.
            raise ValueError('Type names and field names cannot be '
                             'empty: %r' % name)
        if not all(c.isalnum() or c=='_' for c in name):
            raise ValueError('Type names and field names can only contain '
                             'alphanumeric characters and underscores: %r' % name)
        if _iskeyword(name):
            raise ValueError('Type names and field names cannot be a '
                             'keyword: %r' % name)
        if name[0].isdigit():
            raise ValueError('Type names and field names cannot start with '
                             'a number: %r' % name)
    seen = set()
    for name in field_names:
        if name.startswith('_OrderedDict_'):
            raise ValueError('Field names cannot start with _OrderedDict_: '
                             '%r' % name)
        if name.startswith('_') and not rename:
            raise ValueError('Field names cannot start with an underscore: '
                             '%r' % name)
        if name in seen:
            raise ValueError('Encountered duplicate field name: %r' % name)
        seen.add(name)

    # Fill-in the class template
    class_definition = _class_template.format(
        typename = typename,
        field_names = tuple(field_names),
        arg_list = repr(tuple(field_names)).replace("'", "")[1:-1],
        repr_fmt = ', '.join(_repr_template.format(name=name)
                             for name in field_names),
    )
    if verbose:
        # The builtin `file` type does not exist on Python 3, so
        # recognise a stream by its write method instead.
        print(class_definition,
              file=verbose if hasattr(verbose, 'write') else _sys.stdout)

    # Execute the template string in a temporary namespace and support
    # tracing utilities by setting a value for frame.f_globals['__name__']
    namespace = dict(__name__='PFOD%s' % typename,
                     OrderedDict=OrderedDict, _deque=_deque)
    try:
        _exec(class_definition, namespace, namespace)
    except SyntaxError as e:
        # str(e) works on both py2k and py3k; e.message is py2k only.
        raise SyntaxError(str(e) + ':\n' + class_definition)
    result = namespace[typename]

    # For pickling to work, the __module__ variable needs to be set to the frame
    # where the named tuple is created.  Bypass this step in environments where
    # sys._getframe is not defined (Jython for example) or sys._getframe is not
    # defined for arguments greater than 0 (IronPython).
    try:
        result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
    except (AttributeError, ValueError):
        pass

    return result

if __name__ == '__main__':
    import doctest
    doctest.testmod()
+ +The sub-namespace td has type definitions (qid, stat) and values +that are "#define" constants in C code (e.g., DMDIR, QTFILE, etc). +This also contains the byte values for protocol codes like Tversion, +Rversion, Rerror, and so on. + + >>> td.Tversion + 100 + >>> td.Rlerror + 7 + +The qid and stat types are PFOD classes and generate instances that +are a cross between namedtuple and OrderedDict (see pfod.py +for details): + + >>> td.qid(type=td.QTFILE, path=2, version=1) + qid(type=0, version=1, path=2) + +The td.stat() type output is pretty long, since it has all the +dotu-specific members (used only when packing for dotu/dotl and +set only when unpacking those), so here's just one field: + + >>> td.stat(*(15 * [0])).mode + 0 + >>> import pprint; pprint.pprint(td.stat()._fields) + ('type', + 'dev', + 'qid', + 'mode', + 'atime', + 'mtime', + 'length', + 'name', + 'uid', + 'gid', + 'muid', + 'extension', + 'n_uid', + 'n_gid', + 'n_muid') + +Stat objects sent across the protocol must first be encoded into +wirestat objects, which are basically size-counted pre-sequenced +stat objects. The pre-sequencing uses: + + >>> td.stat_seq + Sequencer('stat') + +For parsing bytes returned in a Tread on a directory, td.wirestat_seq +is the sequencer. However, most users should rely on the packers and +unpackers in each protocol (see {pack,unpack}_wirestat below). + + >>> td.wirestat_seq + Sequencer('wirestat') + +There is a dictionary fcall_names that maps from byte value +to protocol code name. Names map to themselves as well: + + >>> fcall_names[101] + 'Rversion' + >>> fcall_names['Tversion'] + 'Tversion' + +The sub-namespace rrd has request (Tversion, Topen, etc) and +response (Rversion, Ropen, etc) data definitions. Each of these +is a PFOD class: + + >>> rrd.Tversion(1000, 'hello', tag=0) + Tversion(tag=0, msize=1000, version='hello') + +The function p9_version() looks up the instance of each supported +protocol, or raises a KeyError when given an invalid protocol.
+The names may be spelled in any mixture of cases. + +The names plain, dotu, and dotl are predefined as the three +supported protocols: + + >>> p9_version('invalid') + Traceback (most recent call last): + ... + KeyError: 'invalid' + >>> p9_version('9p2000') == plain + True + >>> p9_version('9P2000') == plain + True + >>> p9_version('9P2000.u') == dotu + True + >>> p9_version('9p2000.L') == dotl + True + +Protocol instances have a pack() method that encodes a set of +arguments into a packet. To know what to encode, pack() must +receive an fcall value and a dictionary containing argument +values, or something equivalent. The required argument values +depend on the fcall. For instance, a Tversion fcall needs three +arguments: the version name, the tag, and the msize (these of +course are the pre-filled fields in a Tversion PFOD instance). + + >>> args = {'version': '!', 'tag': 1, 'msize': 1000} + >>> pkt = dotu.pack(fcall='Tversion', args=args) + >>> len(pkt) + 14 + +The length of string '!' is 1, and the packet (or wire) format of +a Tversion request is: + + size[4] fcall[1] tag[2] msize[4] version[s] + +which corresponds to a struct's IBHIH (for the fixed size parts) +followed by 1 B (for the string). The overall packet is 14 bytes +long, so we have size=14, fcall=100, tag=1, msize=1000, and the +version string is length=1, value=33 (ord('!')). + + >>> import struct + >>> struct.unpack('<IBHIHB', pkt) + (14, 100, 1, 1000, 1, 33) + +Of course, this packet carries a bogus version string ('!'). Each +protocol instance knows its own version, so when the version is +omitted from the arguments, pack() fills it in: + + >>> dotu.version + '9P2000.u' + >>> args = {'tag': 99, 'msize': 1000} + >>> pkt = dotu.pack(fcall='Tversion', args=args) + >>> len(pkt) + 21 + +The fcall can be supplied numerically: + + >>> pkt2 = dotu.pack(fcall=td.Tversion, args=args) + >>> pkt == pkt2 + True + +Instead of providing an fcall you can provide an instance of +the appropriate PFOD. In this case pack() finds the type from +the PFOD instance.
As usual, the version parameter is filled in +for you: + + >>> pkt2 = dotu.pack(rrd.Tversion(tag=99, msize=1000)) + >>> pkt == pkt2 + True + +Note that it's up to you to check the other end's version and +switch to a "lower" protocol as needed. Each instance does provide +a downgrade_to() method that gets you a possibly-downgraded instance. +This will fail if you are actually trying to upgrade, and also if +you provide a bogus version: + + >>> dotu.downgrade_to('9P2000.L') + Traceback (most recent call last): + ... + KeyError: '9P2000.L' + >>> dotu.downgrade_to('we never heard of this protocol') + Traceback (most recent call last): + ... + KeyError: 'we never heard of this protocol' + +Hence you might use: + + try: + proto = protocol.dotl.downgrade_to(vstr) + except KeyError: + pkt = protocol.plain.pack(fcall='Rerror', + args={'tag': tag, 'errstr': 'unknown protocol version ' + '{0!r}'.format(vstr)}) + else: + pkt = proto.pack(fcall='Rversion', args={'tag': tag, 'msize': msize}) + +When using a PFOD instance, it is slightly more efficient to use +pack_from(): + + try: + proto = protocol.dotl.downgrade_to(vstr) + reply = protocol.rrd.Rversion(tag=tag, msize=msize) + except KeyError: + proto = protocol.plain + reply = protocol.rrd.Rerror(tag=tag, + errstr='unknown protocol version {0!r}'.format(vstr)) + pkt = proto.pack_from(reply) + +does the equivalent of the try/except/else variant. Note that +the protocol.rrd.Rversion() instance has version=None. Like +proto.pack, the pack_from will detect this "missing" value and +fill it in. + +Because errors vary (one should use Rlerror for dotl and Rerror +for dotu and plain), and it's convenient to use an Exception +instance for an error, all protocols provide .error(). This +builds the appropriate kind of error response, extracting and +converting errno's and error messages as appropriate.
+ +If err is an instance of Exception, err.errno provides the errnum +or ecode value (if used, for dotu and dotl) and err.strerror as the +errstr value (if used, for plain 9p2000). Otherwise err should be +an integer, and we'll use os.strerror() to get a message. + +When using plain 9P2000 this sends error *messages*: + + >>> import errno, os + >>> utf8 = os.strerror(errno.ENOENT).encode('utf-8') + >>> pkt = None + >>> try: + ... os.open('presumably this file does not exist here', 0) + ... except OSError as err: + ... pkt = plain.error(1, err) + ... + >>> pkt[-len(utf8):] == utf8 + True + >>> pkt2 = plain.error(1, errno.ENOENT) + >>> pkt == pkt2 + True + +When using 9P2000.u it sends the error code as well, and when +using 9P2000.L it sends only the error code (and more error +codes can pass through): + + >>> len(pkt) + 34 + >>> len(dotu.error(1, errno.ENOENT)) + 38 + >>> len(dotl.error(1, errno.ENOENT)) + 11 + +For even more convenience (and another slight speed hack), the +protocol has member functions for each valid pfod, which +effectively do a pack_from of a pfod built from the arguments. In +the above example this is not very useful (because we want two +different replies), but for Rlink, for instance, which has only +a tag, a server might implement Tlink() as: + + def do_Tlink(proto, data): # data will be a protocol.rrd.Tlink(...) + tag = data.tag + dfid = data.dfid + fid = data.fid + name = data.name + ... some code to set up for doing the link ... + try: + os.link(path1, path2) + except OSError as err: + return proto.error(tag, err) + else: + return proto.Rlink(tag) + + >>> pkt = dotl.Rlink(12345) + >>> struct.unpack('<IBH', pkt) + (7, 71, 12345) + +Similarly, a client can build a Tversion request and have the +version string filled in automatically: + + >>> vpkt = dotl.Tversion(tag=0, msize=12345) + +To see that this is a valid version packet, let's unpack its bytes. +The overall length is 21 bytes: 4 bytes of size, 1 byte of code 100 +for Tversion, 2 bytes of tag, 4 bytes of msize, 2 bytes of string +length, and 8 bytes of string '9P2000.L'.
+ + >>> tup = struct.unpack('<IBHIH8B', vpkt) + >>> tup[0:5] + (21, 100, 0, 12345, 8) + >>> ''.join(chr(i) for i in tup[5:]) + '9P2000.L' + +Of course, since you can *pack*, you can also *unpack*. It's +possible that the incoming packet is malformed. If so, this +raises various errors (see below). + +Unpack is actually a two step process: first we unpack a header +(where the size is already removed and is implied by len(data)), +then we unpack the data within the packet. You can invoke the +first step separately. Furthermore, there's a noerror argument +that leaves some fields set to None or empty strings, if the +packet is too short. (Note that we need a hack for py2k vs py3k +strings here, for doctests. Also, encoding 12345 into a byte +string produces '90', by ASCII luck!) + + >>> pkt = pkt[4:] # strip generated size + >>> import sys + >>> py3k = sys.version_info[0] >= 3 + >>> b2s = lambda x: x.decode('utf-8') if py3k else x + >>> d = plain.unpack_header(pkt[0:1], noerror=True) + >>> d.data = b2s(d.data) + >>> d + Header(size=5, dsize=0, fcall=71, data='') + >>> d = plain.unpack_header(pkt[0:2], noerror=True) + >>> d.data = b2s(d.data) + >>> d + Header(size=6, dsize=1, fcall=71, data='9') + +Without noerror=True a short packet raises a SequenceError: + + >>> plain.unpack_header(pkt[0:0]) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + SequenceError: out of data while unpacking 'fcall' + +Of course, a normal packet decodes fine: + + >>> d = plain.unpack_header(pkt) + >>> d.data = b2s(d.data) + >>> d + Header(size=7, dsize=2, fcall=71, data='90') + +but one that is too *long* potentially raises a SequenceError. +(This is impossible for a header, though, since the size and +data size are both implied: either there is an fcall code, and +the rest of the bytes are "data", or there isn't and the packet +is too short. So we can only demonstrate this for regular +unpack; see below.)
+ +Note that all along, this has been decoding Rlink (fcall=71), +which is not valid for plain 9P2000 protocol. It's up to the +caller to check: + + >>> plain.supports(71) + False + + >>> plain.unpack(pkt) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + SequenceError: invalid fcall 'Rlink' for 9P2000 + >>> dotl.unpack(pkt) + Rlink(tag=12345) + +However, the unpack() method DOES check that the fcall type is +valid, even if you supply noerror=True. This is because we can +only really decode the header, not the data, if the fcall is +invalid: + + >>> plain.unpack(pkt, noerror=True) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + SequenceError: invalid fcall 'Rlink' for 9P2000 + +The same applies to much-too-short packets even if noerror is set. +Specifically, if the (post-"size") header shortens down to the empty +string, the fcall will be None: + + >>> dotl.unpack(b'', noerror=True) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + SequenceError: invalid fcall None for 9P2000.L + +If there is at least a full header, though, noerror will do the obvious: + + >>> dotl.unpack(pkt[0:1], noerror=True) + Rlink(tag=None) + >>> dotl.unpack(pkt[0:2], noerror=True) + Rlink(tag=None) + +If the packet is too long, noerror suppresses the SequenceError: + + >>> dotl.unpack(pkt + b'x') # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + SequenceError: 1 byte(s) unconsumed + >>> dotl.unpack(pkt + b'x', noerror=True) + Rlink(tag=12345) + +To pack a stat object when producing data for reading a directory, +use pack_wirestat. This puts a size in front of the packed stat +data (they're represented this way in read()-of-directory data, +but not elsewhere). + +To unpack the result of a Tstat or a read() on a directory, use +unpack_wirestat. The stat values are variable length so this +works with offsets. 
If the packet is truncated, you'll get a +SequenceError, but just as for header unpacking, you can use +noerror to suppress this. + +(First, we'll need to build some valid packet data.) + + >>> statobj = td.stat(type=0,dev=0,qid=td.qid(0,0,0),mode=0, + ... atime=0,mtime=0,length=0,name=b'foo',uid=b'0',gid=b'0',muid=b'0') + >>> data = plain.pack_wirestat(statobj) + >>> len(data) + 55 + +Now we can unpack it: + + >>> newobj, offset = plain.unpack_wirestat(data, 0) + >>> newobj == statobj + True + >>> offset + 55 + +Since the packed data do not include the dotu extensions, we get +a SequenceError if we try to unpack with dotu or dotl: + + >>> dotu.unpack_wirestat(data, 0) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + SequenceError: out of data while unpacking 'extension' + +When using noerror, the returned new offset will be greater +than the length of the packet, after a failed unpack, and some +elements may be None: + + >>> newobj, offset = plain.unpack_wirestat(data[0:10], 0, noerror=True) + >>> offset + 55 + >>> newobj.length is None + True + +Similarly, use unpack_dirent to unpack the result of a dot-L +readdir(), using offsets. (Build them with pack_dirent.) + + >>> dirent = td.dirent(qid=td.qid(1,2,3),offset=0, + ... type=td.DT_REG,name=b'foo') + >>> pkt = dotl.pack_dirent(dirent) + >>> len(pkt) + 27 + +and then: + + >>> newde, offset = dotl.unpack_dirent(pkt, 0) + >>> newde == dirent + True + >>> offset + 27 + +""" + +from __future__ import print_function + +import collections +import os +import re +import sys + +import p9err +import pfod +import sequencer + +SequenceError = sequencer.SequenceError + +fcall_names = {} + +# begin ??? 
+# to interfere with (eg) the size part of the packet: +# pkt = proto.pack(fcall=protocol.td.Tversion, +# size=123, # wrong +# args={ 'tag': 1, msize: 1000, version: '9p2000.u' }) +# a standard Twrite: +# pkt = proto.pack(fcall=protocol.td.Twrite, +# args={ 'tag': 1, 'fid': 2, 'offset': 0, 'data': b'rawdata' }) +# or: +# pkt = proto.pack(fcall=protocol.td.Twrite, +# data=proto.Twrite(tag=1, fid=2, offset=0, data=b'rawdata' }) +# a broken Twrite: +# pkt = proto.pack(fcall=protocol.td.Twrite, +# args={ 'tag': 1, 'fid': 2, 'offset': 0, 'count': 99, +# 'data': b'rawdata' }) -- XXX won't work (yet?) +# +# build a QID: (td => typedefs and defines) +# qid = protocol.td.qid(type=protocol.td.QTFILE, version=1, path=2) +# build the Twrite data as a data structure: +# wrdata = protocol.td.Twrite(tag=1, fid=2, offset=0, data=b'rawdata') +# +# turn incoming byte stream data into a Header and remaining data: +# foo = proto.pack(data) + +class _PackInfo(object): + """ + Essentially just a Sequencer, except that we remember + if there are any :auto annotations on any of the coders, + and we check for coders that are string coders ('data[size]'). + + This could in theory be a recursive check, but in practice + all the automatics are at the top level, and we have no mechanism + to pass down inner automatics. + """ + def __init__(self, seq): + self.seq = seq + self.autos = None + for pair in seq: # (cond, code) pair + sub = pair[1] + if sub.aux is None: + continue + assert sub.aux == 'auto' or sub.aux == 'len' + if self.autos is None: + self.autos = [] + self.autos.append(pair) + + def __repr__(self): + return '{0}({1!r})'.format(self.__class__.__name__, self.seq) + + def pack(self, auto_vars, conditions, data, rodata): + """ + Pack data. Insert automatic and/or counted variables + automatically, if they are not already set in the data. + + If rodata ("read-only data") is True we make sure not + to modify the caller's data. 
Since data is a PFOD rather + than a normal ordered dictionary, we use _copy(). + """ + if self.autos: + for cond, sub in self.autos: + # False conditionals don't need to be filled-in. + if cond is not None and not conditions[cond]: + continue + if sub.aux == 'auto': + # Automatic variable, e.g., version. The + # sub-coder's name ('version') is the test item. + if data.get(sub.name) is None: + if rodata: + data = data._copy() + rodata = False + data[sub.name] = auto_vars[sub.name] + else: + # Automatic length, e.g., data[count]. The + # sub-coders's repeat item ('count') is the + # test item. Of course, it's possible that + # the counted item is missing as well. If so + # we just leave both None and take the + # encoding error. + assert sub.aux == 'len' + if data.get(sub.repeat) is not None: + continue + item = data.get(sub.name) + if item is not None: + if rodata: + data = data._copy() + rodata = False + data[sub.repeat] = len(item) + return self.seq.pack(data, conditions) + +class _P9Proto(object): + def __init__(self, auto_vars, conditions, p9_data, pfods, index): + self.auto_vars = auto_vars # currently, just version + self.conditions = conditions # '.u' + self.pfods = pfods # dictionary, maps pfod to packinfo + self.index = index # for comparison: plain < dotu < dotl + + self.use_rlerror = rrd.Rlerror in pfods + + for dtype in pfods: + name = dtype.__name__ + # For each Txxx/Rxxx, define a self.() to + # call self.pack_from(). + # + # The packinfo is from _Packinfo(seq); the fcall and + # seq come from p9_data.protocol[]. + proto_tuple = p9_data.protocol[name] + assert dtype == proto_tuple[0] + packinfo = pfods[dtype] + # in theory we can do this with no names using nested + # lambdas, but that's just too confusing, so let's + # do it with nested functions instead. 
+ def builder(constructor=dtype, packinfo=packinfo): + "return function that calls _pack_from with built PFOD" + def invoker(self, *args, **kwargs): + "build PFOD and call _pack_from" + return self._pack_from(constructor(*args, **kwargs), + rodata=False, caller=None, + packinfo=packinfo) + return invoker + func = builder() + func.__name__ = name + func.__doc__ = 'pack from {0}'.format(name) + setattr(self.__class__, name, func) + + def __repr__(self): + return '{0}({1!r})'.format(self.__class__.__name__, self.version) + + def __str__(self): + return self.version + + # define rich-comparison operators, so we can, e.g., test vers > plain + def __lt__(self, other): + return self.index < other.index + def __le__(self, other): + return self.index <= other.index + def __eq__(self, other): + return self.index == other.index + def __ne__(self, other): + return self.index != other.index + def __gt__(self, other): + return self.index > other.index + def __ge__(self, other): + return self.index >= other.index + + def downgrade_to(self, other_name): + """ + Downgrade from this protocol to a not-greater one. + + Raises KeyError if other_name is not a valid protocol, + or this is not a downgrade (with setting back to self + considered a valid "downgrade", i.e., we're doing subseteq + rather than subset). 
+ """ + if not isinstance(other_name, str) and isinstance(other_name, bytes): + other_name = other_name.decode('utf-8', 'surrogateescape') + other = p9_version(other_name) + if other > self: + raise KeyError(other_name) + return other + + def error(self, tag, err): + "produce Rerror or Rlerror, whichever is appropriate" + if isinstance(err, Exception): + errnum = err.errno + errmsg = err.strerror + else: + errnum = err + errmsg = os.strerror(errnum) + if self.use_rlerror: + return self.Rlerror(tag=tag, ecode=p9err.to_dotl(errnum)) + return self.Rerror(tag=tag, errstr=errmsg, + errnum=p9err.to_dotu(errnum)) + + def pack(self, *args, **kwargs): + "pack up a pfod or fcall-and-arguments" + fcall = kwargs.pop('fcall', None) + if fcall is None: + # Called without fcall=... + # This requires that args have one argument that + # is the PFOD; kwargs should be empty (but we'll take + # data=pfod as well). The size is implied, and + # fcall comes from the pfod. + data = kwargs.pop('data', None) + if data is None: + if len(args) != 1: + raise TypeError('pack() with no fcall requires 1 argument') + data = args[0] + if len(kwargs): + raise TypeError('pack() got an unexpected keyword argument ' + '{0}'.format(kwargs.popitem()[0])) + return self._pack_from(data, True, 'pack', None) + + # Called as pack(fcall=whatever, data={...}). + # The data argument must be a dictionary since we're going to + # apply ** to it in the call to build the PFOD. Note that + # it could already be a PFOD, which is OK, but we're going to + # copy it to a new one regardless (callers that have a PFOD + # should use pack_from instead). 
+ if len(args): + raise TypeError('pack() got unexpected arguments ' + '{0!r}'.format(args)) + data = kwargs.pop('args', None) + if len(kwargs): + raise TypeError('pack() got an unexpected keyword argument ' + '{0}'.format(kwargs.popitem()[0])) + if not isinstance(data, dict): + raise TypeError('pack() with fcall and data ' + 'requires data to be a dictionary') + try: + name = fcall_names[fcall] + except KeyError: + raise TypeError('pack(): {0} is not a valid ' + 'fcall value'.format(fcall)) + cls = getattr(rrd, name) + data = cls(**data) + return self._pack_from(data, False, 'pack', None) + + def pack_from(self, data): + "pack from pfod data, using its type to determine fcall" + return self._pack_from(data, True, 'pack_from', None) + + def _pack_from(self, data, rodata, caller, packinfo): + """ + Internal pack(): called from both invokers (self.Tversion, + self.Rwalk, etc.) and from pack and pack_from methods. + "caller" says which. If rodata is True we're not supposed to + modify the incoming data, as it may belong to someone + else. Some calls to pack() build a PFOD and hence pass in + False. + + The predefined invokers pass in a preconstructed PFOD, + *and* set rodata=False, *and* provide a packinfo, so that + we never have to copy, nor look up the packinfo. + """ + if caller is not None: + assert caller in ('pack', 'pack_from') and packinfo is None + # Indirect call from pack_from(), or from pack() after + # pack() built a PFOD. We make sure this kind of PFOD + # is allowed for this protocol. + packinfo = self.pfods.get(data.__class__, None) + if packinfo is None: + raise TypeError('{0}({1!r}): invalid ' + 'input'.format(caller, data)) + + # Pack the data + pkt = packinfo.pack(self.auto_vars, self.conditions, data, rodata) + + fcall = data.__class__.__name__ + fcall_code = getattr(td, fcall) + + # That's the inner data; now we must add the header, + # with fcall (translated back to byte code value) and + # outer data. The size is implied by len(pkt). 
There + # are no other auto variables, and no conditions. + # + # NB: the size includes the size of the header itself + # and the fcall code byte, plus the size of the data. + data = _9p_data.header_pfod(size=4 + 1 + len(pkt), dsize=len(pkt), + fcall=fcall_code, data=pkt) + empty = None # logically should be {}, but not actually used below + pkt = _9p_data.header_pack_seq.pack(data, empty) + return pkt + + @staticmethod + def unpack_header(bstring, noerror=False): + """ + Unpack header. + + We know that our caller has already stripped off the + overall size field (4 bytes), leaving us with the fcall + (1 byte) and data (len(bstring)-1 bytes). If len(bstring) + is 0, this is an invalid header: set dsize to 0 and let + fcall become None, if noerror is set. + """ + vdict = _9p_data.header_pfod() + vdict['size'] = len(bstring) + 4 + vdict['dsize'] = max(0, len(bstring) - 1) + _9p_data.header_unpack_seq.unpack(vdict, None, bstring, noerror) + return vdict + + def unpack(self, bstring, noerror=False): + "produce filled PFOD from fcall in packet" + vdict = self.unpack_header(bstring, noerror) + # NB: vdict['dsize'] is used internally during unpack, to + # find out how many bytes to copy to vdict['data'], but by + # the time unpack is done, we no longer need it. + # + # size = vdict['size'] + # dsize = vdict['dsize'] + fcall = vdict['fcall'] + data = vdict['data'] + # Note: it's possible for size and/or fcall to be None, + # when noerror is true. However, if we support fcall, then + # clearly fcall is not None; and since fcall follows size, + # we can always proceed if we support fcall. + if self.supports(fcall): + fcall = fcall_names[fcall] + cls = getattr(rrd, fcall) + seq = self.pfods[cls].seq + elif fcall == td.Rlerror: + # As a special case for diod, we accept Rlerror even + # if it's not formally part of the protocol. 
+ cls = rrd.Rlerror + seq = dotl.pfods[rrd.Rlerror].seq + else: + fcall = fcall_names.get(fcall, fcall) + raise SequenceError('invalid fcall {0!r} for ' + '{1}'.format(fcall, self)) + vdict = cls() + seq.unpack(vdict, self.conditions, data, noerror) + return vdict + + def pack_wirestat(self, statobj): + """ + Pack a stat object to appear as data returned by read() + on a directory. Essentially, we prefix the data with a size. + """ + data = td.stat_seq.pack(statobj, self.conditions) + return td.wirestat_seq.pack({'size': len(data), 'data': data}, {}) + + def unpack_wirestat(self, bstring, offset, noerror=False): + """ + Produce the next td.stat object from byte-string, + returning it and new offset. + """ + statobj = td.stat() + d = { 'size': None } + newoff = td.wirestat_seq.unpack_from(d, self.conditions, bstring, + offset, noerror) + size = d['size'] + if size is None: # implies noerror; newoff==offset+2 + return statobj, newoff + # We now have size and data. If noerror, data might be + # too short, in which case we'll unpack a partial statobj. + # Or (with or without noeror), data might be too long, so + # that while len(data) == size, not all the data get used. + # That may be allowed by the protocol: it's not clear. + data = d['data'] + used = td.stat_seq.unpack_from(statobj, self.conditions, data, + 0, noerror) + # if size != used ... then what? + return statobj, newoff + + def pack_dirent(self, dirent): + """ + Dirents (dot-L only) are easy to pack, but we provide + this function for symmetry. (Should we raise an error + if called on plain or dotu?) + """ + return td.dirent_seq.pack(dirent, self.conditions) + + def unpack_dirent(self, bstring, offset, noerror=False): + """ + Produces the next td.dirent object from byte-string, + returning it and new offset. 
+ """ + deobj = td.dirent() + offset = td.dirent_seq.unpack_from(deobj, self.conditions, bstring, + offset, noerror) + return deobj, offset + + def supports(self, fcall): + """ + Return True if and only if this protocol supports the + given fcall. + + >>> plain.supports(100) + True + >>> plain.supports('Tversion') + True + >>> plain.supports('Rlink') + False + """ + fcall = fcall_names.get(fcall, None) + if fcall is None: + return False + cls = getattr(rrd, fcall) + return cls in self.pfods + + def get_version(self, as_bytes=True): + "get Plan 9 protocol version, as string or (default) as bytes" + ret = self.auto_vars['version'] + if as_bytes and not isinstance(ret, bytes): + ret = ret.encode('utf-8') + return ret + + @property + def version(self): + "Plan 9 protocol version" + return self.get_version(as_bytes=False) + +DEBUG = False + +# This defines a special en/decoder named "s" using a magic +# builtin. This and stat are the only variable-length +# decoders, and this is the only recursively-variable-length +# one (i.e., stat decoding is effectively fixed size once we +# handle strings). So this magic avoids the need for recursion. +# +# Note that _string_ is, in effect, size[2] orig_var[size]. +_STRING_MAGIC = '_string_' +SDesc = "typedef s: " + _STRING_MAGIC + +# This defines an en/decoder for type "qid", +# which en/decodes 1 byte called type, 4 called version, and +# 8 called path (for a total of 13 bytes). +# +# It also defines QTDIR, QTAPPEND, etc. (These are not used +# for en/decode, or at least not yet.) +QIDDesc = """\ +typedef qid: type[1] version[4] path[8] + + #define QTDIR 0x80 + #define QTAPPEND 0x40 + #define QTEXCL 0x20 + #define QTMOUNT 0x10 + #define QTAUTH 0x08 + #define QTTMP 0x04 + #define QTSYMLINK 0x02 + #define QTFILE 0x00 +""" + +# This defines a stat decoder, which has a 9p2000 standard front, +# followed by an optional additional portion. +# +# The constants are named DMDIR etc. 
+STATDesc = """ +typedef stat: type[2] dev[4] qid[qid] mode[4] atime[4] mtime[4] \ +length[8] name[s] uid[s] gid[s] muid[s] \ +{.u: extension[s] n_uid[4] n_gid[4] n_muid[4] } + + #define DMDIR 0x80000000 + #define DMAPPEND 0x40000000 + #define DMMOUNT 0x10000000 + #define DMAUTH 0x08000000 + #define DMTMP 0x04000000 + #define DMSYMLINK 0x02000000 + /* 9P2000.u extensions */ + #define DMDEVICE 0x00800000 + #define DMNAMEDPIPE 0x00200000 + #define DMSOCKET 0x00100000 + #define DMSETUID 0x00080000 + #define DMSETGID 0x00040000 +""" + +# This defines a wirestat decoder. A wirestat is a size and then +# a (previously encoded, or future-decoded) stat. +WirestatDesc = """ +typedef wirestat: size[2] data[size] +""" + +# This defines a dirent decoder, which has a dot-L specific format. +# +# The dirent type fields are defined as DT_* (same as BSD and Linux). +DirentDesc = """ +typedef dirent: qid[qid] offset[8] type[1] name[s] + + #define DT_UNKNOWN 0 + #define DT_FIFO 1 + #define DT_CHR 2 + #define DT_DIR 4 + #define DT_BLK 6 + #define DT_REG 8 + #define DT_LNK 10 + #define DT_SOCK 12 + #define DT_WHT 14 +""" + +# N.B.: this is largely a slightly more rigidly formatted variant of +# the contents of: +# https://github.com/chaos/diod/blob/master/protocol.md +# +# Note that = : ... assigns names for the fcall +# fcall (function call) table. Names without "= value" are +# assumed to be the previous value +1 (and the two names are +# also checked to make sure they are Tfoo,Rfoo). 
+ProtocolDesc = """\ +Rlerror.L = 7: tag[2] ecode[4] + ecode is a numerical Linux errno + +Tstatfs.L = 8: tag[2] fid[4] +Rstatfs.L: tag[2] type[4] bsize[4] blocks[8] bfree[8] bavail[8] \ + files[8] ffree[8] fsid[8] namelen[4] + Rstatfs corresponds to Linux statfs structure: + struct statfs { + long f_type; /* type of file system */ + long f_bsize; /* optimal transfer block size */ + long f_blocks; /* total data blocks in file system */ + long f_bfree; /* free blocks in fs */ + long f_bavail; /* free blocks avail to non-superuser */ + long f_files; /* total file nodes in file system */ + long f_ffree; /* free file nodes in fs */ + fsid_t f_fsid; /* file system id */ + long f_namelen; /* maximum length of filenames */ + }; + + This comes from nowhere obvious... + #define FSTYPE 0x01021997 + +Tlopen.L = 12: tag[2] fid[4] flags[4] +Rlopen.L: tag[2] qid[qid] iounit[4] + lopen prepares fid for file (or directory) I/O. + + flags contains Linux open(2) flag bits, e.g., O_RDONLY, O_RDWR, O_WRONLY. + + #define L_O_CREAT 000000100 + #define L_O_EXCL 000000200 + #define L_O_NOCTTY 000000400 + #define L_O_TRUNC 000001000 + #define L_O_APPEND 000002000 + #define L_O_NONBLOCK 000004000 + #define L_O_DSYNC 000010000 + #define L_O_FASYNC 000020000 + #define L_O_DIRECT 000040000 + #define L_O_LARGEFILE 000100000 + #define L_O_DIRECTORY 000200000 + #define L_O_NOFOLLOW 000400000 + #define L_O_NOATIME 001000000 + #define L_O_CLOEXEC 002000000 + #define L_O_SYNC 004000000 + #define L_O_PATH 010000000 + #define L_O_TMPFILE 020000000 + +Tlcreate.L = 14: tag[2] fid[4] name[s] flags[4] mode[4] gid[4] +Rlcreate.L: tag[2] qid[qid] iounit[4] + lcreate creates a regular file name in directory fid and prepares + it for I/O. + + fid initially represents the parent directory of the new file. + After the call it represents the new file. + + flags contains Linux open(2) flag bits (including O_CREAT). + + mode contains Linux creat(2) mode (permissions) bits. 
+ + gid is the effective gid of the caller. + +Tsymlink.L = 16: tag[2] dfid[4] name[s] symtgt[s] gid[4] +Rsymlink.L: tag[2] qid[qid] + symlink creates a symbolic link name in directory dfid. The + link will point to symtgt. + + gid is the effective group id of the caller. + + The qid for the new symbolic link is returned in the reply. + +Tmknod.L = 18: tag[2] dfid[4] name[s] mode[4] major[4] minor[4] gid[4] +Rmknod.L: tag[2] qid[qid] + mknod creates a device node name in directory dfid with major + and minor numbers. + + mode contains Linux mknod(2) mode bits. (Note that these + include the S_IFMT bits which may be S_IFBLK, S_IFCHR, or + S_IFSOCK.) + + gid is the effective group id of the caller. + + The qid for the new device node is returned in the reply. + +Trename.L = 20: tag[2] fid[4] dfid[4] name[s] +Rrename.L: tag[2] + rename renames a file system object referenced by fid, to name + in the directory referenced by dfid. + + This operation will eventually be replaced by renameat. + +Treadlink.L = 22: tag[2] fid[4] +Rreadlink.L: tag[2] target[s] + readlink returns the contents of teh symbolic link referenced by fid. + +Tgetattr.L = 24: tag[2] fid[4] request_mask[8] +Rgetattr.L: tag[2] valid[8] qid[qid] mode[4] uid[4] gid[4] nlink[8] \ + rdev[8] size[8] blksize[8] blocks[8] \ + atime_sec[8] atime_nsec[8] mtime_sec[8] mtime_nsec[8] \ + ctime_sec[8] ctime_nsec[8] btime_sec[8] btime_nsec[8] \ + gen[8] data_version[8] + + getattr gets attributes of a file system object referenced by fid. 
+ The response is intended to follow pretty closely the fields + returned by the stat(2) system call: + + struct stat { + dev_t st_dev; /* ID of device containing file */ + ino_t st_ino; /* inode number */ + mode_t st_mode; /* protection */ + nlink_t st_nlink; /* number of hard links */ + uid_t st_uid; /* user ID of owner */ + gid_t st_gid; /* group ID of owner */ + dev_t st_rdev; /* device ID (if special file) */ + off_t st_size; /* total size, in bytes */ + blksize_t st_blksize; /* blocksize for file system I/O */ + blkcnt_t st_blocks; /* number of 512B blocks allocated */ + time_t st_atime; /* time of last access */ + time_t st_mtime; /* time of last modification */ + time_t st_ctime; /* time of last status change */ + }; + + The differences are: + + * st_dev is omitted + * st_ino is contained in the path component of qid + * times are nanosecond resolution + * btime, gen and data_version fields are reserved for future use + + Not all fields are valid in every call. request_mask is a bitmask + indicating which fields are requested. valid is a bitmask + indicating which fields are valid in the response. 
The mask + values are as follows: + + #define GETATTR_MODE 0x00000001 + #define GETATTR_NLINK 0x00000002 + #define GETATTR_UID 0x00000004 + #define GETATTR_GID 0x00000008 + #define GETATTR_RDEV 0x00000010 + #define GETATTR_ATIME 0x00000020 + #define GETATTR_MTIME 0x00000040 + #define GETATTR_CTIME 0x00000080 + #define GETATTR_INO 0x00000100 + #define GETATTR_SIZE 0x00000200 + #define GETATTR_BLOCKS 0x00000400 + + #define GETATTR_BTIME 0x00000800 + #define GETATTR_GEN 0x00001000 + #define GETATTR_DATA_VERSION 0x00002000 + + #define GETATTR_BASIC 0x000007ff /* Mask for fields up to BLOCKS */ + #define GETATTR_ALL 0x00003fff /* Mask for All fields above */ + +Tsetattr.L = 26: tag[2] fid[4] valid[4] mode[4] uid[4] gid[4] size[8] \ + atime_sec[8] atime_nsec[8] mtime_sec[8] mtime_nsec[8] +Rsetattr.L: tag[2] + setattr sets attributes of a file system object referenced by + fid. As with getattr, valid is a bitmask selecting which + fields to set, which can be any combination of: + + mode - Linux chmod(2) mode bits. + + uid, gid - New owner, group of the file as described in Linux chown(2). + + size - New file size as handled by Linux truncate(2). + + atime_sec, atime_nsec - Time of last file access. + + mtime_sec, mtime_nsec - Time of last file modification. + + The valid bits are defined as follows: + + #define SETATTR_MODE 0x00000001 + #define SETATTR_UID 0x00000002 + #define SETATTR_GID 0x00000004 + #define SETATTR_SIZE 0x00000008 + #define SETATTR_ATIME 0x00000010 + #define SETATTR_MTIME 0x00000020 + #define SETATTR_CTIME 0x00000040 + #define SETATTR_ATIME_SET 0x00000080 + #define SETATTR_MTIME_SET 0x00000100 + + If a time bit is set without the corresponding SET bit, the + current system time on the server is used instead of the value + sent in the request. + +Txattrwalk.L = 30: tag[2] fid[4] newfid[4] name[s] +Rxattrwalk.L: tag[2] size[8] + xattrwalk gets a newfid pointing to xattr name. This fid can + later be used to read the xattr value. 
If name is NULL newfid + can be used to get the list of extended attributes associated + with the file system object. + +Txattrcreate.L = 32: tag[2] fid[4] name[s] attr_size[8] flags[4] +Rxattrcreate.L: tag[2] + xattrcreate gets a fid pointing to the xattr name. This fid + can later be used to set the xattr value. + + flag is derived from set Linux setxattr. The manpage says + + The flags parameter can be used to refine the semantics of + the operation. XATTR_CREATE specifies a pure create, + which fails if the named attribute exists already. + XATTR_REPLACE specifies a pure replace operation, which + fails if the named attribute does not already exist. By + default (no flags), the extended attribute will be created + if need be, or will simply replace the value if the + attribute exists. + + The actual setxattr operation happens when the fid is clunked. + At that point the written byte count and the attr_size + specified in TXATTRCREATE should be same otherwise an error + will be returned. + +Treaddir.L = 40: tag[2] fid[4] offset[8] count[4] +Rreaddir.L: tag[2] count[4] data[count] + readdir requests that the server return directory entries from + the directory represented by fid, previously opened with + lopen. offset is zero on the first call. + + Directory entries are represented as variable-length records: + qid[qid] offset[8] type[1] name[s] + At most count bytes will be returned in data. If count is not + zero in the response, more data is available. On subsequent + calls, offset is the offset returned in the last directory + entry of the previous call. + +Tfsync.L = 50: tag[2] fid[4] +Rfsync.L: tag[2] + fsync tells the server to flush any cached data associated + with fid, previously opened with lopen. + +Tlock.L = 52: tag[2] fid[4] type[1] flags[4] start[8] length[8] \ + proc_id[4] client_id[s] +Rlock.L: tag[2] status[1] + lock is used to acquire or release a POSIX record lock on fid + and has semantics similar to Linux fcntl(F_SETLK). 
+ + type has one of the values: + + #define LOCK_TYPE_RDLCK 0 + #define LOCK_TYPE_WRLCK 1 + #define LOCK_TYPE_UNLCK 2 + + start, length, and proc_id correspond to the analagous fields + passed to Linux fcntl(F_SETLK): + + struct flock { + short l_type; /* Type of lock: F_RDLCK, F_WRLCK, F_UNLCK */ + short l_whence;/* How to intrprt l_start: SEEK_SET,SEEK_CUR,SEEK_END */ + off_t l_start; /* Starting offset for lock */ + off_t l_len; /* Number of bytes to lock */ + pid_t l_pid; /* PID of process blocking our lock (F_GETLK only) */ + }; + + flags bits are: + + #define LOCK_SUCCESS 0 + #define LOCK_BLOCKED 1 + #define LOCK_ERROR 2 + #define LOCK_GRACE 3 + + The Linux v9fs client implements the fcntl(F_SETLKW) + (blocking) lock request by calling lock with + LOCK_FLAGS_BLOCK set. If the response is LOCK_BLOCKED, + it retries the lock request in an interruptible loop until + status is no longer LOCK_BLOCKED. + + The Linux v9fs client translates BSD advisory locks (flock) to + whole-file POSIX record locks. v9fs does not implement + mandatory locks and will return ENOLCK if use is attempted. + + Because of POSIX record lock inheritance and upgrade + properties, pass-through servers must be implemented + carefully. + +Tgetlock.L = 54: tag[2] fid[4] type[1] start[8] length[8] proc_id[4] \ + client_id[s] +Rgetlock.L: tag[2] type[1] start[8] length[8] proc_id[4] client_id[s] + getlock tests for the existence of a POSIX record lock and has + semantics similar to Linux fcntl(F_GETLK). + + As with lock, type has one of the values defined above, and + start, length, and proc_id correspond to the analagous fields + in struct flock passed to Linux fcntl(F_GETLK), and client_Id + is an additional mechanism for uniquely identifying the lock + requester and is set to the nodename by the Linux v9fs client. + +Tlink.L = 70: tag[2] dfid[4] fid[4] name[s] +Rlink.L: tag[2] + link creates a hard link name in directory dfid. The link + target is referenced by fid. 
+ +Tmkdir.L = 72: tag[2] dfid[4] name[s] mode[4] gid[4] +Rmkdir.L: tag[2] qid[qid] + mkdir creates a new directory name in parent directory dfid. + + mode contains Linux mkdir(2) mode bits. + + gid is the effective group ID of the caller. + + The qid of the new directory is returned in the response. + +Trenameat.L = 74: tag[2] olddirfid[4] oldname[s] newdirfid[4] newname[s] +Rrenameat.L: tag[2] + Change the name of a file from oldname to newname, possible + moving it from old directory represented by olddirfid to new + directory represented by newdirfid. + + If the server returns ENOTSUPP, the client should fall back to + the rename operation. + +Tunlinkat.L = 76: tag[2] dirfd[4] name[s] flags[4] +Runlinkat.L: tag[2] + Unlink name from directory represented by dirfd. If the file + is represented by a fid, that fid is not clunked. If the + server returns ENOTSUPP, the client should fall back to the + remove operation. + + There seems to be only one defined flag: + + #define AT_REMOVEDIR 0x200 + +Tversion = 100: tag[2] msize[4] version[s]:auto +Rversion: tag[2] msize[4] version[s] + + negotiate protocol version + + version establishes the msize, which is the maximum message + size inclusive of the size value that can be handled by both + client and server. + + It also establishes the protocol version. For 9P2000.L + version must be the string 9P2000.L. + +Tauth = 102: tag[2] afid[4] uname[s] aname[s] n_uname[4] +Rauth: tag[2] aqid[qid] + auth initiates an authentication handshake for n_uname. + Rlerror is returned if authentication is not required. If + successful, afid is used to read/write the authentication + handshake (protocol does not specify what is read/written), + and afid is presented in the attach. + +Tattach = 104: tag[2] fid[4] afid[4] uname[s] aname[s] {.u: n_uname[4] } +Rattach: tag[2] qid[qid] + attach introduces a new user to the server, and establishes + fid as the root for that user on the file tree selected by + aname. 
+ + afid can be NOFID (~0) or the fid from a previous auth + handshake. The afid can be clunked immediately after the + attach. + + #define NOFID 0xffffffff + + n_uname, if not set to NONUNAME (~0), is the uid of the + user and is used in preference to uname. Note that it appears + in both .u and .L (unlike most .u-specific features). + + #define NONUNAME 0xffffffff + + v9fs has several modes of access which determine how it uses + attach. In the default access=user, an initial attach is sent + for the user provided in the uname=name mount option, and for + each user that accesses the file system thereafter. For + access=, only the initial attach is sent for and all other + users are denied access by the client. + +Rerror = 107: tag[2] errstr[s] {.u: errnum[4] } + +Tflush = 108: tag[2] oldtag[2] +Rflush: tag[2] + flush aborts an in-flight request referenced by oldtag, if any. + +Twalk = 110: tag[2] fid[4] newfid[4] nwname[2] nwname*(wname[s]) +Rwalk: tag[2] nwqid[2] nwqid*(wqid[qid]) + walk is used to descend a directory represented by fid using + successive path elements provided in the wname array. If + succesful, newfid represents the new path. + + fid can be cloned to newfid by calling walk with nwname set to + zero. + + if nwname==0, fid need not represent a directory. + +Topen = 112: tag[2] fid[4] mode[1] +Ropen: tag[2] qid[qid] iounit[4] + open prepares fid for file (or directory) I/O. + + mode is: + #define OREAD 0 /* open for read */ + #define OWRITE 1 /* open for write */ + #define ORDWR 2 /* open for read and write */ + #define OEXEC 3 /* open for execute */ + + #define OTRUNC 16 /* truncate (illegal if OEXEC) */ + #define OCEXEC 32 /* close on exec (nonsensical) */ + #define ORCLOSE 64 /* remove on close */ + #define ODIRECT 128 /* direct access (.u extension?) 
*/ + +Tcreate = 114: tag[2] fid[4] name[s] perm[4] mode[1] {.u: extension[s] } +Rcreate: tag[2] qid[qid] iounit[4] + create is similar to open; however, the incoming fid is the + diretory in which the file is to be created, and on success, + return, the fid refers to the then-created file. + +Tread = 116: tag[2] fid[4] offset[8] count[4] +Rread: tag[2] count[4] data[count] + perform a read on the file represented by fid. Note that in + v9fs, a read(2) or write(2) system call for a chunk of the + file that won't fit in a single request is broken up into + multiple requests. + + Under 9P2000.L, read cannot be used on directories. See readdir. + +Twrite = 118: tag[2] fid[4] offset[8] count[4] data[count] +Rwrite: tag[2] count[4] + perform a write on the file represented by fid. Note that in + v9fs, a read(2) or write(2) system call for a chunk of the + file that won't fit in a single request is broken up into + multiple requests. + + write cannot be used on directories. + +Tclunk = 120: tag[2] fid[4] +Rclunk: tag[2] + clunk signifies that fid is no longer needed by the client. + +Tremove = 122: tag[2] fid[4] +Rremove: tag[2] + remove removes the file system object represented by fid. + + The fid is always clunked (even on error). + +Tstat = 124: tag[2] fid[4] +Rstat: tag[2] size[2] data[size] + +Twstat = 126: tag[2] fid[4] size[2] data[size] +Rwstat: tag[2] +""" + +class _Token(object): + r""" + A scanned token. + + Tokens have a type (tok.ttype) and value (tok.value). The value + is generally the token itself, although sometimes a prefix and/or + suffix has been removed (for 'label', 'word*', ':aux', and + '[type]' tokens). If prefix and/or suffix are removed, the full + original token is + in its .orig. 
+ + Tokens are: + - 'word', 'word*', or 'label': + '[.\w]+' followed by optional '*' or ':': + + - 'aux': ':' followed by '\w+' (used for :auto annotation) + + - 'type': + open bracket '[', followed by '\w+' or '\d+' (only one of these), + followed by close bracket ']' + + - '(', ')', '{', '}': themeselves + + Each token can have arbitrary leading white space (which is + discarded). + + (Probably should return ':' as a char and handle it in parser, + but oh well.) + """ + def __init__(self, ttype, value, orig=None): + self.ttype = ttype + self.value = value + self.orig = value if orig is None else orig + if self.ttype == 'type' and self.value.isdigit(): + self.ival = int(self.value) + else: + self.ival = None + def __str__(self): + return self.orig + +_Token.tok_expr = re.compile(r'\s*([.\w]+(?:\*|:)?' + r'|:\w+' + r'|\[(?:\w+|\d+)\]' + r'|[(){}])') + +def _scan(string): + """ + Tokenize a string. + + Note: This raises a ValueError with the position of any unmatched + character in the string. 
+ """ + tlist = [] + + # make sure entire string is tokenized properly + pos = 0 + for item in _Token.tok_expr.finditer(string): + span = item.span() + if span[0] != pos: + print('error: unmatched character(s) in input\n{0}\n{1}^'.format( + string, ' ' * pos)) + raise ValueError('unmatched lexeme', pos) + pos = span[1] + tlist.append(item.group(1)) + if pos != len(string): + print('error: unmatched character(s) in input\n{0}\n{1}^'.format( + string, ' ' * pos)) + raise ValueError('unmatched lexeme', pos) + + # classify each token, stripping decorations + result = [] + for item in tlist: + if item in ('(', ')', '{', '}'): + tok = _Token(item, item) + elif item[0] == ':': + tok = _Token('aux', item[1:], item) + elif item.endswith(':'): + tok = _Token('label', item[0:-1], item) + elif item.endswith('*'): + tok = _Token('word*', item[0:-1], item) + elif item[0] == '[': + # integer or named type + if item[-1] != ']': + raise ValueError('internal error: "{0}" is not [...]'.format( + item)) + tok = _Token('type', item[1:-1], item) + else: + tok = _Token('word', item) + result.append(tok) + return result + +def _debug_print_sequencer(seq): + """for debugging""" + print('sequencer is {0!r}'.format(seq), file=sys.stderr) + for i, enc in enumerate(seq): + print(' [{0:d}] = {1}'.format(i, enc), file=sys.stderr) + +def _parse_expr(seq, string, typedefs): + """ + Parse "expression-ish" items, which is a list of: + name[type] + name*(subexpr) (a literal asterisk) + { label ... } + + The "type" may be an integer or a second name. In the case + of a second name it must be something from the typedefs table. + + The meaning of name[integer] is that we are going to encode + or decode a fixed-size field of that many bytes, using the + given name. + + For name[name2], we can look up name2 in our typedefs table. + The only real typedefs used here are "stat" and "s"; each + of these expands to a variable-size encode/decode. See the + special case below, though. + + The meaning of name*(...)
is: the earlier name will have been + defined by an earlier _parse_expr for this same line. That + earlier name provides a repeat-count. + + Inside the parens we get a name[type] sub-expression. This may + not recurse further, so we can use a pretty cheesy parser. + + As a special case, given name[name2], we first check whether + name2 is an earlier name a la name*(...). Here the meaning + is much like name2*(name[1]), except that the result is a + simple byte string, rather than an array. + + The meaning of "{ label ... " is that everything following up + to "}" is optional and used only with 9P2000.u and/or 9P2000.L. + Inside the {...} pair is the usual set of tokens, but again + {...} cannot recurse. + + The parse fills in a Sequencer instance, and returns a list + of the parsed names. + """ + names = [] + cond = None + + tokens = collections.deque(_scan(string)) + + def get_subscripted(tokens): + """ + Allows name[integer] and name1[name2] only; returns + tuple after stripping off both tokens, or returns None + and does not strip tokens. + """ + if len(tokens) == 0 or tokens[0].ttype != 'word': + return None + if len(tokens) > 1 and tokens[1].ttype == 'type': + word = tokens.popleft() + return word, tokens.popleft() + return None + + def lookup(name, typeinfo, aux=None): + """ + Convert cond (if not None) to its .value, so that instead + of (x, '.u') we get '.u'. + + Convert typeinfo to an encdec. Typeinfo may be 1/2/4/8, or + one of our typedef names. If it's a typedef name it will + normally correspond to an EncDecTyped, but we have one special + case for string types, and another for using an earlier-defined + variable.
+ """ + condval = None if cond is None else cond.value + if typeinfo.ival is None: + try: + cls, sub = typedefs[typeinfo.value] + except KeyError: + raise ValueError('unknown type name {0}'.format(typeinfo)) + # the type name is typeinfo.value; the corresponding + # pfod class is cls; the *variable* name is name; + # and the sub-sequence is sub. But if cls is None + # then it's our string type. + if cls is None: + encdec = sequencer.EncDecSimple(name, _STRING_MAGIC, aux) + else: + encdec = sequencer.EncDecTyped(cls, name, sub, aux) + else: + if typeinfo.ival not in (1, 2, 4, 8): + raise ValueError('bad integer code in {0}'.format(typeinfo)) + encdec = sequencer.EncDecSimple(name, typeinfo.ival, aux) + return condval, encdec + + def emit_simple(name, typeinfo, aux=None): + """ + Emit name[type]. We may be inside a conditional; if so + cond is not None. + """ + condval, encdec = lookup(name, typeinfo, aux) + seq.append_encdec(condval, encdec) + names.append(name) + + def emit_repeat(name1, name2, typeinfo): + """ + Emit name1*(name2[type]). + + Note that the conditional is buried in the sub-coder for + name2. It must be passed through anyway in case the sub- + coder is only partly conditional. If the sub-coder is + fully conditional, each sub-coding uses or produces no + bytes and hence the array itself is effectively conditional + as well (it becomes name1 * [None]). + + We don't (currently) have any auxiliary data for arrays. + """ + if name1 not in names: + raise ValueError('{0}*({1}[{2}]): ' + '{0} undefined'.format(name1, name2, + typeinfo.value)) + condval, encdec = lookup(name2, typeinfo) + encdec = sequencer.EncDecA(name1, name2, encdec) + seq.append_encdec(condval, encdec) + names.append(name2) + + def emit_bytes_repeat(name1, name2): + """ + Emit name1[name2], e.g., data[count]. + """ + condval = None if cond is None else cond.value + # Note that the two names are reversed when compared to + # count*(data[type]). 
The "sub-coder" is handled directly + # by EncDecA, hence is None. + # + # As a peculiar side effect, all bytes-repeats cause the + # count itself to become automatic (to have an aux of 'len'). + encdec = sequencer.EncDecA(name2, name1, None, 'len') + seq.append_encdec(condval, encdec) + names.append(name1) + + supported_conditions = ('.u') + while tokens: + token = tokens.popleft() + if token.ttype == 'label': + raise ValueError('misplaced label') + if token.ttype == 'aux': + raise ValueError('misplaced auxiliary') + if token.ttype == '{': + if cond is not None: + raise ValueError('nested "{"') + if len(tokens) == 0: + raise ValueError('unclosed "{"') + cond = tokens.popleft() + if cond.ttype != 'label': + raise ValueError('"{" not followed by cond label') + if cond.value not in supported_conditions: + raise ValueError('unsupported condition "{0}"'.format( + cond.value)) + continue + if token.ttype == '}': + if cond is None: + raise ValueError('closing "}" w/o opening "{"') + cond = None + continue + if token.ttype == 'word*': + if len(tokens) == 0 or tokens[0].ttype != '(': + raise ValueError('{0} not followed by (...)'.format(token)) + tokens.popleft() + repeat = get_subscripted(tokens) + if repeat is None: + raise ValueError('parse error after {0}('.format(token)) + if len(tokens) == 0 or tokens[0].ttype != ')': + raise ValueError('missing ")" after {0}({1}{2}'.format( + token, repeat[0], repeat[1])) + tokens.popleft() + # N.B.: a repeat cannot have an auxiliary info (yet?). + emit_repeat(token.value, repeat[0].value, repeat[1]) + continue + if token.ttype == 'word': + # Special case: _STRING_MAGIC turns into a string + # sequencer. This should be used with just one + # typedef (typedef s: _string_). 
+ if token.value == _STRING_MAGIC: + names.append(_STRING_MAGIC) # XXX temporary + continue + if len(tokens) == 0 or tokens[0].ttype != 'type': + raise ValueError('parse error after {0}'.format(token)) + type_or_size = tokens.popleft() + # Check for name[name2] where name2 is a word (not a + # number) that is in the names[] array. + if type_or_size.value in names: + # NB: this cannot have auxiliary info. + emit_bytes_repeat(token.value, type_or_size.value) + continue + if len(tokens) > 0 and tokens[0].ttype == 'aux': + aux = tokens.popleft() + if aux.value != 'auto': + raise ValueError('{0}{1}: only know "auto", not ' + '{2}'.format(token, type_or_size, + aux.value)) + emit_simple(token.value, type_or_size, aux.value) + else: + emit_simple(token.value, type_or_size) + continue + raise ValueError('"{0}" not valid here"'.format(token)) + + if cond is not None: + raise ValueError('unclosed "}"') + + return names + +class _ProtoDefs(object): + def __init__(self): + # Scan our typedefs. This may execute '#define's as well. + self.typedefs = {} + self.defines = {} + typedef_re = re.compile(r'\s*typedef\s+(\w+)\s*:\s*(.*)') + self.parse_lines('SDesc', SDesc, typedef_re, self.handle_typedef) + self.parse_lines('QIDDesc', QIDDesc, typedef_re, self.handle_typedef) + self.parse_lines('STATDesc', STATDesc, typedef_re, self.handle_typedef) + self.parse_lines('WirestatDesc', WirestatDesc, typedef_re, + self.handle_typedef) + self.parse_lines('DirentDesc', DirentDesc, typedef_re, + self.handle_typedef) + + # Scan protocol (the bulk of the work). This, too, may + # execute '#define's. 
+ self.protocol = {} + proto_re = re.compile(r'(\*?\w+)(\.\w+)?\s*(?:=\s*(\d+))?\s*:\s*(.*)') + self.prev_proto_value = None + self.parse_lines('ProtocolDesc', ProtocolDesc, + proto_re, self.handle_proto_def) + + self.setup_header() + + # set these up for export() + self.plain = {} + self.dotu = {} + self.dotl = {} + + def parse_lines(self, name, text, regexp, match_handler): + """ + Parse a sequence of lines. Match each line using the + given regexp, or (first) as a #define line. Note that + indented lines are either #defines or are commentary! + + If handling raises a ValueError, we complain and include + the appropriate line offset. Then we sys.exit(1) (!). + """ + define = re.compile(r'\s*#define\s+(\w+)\s+([^/]*)' + r'(\s*/\*.*\*/)?\s*$') + for lineoff, line in enumerate(text.splitlines()): + try: + match = define.match(line) + if match: + self.handle_define(*match.groups()) + continue + match = regexp.match(line) + if match: + match_handler(*match.groups()) + continue + if len(line) and not line[0].isspace(): + raise ValueError('unhandled line: {0}'.format(line)) + except ValueError as err: + print('Internal error while parsing {0}:\n' + ' {1}\n' + '(at line offset +{2}, discounting \\-newline)\n' + 'The original line in question reads:\n' + '{3}'.format(name, err.args[0], lineoff, line), + file=sys.stderr) + sys.exit(1) + + def handle_define(self, name, value, comment): + """ + Handle #define match. + + The regexp has three fields, matching the name, value, + and possibly-empty comment; these are our arguments. + """ + # Obnoxious: int(value, 0) requires new 0o syntax in py3k; + # work around by trying twice, once with base 0, then again + # with explicit base 8 if the first attempt fails.
+ try: + value = int(value, 0) + except ValueError: + value = int(value, 8) + if DEBUG: + print('define: defining {0} as {1:x}'.format(name, value), + file=sys.stderr) + if name in self.defines: + raise ValueError('redefining {0}'.format(name)) + self.defines[name] = (value, comment) + + def handle_typedef(self, name, expr): + """ + Handle typedef match. + + The regexp has just two fields, the name and the expression + to parse (note that the expression must fit all on one line, + using backslash-newline if needed). + + Typedefs may refer back to existing typedefs, so we pass + self.typedefs to _parse_expr(). + """ + seq = sequencer.Sequencer(name) + fields = _parse_expr(seq, expr, self.typedefs) + # Check for special string magic typedef. (The name + # probably should be just 's' but we won't check that + # here.) + if len(fields) == 1 and fields[0] == _STRING_MAGIC: + cls = None + else: + cls = pfod.pfod(name, fields) + if DEBUG: + print('typedef: {0} = {1!r}; '.format(name, fields), + end='', file=sys.stderr) + _debug_print_sequencer(seq) + if name in self.typedefs: + raise ValueError('redefining {0}'.format(name)) + self.typedefs[name] = cls, seq + + def handle_proto_def(self, name, proto_version, value, expr): + """ + Handle protocol definition. + + The regexp matched: + - The name of the protocol option such as Tversion, + Rversion, Rlerror, etc. + - The protocol version, if any (.u or .L). + - The value, if specified. If no value is specified + we use "the next value". + - The expression to parse. + + As with typedefs, the expression must fit all on one + line.
+ """ + if value: + value = int(value) + elif self.prev_proto_value is not None: + value = self.prev_proto_value + 1 + else: + raise ValueError('{0}: missing protocol value'.format(name)) + if value < 0 or value > 255: + raise ValueError('{0}: protocol value {1} out of ' + 'range'.format(name, value)) + self.prev_proto_value = value + + seq = sequencer.Sequencer(name) + fields = _parse_expr(seq, expr, self.typedefs) + cls = pfod.pfod(name, fields) + if DEBUG: + print('proto: {0} = {1}; '.format(name, value), + end='', file=sys.stderr) + _debug_print_sequencer(seq) + if name in self.protocol: + raise ValueError('redefining {0}'.format(name)) + self.protocol[name] = cls, value, proto_version, seq + + def setup_header(self): + """ + Handle header definition. + + This is a bit gimmicky and uses some special cases, + because data is sized to dsize which is effectively + just size - 5. We can't express this in our mini language, + so we just hard-code the sequencer and pfod. + + In addition, the unpacker never gets the original packet's + size field, only the fcall and the data. + """ + self.header_pfod = pfod.pfod('Header', 'size dsize fcall data') + + seq = sequencer.Sequencer('Header-pack') + # size: 4 bytes + seq.append_encdec(None, sequencer.EncDecSimple('size', 4, None)) + # fcall: 1 byte + seq.append_encdec(None, sequencer.EncDecSimple('fcall', 1, None)) + # data: string of length dsize + seq.append_encdec(None, sequencer.EncDecA('dsize', 'data', None)) + if DEBUG: + print('Header-pack:', file=sys.stderr) + _debug_print_sequencer(seq) + self.header_pack_seq = seq + + seq = sequencer.Sequencer('Header-unpack') + seq.append_encdec(None, sequencer.EncDecSimple('fcall', 1, None)) + seq.append_encdec(None, sequencer.EncDecA('dsize', 'data', None)) + if DEBUG: + print('Header-unpack:', file=sys.stderr) + _debug_print_sequencer(seq) + self.header_unpack_seq = seq + + def export(self, mod): + """ + Dump results of internal parsing process + into our module namespace. 
+ + Note that we do not export the 's' typedef, which + did not define a data structure. + + Check for name collisions while we're at it. + """ + namespace = type('td', (object,), {}) + + # Export the typedefs (qid, stat). + setattr(mod, 'td', namespace) + for key in self.typedefs: + cls = self.typedefs[key][0] + if cls is None: + continue + setattr(namespace, key, cls) + + # Export two sequencers for en/decoding stat fields + # (needed for reading directories and doing Twstat). + setattr(namespace, 'stat_seq', self.typedefs['stat'][1]) + setattr(namespace, 'wirestat_seq', self.typedefs['wirestat'][1]) + + # Export the similar dirent decoder. + setattr(namespace, 'dirent_seq', self.typedefs['dirent'][1]) + + # Export the #define values + for key, val in self.defines.items(): + if hasattr(namespace, key): + print('{0!r} is both a #define and a typedef'.format(key)) + raise AssertionError('bad internal names') + setattr(namespace, key, val[0]) + + # Export Tattach, Rattach, Twrite, Rversion, etc values. + # Set up fcall_names[] table to map from value back to name. + # We also map fcall names to themselves, so given either a + # name or a byte code we can find out whether it's a valid + # fcall. + for key, val in self.protocol.items(): + if hasattr(namespace, key): + prev_def = '#define' if key in self.defines else 'typedef' + print('{0!r} is both a {1} and a protocol ' + 'value'.format(key, prev_def)) + raise AssertionError('bad internal names') + setattr(namespace, key, val[1]) + fcall_names[key] = key + fcall_names[val[1]] = key + + # Hook up PFOD's for each protocol object -- for + # Tversion/Rversion, Twrite/Rwrite, Tlopen/Rlopen, etc. + # They go in the rrd name-space, and also in dictionaries + # per-protocol here, with the lookup pointing to a _PackInfo + # for the corresponding sequencer. + # + # Note that each protocol PFOD is optionally annotated with + # its specific version. 
We know that .L > .u > plain; but + # all the "lesser" PFODs are available to all "greater" + # protocols at all times. + # + # (This is sort-of-wrong for Rerror vs Rlerror, but we + # don't bother to exclude Rerror from .L.) + # + # The PFODs themselves were already created, at parse time. + namespace = type('rrd', (object,), {}) + setattr(mod, 'rrd', namespace) + for key, val in self.protocol.items(): + cls = val[0] + proto_version = val[2] + seq = val[3] + packinfo = _PackInfo(seq) + if proto_version is None: + # all three protocols have it + self.plain[cls] = packinfo + self.dotu[cls] = packinfo + self.dotl[cls] = packinfo + elif proto_version == '.u': + # only .u and .L have it + self.dotu[cls] = packinfo + self.dotl[cls] = packinfo + elif proto_version == '.L': + # only .L has it + self.dotl[cls] = packinfo + else: + raise AssertionError('unknown protocol {1} for ' + '{0}'.format(key, proto_version)) + setattr(namespace, key, cls) + +_9p_data = _ProtoDefs() +_9p_data.export(sys.modules[__name__]) + +# Currently we look up by text-string, in lowercase. +_9p_versions = { + '9p2000': _P9Proto({'version': '9P2000'}, + {'.u': False}, + _9p_data, + _9p_data.plain, + 0), + '9p2000.u': _P9Proto({'version': '9P2000.u'}, + {'.u': True}, + _9p_data, + _9p_data.dotu, + 1), + '9p2000.l': _P9Proto({'version': '9P2000.L'}, + {'.u': True}, + _9p_data, + _9p_data.dotl, + 2), +} +def p9_version(vers_string): + """ + Return protocol implementation of given version. Raises + KeyError if the version is invalid. Note that the KeyError + will be on a string-ified, lower-cased version of the vers_string + argument, even if it comes in as a bytes instance in py3k. 
+ """ + if not isinstance(vers_string, str) and isinstance(vers_string, bytes): + vers_string = vers_string.decode('utf-8', 'surrogateescape') + return _9p_versions[vers_string.lower()] + +plain = p9_version('9p2000') +dotu = p9_version('9p2000.u') +dotl = p9_version('9p2000.L') + +def qid_type2name(qidtype): + """ + Convert qid type field to printable string. + + >>> qid_type2name(td.QTDIR) + 'dir' + >>> qid_type2name(td.QTAPPEND) + 'append-only' + >>> qid_type2name(0xff) + 'invalid(0xff)' + """ + try: + # Is it ever OK to have multiple bits set, + # e.g., both QTAPPEND and QTEXCL? + return { + td.QTDIR: 'dir', + td.QTAPPEND: 'append-only', + td.QTEXCL: 'exclusive', + td.QTMOUNT: 'mount', + td.QTAUTH: 'auth', + td.QTTMP: 'tmp', + td.QTSYMLINK: 'symlink', + td.QTFILE: 'file', + }[qidtype] + except KeyError: + pass + return 'invalid({0:#x})'.format(qidtype) + +if __name__ == '__main__': + import doctest + doctest.testmod() Index: lib/lib9p/pytest/sequencer.py =================================================================== --- /dev/null +++ lib/lib9p/pytest/sequencer.py @@ -0,0 +1,653 @@ +#! /usr/bin/env python + +from __future__ import print_function + +#__all__ = ['EncDec', 'EncDecSimple', 'EncDecTyped', 'EncDecA', +# 'SequenceError', 'Sequencer'] + +import abc +import struct +import sys + +_ProtoStruct = { + '1': struct.Struct(') is the unicode-ized string type). + + EncDec also provides b2u() and u2b() to do conversion to/from + Unicode. + + These are partly for internal use (all strings get converted + to UTF-8 byte sequences when coding a _string_ type) and partly + for doctests, where we just want some py2k/py3k compat hacks. 
+ """ + def __init__(self, name, aux): + self.name = name + self.aux = aux + + @staticmethod + def b2u(byte_sequence): + "transform bytes to unicode" + return byte_sequence.decode('utf-8', 'surrogateescape') + + @staticmethod + def u2b(unicode_sequence): + "transform unicode to bytes" + return unicode_sequence.encode('utf-8', 'surrogateescape') + + if sys.version_info[0] >= 3: + b2s = b2u + @staticmethod + def s2b(string): + "transform string to bytes (leaves raw byte sequence unchanged)" + if isinstance(string, bytes): + return string + return string.encode('utf-8', 'surrogateescape') + else: + @staticmethod + def b2s(byte_sequence): + "transform bytes to string - no-op in python2.7" + return byte_sequence + @staticmethod + def s2b(string): + "transform string or unicode to bytes" + if isinstance(string, unicode): + return string.encode('utf-8', 'surrogateescape') + return string + + def pack(self, vdict, cdict, val): + "encode value into a byte-string" + return b''.join(self.apack(vdict, cdict, val)) + + @abc.abstractmethod + def apack(self, vdict, cdict, val): + "encode value into [bytes1, b2, ..., bN]" + + @abc.abstractmethod + def unpack(self, vdict, cdict, bstring, offset, noerror=False): + "unpack bytes from bstring at offset" + + +class EncDecSimple(EncDec): + r""" + Encode/decode a simple (but named) field. The field is not an + array, which requires using EncDecA, nor a typed object + like a qid or stat instance -- those require a Sequencer and + EncDecTyped. + + The format is one of '1'/1, '2'/2, '4'/4, '8'/8, or '_string_'. + + Note: using b2s here is purely a doctest/testmod python2/python3 + compat hack. The output of e.pack is a bytes object; b2s + converts it to a string, purely for display purposes. (It might + be better to map py2 output to bytes but they just print as a + string anyway.) In normal use, you should not call b2s here.
+ + >>> e = EncDecSimple('eggs', 2) + >>> e.b2s(e.pack({}, {}, 0)) + '\x00\x00' + >>> e.b2s(e.pack({}, {}, 256)) + '\x00\x01' + + Values that cannot be packed produce a SequenceError: + + >>> e.pack({}, {}, None) + Traceback (most recent call last): + ... + SequenceError: failed while packing 'eggs'=None + >>> e.pack({}, {}, -1) + Traceback (most recent call last): + ... + SequenceError: failed while packing 'eggs'=-1 + + Unpacking both returns a value, and tells how many bytes it + used out of the bytestring or byte-array argument. If there + are not enough bytes remaining at the starting offset, it + raises a SequenceError, unless noerror=True (then unset + values are None) + + >>> e.unpack({}, {}, b'\x00\x01', 0) + (256, 2) + >>> e.unpack({}, {}, b'', 0) + Traceback (most recent call last): + ... + SequenceError: out of data while unpacking 'eggs' + >>> e.unpack({}, {}, b'', 0, noerror=True) + (None, 2) + + Note that strings can be provided as regular strings, byte + strings (same as regular strings in py2k), or Unicode strings + (same as regular strings in py3k). Unicode strings will be + converted to UTF-8 before being packed. Since this leaves + 7-bit characters alone, these examples work in both py2k and + py3k. (Note: the UTF-8 encoding of u'\u1234' is + '\0xe1\0x88\0xb4' or 225, 136, 180. The b2i trick below is + another py2k vs py3k special case just for doctests: py2k + tries to display the utf-8 encoded data as a string.) + + >>> e = EncDecSimple('spam', '_string_') + >>> e.b2s(e.pack({}, {}, 'p3=unicode,p2=bytes')) + '\x13\x00p3=unicode,p2=bytes' + + >>> e.b2s(e.pack({}, {}, b'bytes')) + '\x05\x00bytes' + + >>> import sys + >>> ispy3k = sys.version_info[0] >= 3 + + >>> b2i = lambda x: x if ispy3k else ord(x) + >>> [b2i(x) for x in e.pack({}, {}, u'\u1234')] + [3, 0, 225, 136, 180] + + The byte length of the utf-8 data cannot exceed 65535 since + the encoding has the length as a 2-byte field (a la the + encoding for 'eggs' here). 
A too-long string produces + a SequenceError as well. + + >>> e.pack({}, {}, 16384 * 'spam') + Traceback (most recent call last): + ... + SequenceError: string too long (len=65536) while packing 'spam' + + Unpacking strings produces byte arrays. (Of course, + in py2k these are also known as str.) + + >>> unpacked = e.unpack({}, {}, b'\x04\x00data', 0) + >>> etype = bytes if ispy3k else str + >>> print(isinstance(unpacked[0], etype)) + True + >>> e.b2s(unpacked[0]) + 'data' + >>> unpacked[1] + 6 + + You may use e.b2s() to convert them to unicode strings in py3k, + or you may set e.autob2s. This still only really does + anything in py3k, since py2k strings *are* bytes, so it's + really just intended for doctest purposes (see EncDecA): + + >>> e.autob2s = True + >>> e.unpack({}, {}, b'\x07\x00stringy', 0) + ('stringy', 9) + """ + def __init__(self, name, fmt, aux=None): + super(EncDecSimple, self).__init__(name, aux) + self.fmt = fmt + self.struct = _ProtoStruct[fmt] + self.autob2s = False + + def __repr__(self): + if self.aux is None: + return '{0}({1!r}, {2!r})'.format(self.__class__.__name__, + self.name, self.fmt) + return '{0}({1!r}, {2!r}, {3!r})'.format(self.__class__.__name__, + self.name, self.fmt, self.aux) + + __str__ = __repr__ + + def apack(self, vdict, cdict, val): + "encode a value" + try: + if self.struct: + return [self.struct.pack(val)] + sval = self.s2b(val) + if len(sval) > 65535: + raise SequenceError('string too long (len={0:d}) ' + 'while packing {1!r}'.format(len(sval), self.name)) + return [EncDecSimple.string_len.pack(len(sval)), sval] + # Include AttributeError in case someone tries to, e.g., + # pack name=None and self.s2b() tries to use .encode on it.
+ except (struct.error, AttributeError): + raise SequenceError('failed ' + 'while packing {0!r}={1!r}'.format(self.name, val)) + + def _unpack1(self, via, bstring, offset, noerror): + "internal function to unpack single item" + try: + tup = via.unpack_from(bstring, offset) + except struct.error as err: + if 'unpack_from requires a buffer of at least' in str(err): + if noerror: + return None, offset + via.size + raise SequenceError('out of data ' + 'while unpacking {0!r}'.format(self.name)) + # not clear what to do here if noerror + raise SequenceError('failed ' + 'while unpacking {0!r}'.format(self.name)) + assert len(tup) == 1 + return tup[0], offset + via.size + + def unpack(self, vdict, cdict, bstring, offset, noerror=False): + "decode a value; return the value and the new offset" + if self.struct: + return self._unpack1(self.struct, bstring, offset, noerror) + slen, offset = self._unpack1(EncDecSimple.string_len, bstring, offset, + noerror) + if slen is None: + return None, offset + nexto = offset + slen + if len(bstring) < nexto: + if noerror: + val = None + else: + raise SequenceError('out of data ' + 'while unpacking {0!r}'.format(self.name)) + else: + val = bstring[offset:nexto] + if self.autob2s: + val = self.b2s(val) + return val, nexto + +# string length: 2 byte unsigned field +EncDecSimple.string_len = _ProtoStruct[2] + +class EncDecTyped(EncDec): + r""" + EncDec for typed objects (which are build from PFODs, which are + a sneaky class variant of OrderedDict similar to namedtuple). + + Calling the klass() function with no arguments must create an + instance with all-None members. + + We also require a Sequencer to pack and unpack the members of + the underlying pfod. 
+ + >>> qid_s = Sequencer('qid') + >>> qid_s.append_encdec(None, EncDecSimple('type', 1)) + >>> qid_s.append_encdec(None, EncDecSimple('version', 4)) + >>> qid_s.append_encdec(None, EncDecSimple('path', 8)) + >>> len(qid_s) + 3 + + >>> from pfod import pfod + >>> qid = pfod('qid', ['type', 'version', 'path']) + >>> len(qid._fields) + 3 + >>> qid_inst = qid(1, 2, 3) + >>> qid_inst + qid(type=1, version=2, path=3) + + >>> e = EncDecTyped(qid, 'aqid', qid_s) + >>> e.b2s(e.pack({}, {}, qid_inst)) + '\x01\x02\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00' + >>> e.unpack({}, {}, + ... b'\x01\x02\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00', 0) + (qid(type=1, version=2, path=3), 13) + + If an EncDecTyped instance has a conditional sequencer, note + that unpacking will leave un-selected items set to None (see + the Sequencer example below): + + >>> breakfast = pfod('breakfast', 'eggs spam ham') + >>> breakfast() + breakfast(eggs=None, spam=None, ham=None) + >>> bfseq = Sequencer('breakfast') + >>> bfseq.append_encdec(None, EncDecSimple('eggs', 1)) + >>> bfseq.append_encdec('yuck', EncDecSimple('spam', 1)) + >>> bfseq.append_encdec(None, EncDecSimple('ham', 1)) + >>> e = EncDecTyped(breakfast, 'bfname', bfseq) + >>> e.unpack({}, {'yuck': False}, b'\x02\x01\x04', 0) + (breakfast(eggs=2, spam=None, ham=1), 2) + + This used just two of the three bytes: eggs=2, ham=1. + + >>> e.unpack({}, {'yuck': True}, b'\x02\x01\x04', 0) + (breakfast(eggs=2, spam=1, ham=4), 3) + + This used the third byte, so ham=4. 
+ """ + def __init__(self, klass, name, sequence, aux=None): + assert len(sequence) == len(klass()._fields) # temporary + super(EncDecTyped, self).__init__(name, aux) + self.klass = klass + self.name = name + self.sequence = sequence + + def __repr__(self): + if self.aux is None: + return '{0}({1!r}, {2!r}, {3!r})'.format(self.__class__.__name__, + self.klass, self.name, self.sequence) + return '{0}({1!r}, {2!r}, {3!r}, {4!r})'.format(self.__class__.__name__, + self.klass, self.name, self.sequence, self.aux) + + __str__ = __repr__ + + def apack(self, vdict, cdict, val): + """ + Pack each of our instance variables. + + Note that some packing may be conditional. + """ + return self.sequence.apack(val, cdict) + + def unpack(self, vdict, cdict, bstring, offset, noerror=False): + """ + Unpack each instance variable, into a new object of + self.klass. Return the new instance and new offset. + + Note that some unpacking may be conditional. + """ + obj = self.klass() + offset = self.sequence.unpack_from(obj, cdict, bstring, offset, noerror) + return obj, offset + +class EncDecA(EncDec): + r""" + EncDec for arrays (repeated objects). + + We take the name of repeat count variable, and a sub-coder + (Sequencer instance). For instance, we can en/de-code + repeat='nwname' copies of name='wname', or nwname of + name='wqid', in a Twalk en/de-code. + + Note that we don't pack or unpack the repeat count itself -- + that must be done by higher level code. We just get its value + from vdict. + + >>> subcode = EncDecSimple('wname', '_string_') + >>> e = EncDecA('nwname', 'wname', subcode) + >>> e.b2s(e.pack({'nwname': 2}, {}, ['A', 'BC'])) + '\x01\x00A\x02\x00BC' + + >>> subcode.autob2s = True # so that A and BC decode to py3k str + >>> e.unpack({'nwname': 2}, {}, b'\x01\x00A\x02\x00BC', 0) + (['A', 'BC'], 7) + + When using noerror, the first sub-item that fails to decode + completely starts the None-s. 
Strings whose length fails to + decode are assumed to be zero bytes long as well, for the + purpose of showing the expected packet length: + + >>> e.unpack({'nwname': 2}, {}, b'\x01\x00A\x02\x00', 0, noerror=True) + (['A', None], 7) + >>> e.unpack({'nwname': 2}, {}, b'\x01\x00A\x02', 0, noerror=True) + (['A', None], 5) + >>> e.unpack({'nwname': 3}, {}, b'\x01\x00A\x02', 0, noerror=True) + (['A', None, None], 7) + + As a special case, supplying None for the sub-coder + makes the repeated item pack or unpack a simple byte + string. (Note that autob2s is not supported here.) + A too-short byte string is simply truncated! + + >>> e = EncDecA('count', 'data', None) + >>> e.b2s(e.pack({'count': 5}, {}, b'12345')) + '12345' + >>> x = list(e.unpack({'count': 3}, {}, b'123', 0)) + >>> x[0] = e.b2s(x[0]) + >>> x + ['123', 3] + >>> x = list(e.unpack({'count': 3}, {}, b'12', 0, noerror=True)) + >>> x[0] = e.b2s(x[0]) + >>> x + ['12', 3] + """ + def __init__(self, repeat, name, sub, aux=None): + super(EncDecA, self).__init__(name, aux) + self.repeat = repeat + self.name = name + self.sub = sub + + def __repr__(self): + if self.aux is None: + return '{0}({1!r}, {2!r}, {3!r})'.format(self.__class__.__name__, + self.repeat, self.name, self.sub) + return '{0}({1!r}, {2!r}, {3!r}, {4!r})'.format(self.__class__.__name__, + self.repeat, self.name, self.sub, self.aux) + + __str__ = __repr__ + + def apack(self, vdict, cdict, val): + "pack each val[i], for i in range(vdict[self.repeat])" + num = vdict[self.repeat] + assert num == len(val) + if self.sub is None: + assert isinstance(val, bytes) + return [val] + parts = [] + for i in val: + parts.extend(self.sub.apack(vdict, cdict, i)) + return parts + + def unpack(self, vdict, cdict, bstring, offset, noerror=False): + "unpack repeatedly, per self.repeat, into new array." 
+ num = vdict[self.repeat] + if num is None and noerror: + num = 0 + else: + assert num >= 0 + if self.sub is None: + nexto = offset + num + if len(bstring) < nexto and not noerror: + raise SequenceError('out of data ' + 'while unpacking {0!r}'.format(self.name)) + return bstring[offset:nexto], nexto + array = [] + for i in range(num): + obj, offset = self.sub.unpack(vdict, cdict, bstring, offset, + noerror) + array.append(obj) + return array, offset + +class SequenceError(Exception): + "sequence error: item too big, or ran out of data" + pass + +class Sequencer(object): + r""" + A sequencer is an object that packs (marshals) or unpacks + (unmarshals) a series of objects, according to their EncDec + instances. + + The objects themselves (and their values) come from, or + go into, a dictionary: vdict, the first argument to + pack/unpack. + + Some fields may be conditional. The conditions are in a + separate dictionary (the second or cdict argument). + + Some objects may be dictionaries or PFODs, e.g., they may + be a Plan9 qid or stat structure. These have their own + sub-encoding. + + As with each encoder, we have both an apack() function + (returns a list of parts) and a plain pack(). Users should + mostly stick with plain pack(). + + >>> s = Sequencer('monty') + >>> s + Sequencer('monty') + >>> e = EncDecSimple('eggs', 2) + >>> s.append_encdec(None, e) + >>> s.append_encdec(None, EncDecSimple('spam', 1)) + >>> s[0] + (None, EncDecSimple('eggs', 2)) + >>> e.b2s(s.pack({'eggs': 513, 'spam': 65}, {})) + '\x01\x02A' + + When particular fields are conditional, they appear in + packed output, or are taken from the byte-string during + unpacking, only if their condition is true. + + As with struct, use unpack_from to start at an arbitrary + offset and/or omit verification that the entire byte-string + is consumed.
+ + >>> s = Sequencer('python') + >>> s.append_encdec(None, e) + >>> s.append_encdec('.u', EncDecSimple('spam', 1)) + >>> s[1] + ('.u', EncDecSimple('spam', 1)) + >>> e.b2s(s.pack({'eggs': 513, 'spam': 65}, {'.u': True})) + '\x01\x02A' + >>> e.b2s(s.pack({'eggs': 513, 'spam': 65}, {'.u': False})) + '\x01\x02' + + >>> d = {} + >>> s.unpack(d, {'.u': True}, b'\x01\x02A') + >>> print(d['eggs'], d['spam']) + 513 65 + >>> d = {} + >>> s.unpack(d, {'.u': False}, b'\x01\x02A', 0) + Traceback (most recent call last): + ... + SequenceError: 1 byte(s) unconsumed + >>> s.unpack_from(d, {'.u': False}, b'\x01\x02A', 0) + 2 + >>> print(d) + {'eggs': 513} + + The incoming dictionary-like object may be pre-initialized + if you like; only sequences that decode are filled-in: + + >>> d = {'eggs': None, 'spam': None} + >>> s.unpack_from(d, {'.u': False}, b'\x01\x02A', 0) + 2 + >>> print(d['eggs'], d['spam']) + 513 None + + Some objects may be arrays; if so their EncDec is actually + an EncDecA, the repeat count must be in the dictionary, and + the object itself must have a len() and be index-able: + + >>> s = Sequencer('arr') + >>> s.append_encdec(None, EncDecSimple('n', 1)) + >>> ae = EncDecSimple('array', 2) + >>> s.append_encdec(None, EncDecA('n', 'array', ae)) + >>> ae.b2s(s.pack({'n': 2, 'array': [257, 514]}, {})) + '\x02\x01\x01\x02\x02' + + Unpacking an array creates a list of the number of items. 
+ The EncDec encoder that decodes the number of items needs to + occur first in the sequencer, so that the dictionary will have + acquired the repeat-count variable's value by the time we hit + the array's encdec: + + >>> d = {} + >>> s.unpack(d, {}, b'\x01\x04\x00') + >>> d['n'], d['array'] + (1, [4]) + """ + def __init__(self, name): + self.name = name + self._codes = [] + self.debug = False # or sys.stderr + + def __repr__(self): + return '{0}({1!r})'.format(self.__class__.__name__, self.name) + + __str__ = __repr__ + + def __len__(self): + return len(self._codes) + + def __iter__(self): + return iter(self._codes) + + def __getitem__(self, index): + return self._codes[index] + + def dprint(self, *args, **kwargs): + if not self.debug: + return + if isinstance(self.debug, bool): + dest = sys.stdout + else: + dest = self.debug + print(*args, file=dest, **kwargs) + + def append_encdec(self, cond, code): + "add EncDec en/de-coder, conditional on cond" + self._codes.append((cond, code)) + + def apack(self, vdict, cdict): + """ + Produce packed representation of each field. + """ + packed_data = [] + for cond, code in self._codes: + # Skip this item if it's conditional on a false thing. + if cond is not None and not cdict[cond]: + self.dprint('skip %r - %r is False' % (code, cond)) + continue + + # Pack the item. + self.dprint('pack %r - no cond or %r is True' % (code, cond)) + packed_data.extend(code.apack(vdict, cdict, vdict[code.name])) + + return packed_data + + def pack(self, vdict, cdict): + """ + Flatten packed data. + """ + return b''.join(self.apack(vdict, cdict)) + + def unpack_from(self, vdict, cdict, bstring, offset=0, noerror=False): + """ + Unpack from byte string. + + The values are unpacked into a dictionary vdict; + some of its entries may themselves be ordered + dictionaries created by typedefed codes. + + Raises SequenceError if the string is too short, + unless you set noerror, in which case we assume + you want see what you can get out of the data. 
+ """ + for cond, code in self._codes: + # Skip this item if it's conditional on a false thing. + if cond is not None and not cdict[cond]: + self.dprint('skip %r - %r is False' % (code, cond)) + continue + + # Unpack the item. + self.dprint('unpack %r - no cond or %r is True' % (code, cond)) + obj, offset = code.unpack(vdict, cdict, bstring, offset, noerror) + vdict[code.name] = obj + + return offset + + def unpack(self, vdict, cdict, bstring, noerror=False): + """ + Like unpack_from but unless noerror=True, requires that + we completely use up the given byte string. + """ + offset = self.unpack_from(vdict, cdict, bstring, 0, noerror) + if not noerror and offset != len(bstring): + raise SequenceError('{0} byte(s) unconsumed'.format( + len(bstring) - offset)) + +if __name__ == '__main__': + import doctest + doctest.testmod() Index: lib/lib9p/pytest/testconf.ini.sample =================================================================== --- /dev/null +++ lib/lib9p/pytest/testconf.ini.sample @@ -0,0 +1,16 @@ +# test configuration + +[client] +server = localhost +port = 12345 +# timeout is in seconds +timeout = 0.1 +loglevel = INFO +logfile = ./ctest.log +# logfmt = ... +# protocol = 9p2000, 9p2000.u, or 9p2000.L +protocol = 9p2000.L +only_dotl = true +may_downgrade = False +uname = anonymous +n_uname = 1001 Index: lib/lib9p/request.c =================================================================== --- /dev/null +++ lib/lib9p/request.c @@ -0,0 +1,1438 @@ +/* + * Copyright 2016 Jakub Klama + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include +#include +#include +#include +#include +#if defined(__FreeBSD__) +#include +#else +#include "sbuf/sbuf.h" +#endif +#include "lib9p.h" +#include "lib9p_impl.h" +#include "fcall.h" +#include "fid.h" +#include "hashtable.h" +#include "log.h" +#include "linux_errno.h" +#include "backend/backend.h" +#include "threadpool.h" + +#define N(x) (sizeof(x) / sizeof(x[0])) + +static int l9p_dispatch_tversion(struct l9p_request *req); +static int l9p_dispatch_tattach(struct l9p_request *req); +static int l9p_dispatch_tclunk(struct l9p_request *req); +static int l9p_dispatch_tcreate(struct l9p_request *req); +static int l9p_dispatch_topen(struct l9p_request *req); +static int l9p_dispatch_tread(struct l9p_request *req); +static int l9p_dispatch_tremove(struct l9p_request *req); +static int l9p_dispatch_tstat(struct l9p_request *req); +static int l9p_dispatch_twalk(struct l9p_request *req); +static int l9p_dispatch_twrite(struct l9p_request *req); +static int l9p_dispatch_twstat(struct l9p_request *req); 
+static int l9p_dispatch_tstatfs(struct l9p_request *req); +static int l9p_dispatch_tlopen(struct l9p_request *req); +static int l9p_dispatch_tlcreate(struct l9p_request *req); +static int l9p_dispatch_tsymlink(struct l9p_request *req); +static int l9p_dispatch_tmknod(struct l9p_request *req); +static int l9p_dispatch_trename(struct l9p_request *req); +static int l9p_dispatch_treadlink(struct l9p_request *req); +static int l9p_dispatch_tgetattr(struct l9p_request *req); +static int l9p_dispatch_tsetattr(struct l9p_request *req); +static int l9p_dispatch_txattrwalk(struct l9p_request *req); +static int l9p_dispatch_txattrcreate(struct l9p_request *req); +static int l9p_dispatch_treaddir(struct l9p_request *req); +static int l9p_dispatch_tfsync(struct l9p_request *req); +static int l9p_dispatch_tlock(struct l9p_request *req); +static int l9p_dispatch_tgetlock(struct l9p_request *req); +static int l9p_dispatch_tlink(struct l9p_request *req); +static int l9p_dispatch_tmkdir(struct l9p_request *req); +static int l9p_dispatch_trenameat(struct l9p_request *req); +static int l9p_dispatch_tunlinkat(struct l9p_request *req); + +/* + * Each Txxx handler has a "must run" flag. If it is false, + * we check for a flush request before calling the handler. + * If a flush is already requested we can instantly fail the + * request with EINTR. + * + * Tclunk and Tremove must run because they make their fids + * become invalid. Tversion and Tattach should never get + * a flush request applied (it makes no sense as the connection + * is not really running yet), so it should be harmless to + * set them either way, but for now we have them as must-run. + * Flushing a Tflush is not really allowed either so we keep + * these as must-run too (although they run without being done + * threaded anyway). 
+ */ +struct l9p_handler { + enum l9p_ftype type; + int (*handler)(struct l9p_request *); + bool must_run; +}; + +static const struct l9p_handler l9p_handlers_no_version[] = { + {L9P_TVERSION, l9p_dispatch_tversion, true}, +}; + +static const struct l9p_handler l9p_handlers_base[] = { + {L9P_TVERSION, l9p_dispatch_tversion, true}, + {L9P_TATTACH, l9p_dispatch_tattach, true}, + {L9P_TCLUNK, l9p_dispatch_tclunk, true}, + {L9P_TFLUSH, l9p_threadpool_tflush, true}, + {L9P_TCREATE, l9p_dispatch_tcreate, false}, + {L9P_TOPEN, l9p_dispatch_topen, false}, + {L9P_TREAD, l9p_dispatch_tread, false}, + {L9P_TWRITE, l9p_dispatch_twrite, false}, + {L9P_TREMOVE, l9p_dispatch_tremove, true}, + {L9P_TSTAT, l9p_dispatch_tstat, false}, + {L9P_TWALK, l9p_dispatch_twalk, false}, + {L9P_TWSTAT, l9p_dispatch_twstat, false} +}; +static const struct l9p_handler l9p_handlers_dotu[] = { + {L9P_TVERSION, l9p_dispatch_tversion, true}, + {L9P_TATTACH, l9p_dispatch_tattach, true}, + {L9P_TCLUNK, l9p_dispatch_tclunk, true}, + {L9P_TFLUSH, l9p_threadpool_tflush, true}, + {L9P_TCREATE, l9p_dispatch_tcreate, false}, + {L9P_TOPEN, l9p_dispatch_topen, false}, + {L9P_TREAD, l9p_dispatch_tread, false}, + {L9P_TWRITE, l9p_dispatch_twrite, false}, + {L9P_TREMOVE, l9p_dispatch_tremove, true}, + {L9P_TSTAT, l9p_dispatch_tstat, false}, + {L9P_TWALK, l9p_dispatch_twalk, false}, + {L9P_TWSTAT, l9p_dispatch_twstat, false} +}; +static const struct l9p_handler l9p_handlers_dotL[] = { + {L9P_TVERSION, l9p_dispatch_tversion, true}, + {L9P_TATTACH, l9p_dispatch_tattach, true}, + {L9P_TCLUNK, l9p_dispatch_tclunk, true}, + {L9P_TFLUSH, l9p_threadpool_tflush, true}, + {L9P_TCREATE, l9p_dispatch_tcreate, false}, + {L9P_TOPEN, l9p_dispatch_topen, false}, + {L9P_TREAD, l9p_dispatch_tread, false}, + {L9P_TWRITE, l9p_dispatch_twrite, false}, + {L9P_TREMOVE, l9p_dispatch_tremove, true}, + {L9P_TSTAT, l9p_dispatch_tstat, false}, + {L9P_TWALK, l9p_dispatch_twalk, false}, + {L9P_TWSTAT, l9p_dispatch_twstat, false}, + 
{L9P_TSTATFS, l9p_dispatch_tstatfs, false}, + {L9P_TLOPEN, l9p_dispatch_tlopen, false}, + {L9P_TLCREATE, l9p_dispatch_tlcreate, false}, + {L9P_TSYMLINK, l9p_dispatch_tsymlink, false}, + {L9P_TMKNOD, l9p_dispatch_tmknod, false}, + {L9P_TRENAME, l9p_dispatch_trename, false}, + {L9P_TREADLINK, l9p_dispatch_treadlink, false}, + {L9P_TGETATTR, l9p_dispatch_tgetattr, false}, + {L9P_TSETATTR, l9p_dispatch_tsetattr, false}, + {L9P_TXATTRWALK, l9p_dispatch_txattrwalk, false}, + {L9P_TXATTRCREATE, l9p_dispatch_txattrcreate, false}, + {L9P_TREADDIR, l9p_dispatch_treaddir, false}, + {L9P_TFSYNC, l9p_dispatch_tfsync, false}, + {L9P_TLOCK, l9p_dispatch_tlock, true}, + {L9P_TGETLOCK, l9p_dispatch_tgetlock, true}, + {L9P_TLINK, l9p_dispatch_tlink, false}, + {L9P_TMKDIR, l9p_dispatch_tmkdir, false}, + {L9P_TRENAMEAT, l9p_dispatch_trenameat, false}, + {L9P_TUNLINKAT, l9p_dispatch_tunlinkat, false}, +}; + +/* + * NB: version index 0 is reserved for new connections, and + * is a protocol that handles only L9P_TVERSION. Once we get a + * valid version, we start a new session using its dispatch table. + */ +static const struct { + const char *name; + const struct l9p_handler *handlers; + int n_handlers; +} l9p_versions[] = { + { "", l9p_handlers_no_version, N(l9p_handlers_no_version) }, + { "9P2000", l9p_handlers_base, N(l9p_handlers_base) }, + { "9P2000.u", l9p_handlers_dotu, N(l9p_handlers_dotu), }, + { "9P2000.L", l9p_handlers_dotL, N(l9p_handlers_dotL), }, +}; + +/* + * Run the appropriate handler for this request. + * It's our caller's responsibility to respond. 
+ */ +int +l9p_dispatch_request(struct l9p_request *req) +{ + struct l9p_connection *conn; +#if defined(L9P_DEBUG) + struct sbuf *sb; +#endif + size_t i, n; + const struct l9p_handler *handlers, *hp; + bool flush_requested; + + conn = req->lr_conn; + flush_requested = req->lr_flushstate == L9P_FLUSH_REQUESTED_PRE_START; + + handlers = l9p_versions[conn->lc_version].handlers; + n = (size_t)l9p_versions[conn->lc_version].n_handlers; + for (hp = handlers, i = 0; i < n; hp++, i++) + if (req->lr_req.hdr.type == hp->type) + goto found; + hp = NULL; +found: + +#if defined(L9P_DEBUG) + sb = sbuf_new_auto(); + if (flush_requested) { + sbuf_cat(sb, "FLUSH requested pre-dispatch"); + if (hp != NULL && hp->must_run) + sbuf_cat(sb, ", but must run"); + sbuf_cat(sb, ": "); + } + l9p_describe_fcall(&req->lr_req, conn->lc_version, sb); + sbuf_finish(sb); + + L9P_LOG(L9P_DEBUG, "%s", sbuf_data(sb)); + sbuf_delete(sb); +#endif + + if (hp != NULL) { + if (!flush_requested || hp->must_run) + return (hp->handler(req)); + return (EINTR); + } + + L9P_LOG(L9P_WARNING, "unknown request of type %d", + req->lr_req.hdr.type); + return (ENOSYS); +} + +/* + * Translate BSD errno to 9P2000/9P2000.u errno. + */ +static inline int +e29p(int errnum) +{ + static int const table[] = { + [ENOTEMPTY] = EPERM, + [EDQUOT] = EPERM, + [ENOSYS] = EPERM, /* ??? */ + }; + + if ((size_t)errnum < N(table) && table[errnum] != 0) + return (table[errnum]); + if (errnum <= ERANGE) + return (errnum); + return (EIO); /* ??? */ +} + +/* + * Translate BSD errno to Linux errno. 
+ */ +static inline int +e2linux(int errnum) +{ + static int const table[] = { + [EDEADLK] = LINUX_EDEADLK, + [EAGAIN] = LINUX_EAGAIN, + [EINPROGRESS] = LINUX_EINPROGRESS, + [EALREADY] = LINUX_EALREADY, + [ENOTSOCK] = LINUX_ENOTSOCK, + [EDESTADDRREQ] = LINUX_EDESTADDRREQ, + [EMSGSIZE] = LINUX_EMSGSIZE, + [EPROTOTYPE] = LINUX_EPROTOTYPE, + [ENOPROTOOPT] = LINUX_ENOPROTOOPT, + [EPROTONOSUPPORT] = LINUX_EPROTONOSUPPORT, + [ESOCKTNOSUPPORT] = LINUX_ESOCKTNOSUPPORT, + [EOPNOTSUPP] = LINUX_EOPNOTSUPP, + [EPFNOSUPPORT] = LINUX_EPFNOSUPPORT, + [EAFNOSUPPORT] = LINUX_EAFNOSUPPORT, + [EADDRINUSE] = LINUX_EADDRINUSE, + [EADDRNOTAVAIL] = LINUX_EADDRNOTAVAIL, + [ENETDOWN] = LINUX_ENETDOWN, + [ENETUNREACH] = LINUX_ENETUNREACH, + [ENETRESET] = LINUX_ENETRESET, + [ECONNABORTED] = LINUX_ECONNABORTED, + [ECONNRESET] = LINUX_ECONNRESET, + [ENOBUFS] = LINUX_ENOBUFS, + [EISCONN] = LINUX_EISCONN, + [ENOTCONN] = LINUX_ENOTCONN, + [ESHUTDOWN] = LINUX_ESHUTDOWN, + [ETOOMANYREFS] = LINUX_ETOOMANYREFS, + [ETIMEDOUT] = LINUX_ETIMEDOUT, + [ECONNREFUSED] = LINUX_ECONNREFUSED, + [ELOOP] = LINUX_ELOOP, + [ENAMETOOLONG] = LINUX_ENAMETOOLONG, + [EHOSTDOWN] = LINUX_EHOSTDOWN, + [EHOSTUNREACH] = LINUX_EHOSTUNREACH, + [ENOTEMPTY] = LINUX_ENOTEMPTY, + [EPROCLIM] = LINUX_EAGAIN, + [EUSERS] = LINUX_EUSERS, + [EDQUOT] = LINUX_EDQUOT, + [ESTALE] = LINUX_ESTALE, + [EREMOTE] = LINUX_EREMOTE, + /* EBADRPC = unmappable? */ + /* ERPCMISMATCH = unmappable? */ + /* EPROGUNAVAIL = unmappable? */ + /* EPROGMISMATCH = unmappable? */ + /* EPROCUNAVAIL = unmappable? */ + [ENOLCK] = LINUX_ENOLCK, + [ENOSYS] = LINUX_ENOSYS, + /* EFTYPE = unmappable? */ + /* EAUTH = unmappable? */ + /* ENEEDAUTH = unmappable? */ + [EIDRM] = LINUX_EIDRM, + [ENOMSG] = LINUX_ENOMSG, + [EOVERFLOW] = LINUX_EOVERFLOW, + [ECANCELED] = LINUX_ECANCELED, + [EILSEQ] = LINUX_EILSEQ, + /* EDOOFUS = unmappable? 
*/ + [EBADMSG] = LINUX_EBADMSG, + [EMULTIHOP] = LINUX_EMULTIHOP, + [ENOLINK] = LINUX_ENOLINK, + [EPROTO] = LINUX_EPROTO, + /* ENOTCAPABLE = unmappable? */ + [ECAPMODE] = EPERM, +#ifdef ENOTRECOVERABLE + [ENOTRECOVERABLE] = LINUX_ENOTRECOVERABLE, +#endif +#ifdef EOWNERDEAD + [EOWNERDEAD] = LINUX_EOWNERDEAD, +#endif + }; + + /* + * In case we want to return a raw Linux errno, allow negative + * values a la Linux kernel internals. + * + * Values up to ERANGE are shared across systems (see + * linux_errno.h), except for EAGAIN. + */ + if (errnum < 0) + return (-errnum); + + if ((size_t)errnum < N(table) && table[errnum] != 0) + return (table[errnum]); + + if (errnum <= ERANGE) + return (errnum); + + L9P_LOG(L9P_WARNING, "cannot map errno %d to anything reasonable", + errnum); + + return (LINUX_ENOTRECOVERABLE); /* ??? */ +} + +/* + * Send response to request, or possibly just drop request. + * We also need to know whether to remove the request from + * the tag hash table. + */ +void +l9p_respond(struct l9p_request *req, bool drop, bool rmtag) +{ + struct l9p_connection *conn = req->lr_conn; + size_t iosize; +#if defined(L9P_DEBUG) + struct sbuf *sb; + const char *ftype; +#endif + int error; + + req->lr_resp.hdr.tag = req->lr_req.hdr.tag; + + error = req->lr_error; + if (error == 0) + req->lr_resp.hdr.type = req->lr_req.hdr.type + 1; + else { + if (conn->lc_version == L9P_2000L) { + req->lr_resp.hdr.type = L9P_RLERROR; + req->lr_resp.error.errnum = (uint32_t)e2linux(error); + } else { + req->lr_resp.hdr.type = L9P_RERROR; + req->lr_resp.error.ename = strerror(error); + req->lr_resp.error.errnum = (uint32_t)e29p(error); + } + } + +#if defined(L9P_DEBUG) + sb = sbuf_new_auto(); + l9p_describe_fcall(&req->lr_resp, conn->lc_version, sb); + sbuf_finish(sb); + + switch (req->lr_flushstate) { + case L9P_FLUSH_NONE: + ftype = ""; + break; + case L9P_FLUSH_REQUESTED_PRE_START: + ftype = "FLUSH requested pre-dispatch: "; + break; + case L9P_FLUSH_REQUESTED_POST_START: + ftype = 
"FLUSH requested while running: "; + break; + case L9P_FLUSH_TOOLATE: + ftype = "FLUSH requested too late: "; + break; + } + L9P_LOG(L9P_DEBUG, "%s%s%s", + drop ? "DROP: " : "", ftype, sbuf_data(sb)); + sbuf_delete(sb); +#endif + + error = drop ? 0 : + l9p_pufcall(&req->lr_resp_msg, &req->lr_resp, conn->lc_version); + if (rmtag) + ht_remove(&conn->lc_requests, req->lr_req.hdr.tag); + if (error != 0) { + L9P_LOG(L9P_ERROR, "cannot pack response"); + drop = true; + } + + if (drop) { + conn->lc_lt.lt_drop_response(req, + req->lr_resp_msg.lm_iov, req->lr_resp_msg.lm_niov, + conn->lc_lt.lt_aux); + } else { + iosize = req->lr_resp_msg.lm_size; + + /* + * Include I/O size in calculation for Rread and + * Rreaddir responses. + */ + if (req->lr_resp.hdr.type == L9P_RREAD || + req->lr_resp.hdr.type == L9P_RREADDIR) + iosize += req->lr_resp.io.count; + + conn->lc_lt.lt_send_response(req, + req->lr_resp_msg.lm_iov, req->lr_resp_msg.lm_niov, + iosize, conn->lc_lt.lt_aux); + } + + l9p_freefcall(&req->lr_req); + l9p_freefcall(&req->lr_resp); + + free(req); +} + +/* + * This allows a caller to iterate through the data in a + * read or write request (creating the data if packing, + * scanning through it if unpacking). This is used for + * writing readdir entries, so mode should be L9P_PACK + * (but we allow L9P_UNPACK so that debug code can also scan + * through the data later, if desired). + * + * This relies on the Tread op having positioned the request's + * iov to the beginning of the data buffer (note the l9p_seek_iov + * in l9p_dispatch_tread). 
+ */ +void +l9p_init_msg(struct l9p_message *msg, struct l9p_request *req, + enum l9p_pack_mode mode) +{ + + msg->lm_size = 0; + msg->lm_mode = mode; + msg->lm_cursor_iov = 0; + msg->lm_cursor_offset = 0; + msg->lm_niov = req->lr_data_niov; + memcpy(msg->lm_iov, req->lr_data_iov, + sizeof (struct iovec) * req->lr_data_niov); +} + +enum fid_lookup_flags { + F_REQUIRE_OPEN = 0x01, /* require that the file be marked OPEN */ + F_REQUIRE_DIR = 0x02, /* require that the file be marked ISDIR */ + F_REQUIRE_XATTR = 0x04, /* require that the file be marked XATTR */ + F_REQUIRE_AUTH = 0x08, /* require that the fid be marked AUTH */ + F_FORBID_OPEN = 0x10, /* forbid that the file be marked OPEN */ + F_FORBID_DIR = 0x20, /* forbid that the file be marked ISDIR */ + F_FORBID_XATTR = 0x40, /* forbid that the file be marked XATTR */ + F_ALLOW_AUTH = 0x80, /* allow that the fid be marked AUTH */ +}; + +/* + * Look up a fid. It must correspond to a valid file, else we return + * the given errno (some "not a valid fid" calls must return EIO and + * some must return EINVAL and qemu returns ENOENT in other cases and + * so on, so we just provide a general "return this error number"). + * + * Callers may also set constraints: fid must be (or not be) open, + * must be (or not be) a directory, must be (or not be) an xattr. + * + * Only one op has a fid that *must* be an auth fid. Most ops forbid + * auth fids So instead of FORBID we have ALLOW here and the default + * is FORBID. + */ +static inline int +fid_lookup(struct l9p_connection *conn, uint32_t fid, int err, int flags, + struct l9p_fid **afile) +{ + struct l9p_fid *file; + + file = ht_find(&conn->lc_files, fid); + if (file == NULL) + return (err); + + /* + * As soon as we go multithreaded / async, this + * assert has to become "return EINVAL" or "return err". + * + * We may also need a way to mark a fid as + * "in async op" (valid for some purposes, but cannot be + * used elsewhere until async op is completed or aborted). 
+ * + * For now, this serves for bug-detecting. + */ + assert(l9p_fid_isvalid(file)); + + /* + * Note that we're inline expanded and flags is constant, + * so unnecessary tests just drop out entirely. + */ + if ((flags & F_REQUIRE_OPEN) && !l9p_fid_isopen(file)) + return (EINVAL); + if ((flags & F_FORBID_OPEN) && l9p_fid_isopen(file)) + return (EINVAL); + if ((flags & F_REQUIRE_DIR) && !l9p_fid_isdir(file)) + return (ENOTDIR); + if ((flags & F_FORBID_DIR) && l9p_fid_isdir(file)) + return (EISDIR); + if ((flags & F_REQUIRE_XATTR) && !l9p_fid_isxattr(file)) + return (EINVAL); + if ((flags & F_FORBID_XATTR) && l9p_fid_isxattr(file)) + return (EINVAL); + if (l9p_fid_isauth(file)) { + if ((flags & (F_REQUIRE_AUTH | F_ALLOW_AUTH)) == 0) + return (EINVAL); + } else if (flags & F_REQUIRE_AUTH) + return (EINVAL); + *afile = file; + return (0); +} + +/* + * Append variable-size stat object and adjust io count. + * Returns 0 if the entire stat object was packed, -1 if not. + * A fully packed object updates the request's io count. + * + * Caller must use their own private l9p_message object since + * a partially packed object will leave the message object in + * a useless state. + * + * Frees the stat object. 
+ */ +int +l9p_pack_stat(struct l9p_message *msg, struct l9p_request *req, + struct l9p_stat *st) +{ + struct l9p_connection *conn = req->lr_conn; + uint16_t size = l9p_sizeof_stat(st, conn->lc_version); + int ret = 0; + + assert(msg->lm_mode == L9P_PACK); + + if (req->lr_resp.io.count + size > req->lr_req.io.count || + l9p_pustat(msg, st, conn->lc_version) < 0) + ret = -1; + else + req->lr_resp.io.count += size; + l9p_freestat(st); + return (ret); +} + +static int +l9p_dispatch_tversion(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_server *server = conn->lc_server; + enum l9p_version remote_version = L9P_INVALID_VERSION; + size_t i; + const char *remote_version_name; + + for (i = 0; i < N(l9p_versions); i++) { + if (strcmp(req->lr_req.version.version, + l9p_versions[i].name) == 0) { + remote_version = (enum l9p_version)i; + break; + } + } + + if (remote_version == L9P_INVALID_VERSION) { + L9P_LOG(L9P_ERROR, "unsupported remote version: %s", + req->lr_req.version.version); + return (ENOSYS); + } + + remote_version_name = l9p_versions[remote_version].name; + L9P_LOG(L9P_INFO, "remote version: %s", remote_version_name); + L9P_LOG(L9P_INFO, "local version: %s", + l9p_versions[server->ls_max_version].name); + + conn->lc_version = MIN(remote_version, server->ls_max_version); + conn->lc_msize = MIN(req->lr_req.version.msize, conn->lc_msize); + conn->lc_max_io_size = conn->lc_msize - 24; + req->lr_resp.version.version = strdup(remote_version_name); + req->lr_resp.version.msize = conn->lc_msize; + return (0); +} + +static int +l9p_dispatch_tattach(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + struct l9p_fid *fid; + int error; + + /* + * We still don't have Tauth yet, but let's code this part + * anyway. + * + * Look up the auth fid first since if it fails we can just + * return immediately. 
+ */ + if (req->lr_req.tattach.afid != L9P_NOFID) { + error = fid_lookup(conn, req->lr_req.tattach.afid, EINVAL, + F_REQUIRE_AUTH, &req->lr_fid2); + if (error) + return (error); + } else + req->lr_fid2 = NULL; + + fid = l9p_connection_alloc_fid(conn, req->lr_req.hdr.fid); + if (fid == NULL) + return (EINVAL); + + be = conn->lc_server->ls_backend; + + req->lr_fid = fid; + + /* For backend convenience, set NONUNAME on 9P2000. */ + if (conn->lc_version == L9P_2000) + req->lr_req.tattach.n_uname = L9P_NONUNAME; + error = be->attach(be->softc, req); + + /* + * On success, fid becomes valid; on failure, disconnect. + * It certainly *should* be a directory here... + */ + if (error == 0) { + l9p_fid_setvalid(fid); + if (req->lr_resp.rattach.qid.type & L9P_QTDIR) + l9p_fid_setdir(fid); + } else + l9p_connection_remove_fid(conn, fid); + return (error); +} + +static int +l9p_dispatch_tclunk(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + struct l9p_fid *fid; + int error; + + /* Note that clunk is the only way to dispose of an auth fid. */ + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_ALLOW_AUTH, &fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + l9p_fid_unsetvalid(fid); + + /* + * If it's an xattr fid there must, by definition, be an + * xattrclunk. The xattrclunk function can only be NULL if + * xattrwalk and xattrcreate are NULL or always return error. + * + * Q: do we want to allow async xattrclunk in case of very + * large xattr create? This will make things difficult, + * so probably not. 
+ */ + if (l9p_fid_isxattr(fid)) + error = be->xattrclunk(be->softc, fid); + else + error = be->clunk(be->softc, fid); + + /* fid is now gone regardless of any error return */ + l9p_connection_remove_fid(conn, fid); + return (error); +} + +static int +l9p_dispatch_tcreate(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + uint32_t dmperm; + int error; + + /* Incoming fid must represent a directory that has not been opened. */ + error = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL, + F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + dmperm = req->lr_req.tcreate.perm; +#define MKDIR_OR_SIMILAR \ + (L9P_DMDIR | L9P_DMSYMLINK | L9P_DMNAMEDPIPE | L9P_DMSOCKET | L9P_DMDEVICE) + + /* + * TODO: + * - check new file name + * - break out different kinds of create (file vs mkdir etc) + * - add async file-create (leaves req->lr_fid in limbo) + * + * A successful file-create changes the fid into an open file. + */ + error = be->create(be->softc, req); + if (error == 0 && (dmperm & MKDIR_OR_SIMILAR) == 0) { + l9p_fid_unsetdir(req->lr_fid); + l9p_fid_setopen(req->lr_fid); + } + + return (error); +} + +static int +l9p_dispatch_topen(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_FORBID_OPEN | F_FORBID_XATTR, &req->lr_fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + + /* + * TODO: + * - add async open (leaves req->lr_fid in limbo) + */ + error = be->open(be->softc, req); + if (error == 0) + l9p_fid_setopen(req->lr_fid); + return (error); +} + +static int +l9p_dispatch_tread(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + struct l9p_fid *fid; + int error; + + /* Xattr fids are not open, so we need our own tests. 
*/ + error = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL, 0, &req->lr_fid); + if (error) + return (error); + + /* + * Adjust so that writing messages (packing data) starts + * right after the count field in the response. + * + * size[4] + Rread[1] + tag[2] + count[4] = 11 + */ + l9p_seek_iov(req->lr_resp_msg.lm_iov, req->lr_resp_msg.lm_niov, + req->lr_data_iov, &req->lr_data_niov, 11); + + /* + * If it's an xattr fid there must, by definition, be an + * xattrread. The xattrread function can only be NULL if + * xattrwalk and xattrcreate are NULL or always return error. + * + * TODO: + * separate out directory-read + * allow async read + */ + be = conn->lc_server->ls_backend; + fid = req->lr_fid; + if (l9p_fid_isxattr(fid)) { + error = be->xattrread(be->softc, req); + } else if (l9p_fid_isopen(fid)) { + error = be->read(be->softc, req); + } else { + error = EINVAL; + } + + return (error); +} + +static int +l9p_dispatch_tremove(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + struct l9p_fid *fid; + int error; + + /* + * ?? Should we allow Tremove on auth fids? If so, do + * we pretend it is just a Tclunk? + */ + error = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL, 0, &fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + l9p_fid_unsetvalid(fid); + + error = be->remove(be->softc, fid); + /* fid is now gone regardless of any error return */ + l9p_connection_remove_fid(conn, fid); + return (error); +} + +static int +l9p_dispatch_tstat(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + struct l9p_fid *fid; + int error; + + /* Allow Tstat on auth fid? Seems harmless enough... 
*/ + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_ALLOW_AUTH, &fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + req->lr_fid = fid; + error = be->stat(be->softc, req); + + if (error == 0) { + if (l9p_fid_isauth(fid)) + req->lr_resp.rstat.stat.qid.type |= L9P_QTAUTH; + + /* should we check req->lr_resp.rstat.qid.type L9P_QTDIR bit? */ + if (req->lr_resp.rstat.stat.qid.type &= L9P_QTDIR) + l9p_fid_setdir(fid); + else + l9p_fid_unsetdir(fid); + } + + return (error); +} + +static int +l9p_dispatch_twalk(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + struct l9p_fid *fid, *newfid; + uint16_t n; + int error; + + /* Can forbid XATTR, but cannot require DIR. */ + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_FORBID_XATTR, &fid); + if (error) + return (error); + + if (req->lr_req.twalk.hdr.fid != req->lr_req.twalk.newfid) { + newfid = l9p_connection_alloc_fid(conn, + req->lr_req.twalk.newfid); + if (newfid == NULL) + return (EINVAL); + } else + newfid = fid; + + be = conn->lc_server->ls_backend; + req->lr_fid = fid; + req->lr_newfid = newfid; + error = be->walk(be->softc, req); + + /* + * If newfid == fid, then fid itself has (potentially) changed, + * but is still valid. Otherwise set newfid valid on + * success, and destroy it on error. + */ + if (newfid != fid) { + if (error == 0) + l9p_fid_setvalid(newfid); + else + l9p_connection_remove_fid(conn, newfid); + } + + /* + * If we walked any name elements, the last (n-1'th) qid + * has the type (dir vs file) for the new fid. Otherwise + * the type of newfid is the same as fid. Of course, if + * n==0 and fid==newfid, fid is already set up correctly + * as the whole thing was a big no-op, but it's safe to + * copy its dir bit to itself. 
+ */ + if (error == 0) { + n = req->lr_resp.rwalk.nwqid; + if (n > 0) { + if (req->lr_resp.rwalk.wqid[n - 1].type & L9P_QTDIR) + l9p_fid_setdir(newfid); + } else { + if (l9p_fid_isdir(fid)) + l9p_fid_setdir(newfid); + } + } + return (error); +} + +static int +l9p_dispatch_twrite(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + struct l9p_fid *fid; + int error; + + /* Cannot require open due to xattr write, but can forbid dir. */ + error = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL, + F_FORBID_DIR, &req->lr_fid); + if (error) + return (error); + + /* + * Adjust to point to the data to be written (a la + * l9p_dispatch_tread, but we're pointing into the request + * buffer rather than the response): + * + * size[4] + Twrite[1] + tag[2] + fid[4] + offset[8] + count[4] = 23 + */ + l9p_seek_iov(req->lr_req_msg.lm_iov, req->lr_req_msg.lm_niov, + req->lr_data_iov, &req->lr_data_niov, 23); + + /* + * Unlike read, write and xattrwrite are optional (for R/O fs). + * + * TODO: + * allow async write + */ + be = conn->lc_server->ls_backend; + fid = req->lr_fid; + if (l9p_fid_isxattr(fid)) { + error = be->xattrwrite != NULL ? + be->xattrwrite(be->softc, req) : ENOSYS; + } else if (l9p_fid_isopen(fid)) { + error = be->write != NULL ? + be->write(be->softc, req) : ENOSYS; + } else { + error = EINVAL; + } + + return (error); +} + +static int +l9p_dispatch_twstat(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + error = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL, + F_FORBID_XATTR, &req->lr_fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + error = be->wstat != NULL ? be->wstat(be->softc, req) : ENOSYS; + return (error); +} + +static int +l9p_dispatch_tstatfs(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + /* Should we allow statfs on auth fids? 
*/ + error = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL, 0, &req->lr_fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + error = be->statfs(be->softc, req); + return (error); +} + +static int +l9p_dispatch_tlopen(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_FORBID_OPEN | F_FORBID_XATTR, &req->lr_fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + + /* + * TODO: + * - add async open (leaves req->lr_fid in limbo) + */ + error = be->lopen != NULL ? be->lopen(be->softc, req) : ENOSYS; + if (error == 0) + l9p_fid_setopen(req->lr_fid); + return (error); +} + +static int +l9p_dispatch_tlcreate(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + + /* + * TODO: + * - check new file name + * - add async create (leaves req->lr_fid in limbo) + */ + error = be->lcreate != NULL ? be->lcreate(be->softc, req) : ENOSYS; + if (error == 0) { + l9p_fid_unsetdir(req->lr_fid); + l9p_fid_setopen(req->lr_fid); + } + return (error); +} + +static int +l9p_dispatch_tsymlink(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + /* This doesn't affect the containing dir; maybe allow OPEN? */ + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + + /* + * TODO: + * - check new file name + */ + error = be->symlink != NULL ? 
be->symlink(be->softc, req) : ENOSYS; + return (error); +} + +static int +l9p_dispatch_tmknod(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + /* This doesn't affect the containing dir; maybe allow OPEN? */ + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + + /* + * TODO: + * - check new file name + */ + error = be->mknod != NULL ? be->mknod(be->softc, req) : ENOSYS; + return (error); +} + +static int +l9p_dispatch_trename(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + /* Rename directory or file (including symlink etc). */ + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_FORBID_XATTR, &req->lr_fid); + if (error) + return (error); + + /* Doesn't affect new dir fid; maybe allow OPEN? */ + error = fid_lookup(conn, req->lr_req.trename.dfid, ENOENT, + F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid2); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + + /* + * TODO: + * - check new file name (trename.name) + */ + error = be->rename != NULL ? be->rename(be->softc, req) : ENOSYS; + return (error); +} + +static int +l9p_dispatch_treadlink(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + /* + * The underlying readlink will fail unless it's a symlink, + * and the back end has to check, but we might as well forbid + * directories and open files here since it's cheap. + */ + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_FORBID_DIR | F_FORBID_OPEN, &req->lr_fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + + error = be->readlink != NULL ? 
be->readlink(be->softc, req) : ENOSYS; + return (error); +} + +static int +l9p_dispatch_tgetattr(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_FORBID_XATTR, &req->lr_fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + + error = be->getattr != NULL ? be->getattr(be->softc, req) : ENOSYS; + return (error); +} + +static int +l9p_dispatch_tsetattr(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_FORBID_XATTR, &req->lr_fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + + error = be->setattr != NULL ? be->setattr(be->softc, req) : ENOSYS; + return (error); +} + +static int +l9p_dispatch_txattrwalk(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + struct l9p_fid *fid, *newfid; + int error; + + /* + * Not sure if we care if file-or-dir is open or not. + * However, the fid argument should always be a file or + * dir and the newfid argument must be supplied, must + * be different, and always becomes a new xattr, + * so this is not very much like Twalk. + */ + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_FORBID_XATTR, &fid); + if (error) + return (error); + + newfid = l9p_connection_alloc_fid(conn, req->lr_req.txattrwalk.newfid); + if (newfid == NULL) + return (EINVAL); + + be = conn->lc_server->ls_backend; + + req->lr_fid = fid; + req->lr_newfid = newfid; + error = be->xattrwalk != NULL ? be->xattrwalk(be->softc, req) : ENOSYS; + + /* + * Success/fail is similar to Twalk, except that we need + * to set the xattr type bit in the new fid. It's also + * much simpler since newfid is always a new fid. 
+ */ + if (error == 0) { + l9p_fid_setvalid(newfid); + l9p_fid_setxattr(newfid); + } else { + l9p_connection_remove_fid(conn, newfid); + } + return (error); +} + +static int +l9p_dispatch_txattrcreate(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + struct l9p_fid *fid; + int error; + + /* + * Forbid incoming open fid since it's going to become an + * xattr fid instead. If it turns out we need to allow + * it, fs code will need to handle this. + * + * Curiously, qemu 9pfs uses ENOENT for a bad txattrwalk + * fid, but EINVAL for txattrcreate (so we do too). + */ + error = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL, + F_FORBID_XATTR | F_FORBID_OPEN, &fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + + req->lr_fid = fid; + error = be->xattrcreate != NULL ? be->xattrcreate(be->softc, req) : + ENOSYS; + + /* + * On success, fid has changed from a regular (file or dir) + * fid to an xattr fid. + */ + if (error == 0) { + l9p_fid_unsetdir(fid); + l9p_fid_setxattr(fid); + } + return (error); +} + +static int +l9p_dispatch_treaddir(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_REQUIRE_DIR | F_REQUIRE_OPEN, &req->lr_fid); + if (error) + return (error); + + /* + * Adjust so that writing messages (packing data) starts + * right after the count field in the response. + * + * size[4] + Rreaddir[1] + tag[2] + count[4] = 11 + */ + l9p_seek_iov(req->lr_resp_msg.lm_iov, req->lr_resp_msg.lm_niov, + req->lr_data_iov, &req->lr_data_niov, 11); + + be = conn->lc_server->ls_backend; + + error = be->readdir != NULL ? 
be->readdir(be->softc, req) : ENOSYS; + return (error); +} + +static int +l9p_dispatch_tfsync(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_REQUIRE_OPEN, &req->lr_fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + + error = be->fsync != NULL ? be->fsync(be->softc, req) : ENOSYS; + return (error); +} + +static int +l9p_dispatch_tlock(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + /* Forbid directories? */ + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_REQUIRE_OPEN, &req->lr_fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + + /* + * TODO: multiple client handling; perhaps async locking. + */ + error = be->lock != NULL ? be->lock(be->softc, req) : ENOSYS; + return (error); +} + +static int +l9p_dispatch_tgetlock(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_REQUIRE_OPEN, &req->lr_fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + + /* + * TODO: multiple client handling; perhaps async locking. + */ + error = be->getlock != NULL ? be->getlock(be->softc, req) : ENOSYS; + return (error); +} + +static int +l9p_dispatch_tlink(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + /* + * Note, dfid goes into fid2 in current scheme. + * + * Allow open dir? Target dir fid is not modified... 
+	 */
+	error = fid_lookup(conn, req->lr_req.tlink.dfid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid2);
+	if (error)
+		return (error);
+
+	error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_FORBID_DIR | F_FORBID_XATTR, &req->lr_fid);
+	if (error)
+		return (error);
+
+	be = conn->lc_server->ls_backend;
+
+	error = be->link != NULL ? be->link(be->softc, req) : ENOSYS;
+	return (error);
+}
+
+/*
+ * Tmkdir: create a directory inside the directory named by fid.
+ *
+ * The fid must be a directory that is not open.  Names containing
+ * a slash are rejected with EINVAL before the back end is called;
+ * a back end without a mkdir hook yields ENOSYS.
+ */
+static int
+l9p_dispatch_tmkdir(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be;
+	int error;
+
+	error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (error)
+		return (error);
+
+	/*
+	 * Slashes embedded in the name are not allowed.
+	 *
+	 * Read the name through the Tmkdir view of the request; going
+	 * through the Tlcreate view only works for as long as the two
+	 * message layouts happen to agree.
+	 * NOTE(review): assumes the request union has a "tmkdir" member
+	 * like the other T-messages -- confirm in fcall.h.
+	 */
+	if (strchr(req->lr_req.tmkdir.name, '/') != NULL)
+		return (EINVAL);
+
+	be = conn->lc_server->ls_backend;
+	error = be->mkdir != NULL ? be->mkdir(be->softc, req) : ENOSYS;
+	return (error);
+}
+
+/*
+ * Trenameat: rename an entry of the directory named by fid into
+ * the directory named by newdirfid.  Both fids must be directories
+ * that are not open; the back end hook is optional (ENOSYS).
+ */
+static int
+l9p_dispatch_trenameat(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be;
+	int error;
+
+	error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (error)
+		return (error);
+
+	error = fid_lookup(conn, req->lr_req.trenameat.newdirfid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid2);
+	if (error)
+		return (error);
+
+	be = conn->lc_server->ls_backend;
+
+	/* TODO: check old and new names */
+	error = be->renameat != NULL ? be->renameat(be->softc, req) : ENOSYS;
+	return (error);
+}
+
+static int
+l9p_dispatch_tunlinkat(struct l9p_request *req)
+{
+	struct l9p_connection *conn = req->lr_conn;
+	struct l9p_backend *be;
+	int error;
+
+	error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT,
+	    F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid);
+	if (error)
+		return (error);
+
+	be = conn->lc_server->ls_backend;
+
+	/* TODO: check dir-or-file name */
+	error = be->unlinkat != NULL ?
be->unlinkat(be->softc, req) : ENOSYS; + return (error); +} Index: lib/lib9p/rfuncs.h =================================================================== --- /dev/null +++ lib/lib9p/rfuncs.h @@ -0,0 +1,79 @@ +/* + * Copyright 2016 Chris Torek + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef LIB9P_RFUNCS_H +#define LIB9P_RFUNCS_H + +#include +#include +#include + +#if defined(WITH_CASPER) +#include +#endif + +/* + * Reentrant, optionally-malloc-ing versions of + * basename() and dirname(). + */ +char *r_basename(const char *, char *, size_t); +char *r_dirname(const char *, char *, size_t); + +/* + * Yuck: getpwuid, getgrgid are not thread-safe, and the + * POSIX replacements (getpwuid_r, getgrgid_r) are horrible. 
+ * This is to allow us to loop over the get.*_r calls with ever + * increasing buffers until they succeed or get unreasonable + * (same idea as the libc code for the non-reentrant versions, + * although prettier). + * + * The getpwuid/getgrgid functions auto-init one of these, + * but the caller must call r_pgfree() when done with the + * return values. + * + * If we need more later, we may have to expose the init function. + */ +struct r_pgdata { + char *r_pgbuf; + size_t r_pgbufsize; + union { + struct passwd un_pw; + struct group un_gr; + } r_pgun; +}; + +/* void r_pginit(struct r_pgdata *); */ +void r_pgfree(struct r_pgdata *); +struct passwd *r_getpwuid(uid_t, struct r_pgdata *); +struct group *r_getgrgid(gid_t, struct r_pgdata *); + +#if defined(WITH_CASPER) +struct passwd *r_cap_getpwuid(cap_channel_t *, uid_t, struct r_pgdata *); +struct group *r_cap_getgrgid(cap_channel_t *, gid_t, struct r_pgdata *); +#endif + +#endif /* LIB9P_RFUNCS_H */ Index: lib/lib9p/rfuncs.c =================================================================== --- /dev/null +++ lib/lib9p/rfuncs.c @@ -0,0 +1,320 @@ +/* + * Copyright 2016 Chris Torek + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * NOTE(review): the header names on the #include lines below were
+ * lost in this copy of the patch and are restored as a best guess
+ * from the identifiers this file uses -- confirm against the
+ * original.
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#if defined(WITH_CASPER)
+#include <libcasper.h>
+#include <casper/cap_pwd.h>
+#include <casper/cap_grp.h>
+#endif
+
+#include "rfuncs.h"
+
+/*
+ * This is essentially a clone of the BSD basename_r function,
+ * which is like POSIX basename() but puts the result in a user
+ * supplied buffer.
+ *
+ * In BSD basename_r, the buffer must be at least MAXPATHLEN bytes
+ * long.  In our case we take the size of the buffer as an argument.
+ *
+ * Note that it's impossible in general to do this without
+ * a temporary buffer since basename("foo/bar") is "bar",
+ * but basename("foo/bar/") is still "bar" -- no trailing
+ * slash is allowed.
+ *
+ * The return value is your supplied buffer <buf>, or NULL if
+ * the length of the basename of the supplied <path> equals or
+ * exceeds your indicated <bufsize> (errno is set to ENAMETOOLONG
+ * in that case).
+ *
+ * As a special but useful case, if you supply NULL for the <buf>
+ * argument, we allocate the buffer dynamically to match the
+ * basename, i.e., the result is basically strdup()ed for you.
+ * In this case <bufsize> is ignored (recommended: pass 0 here),
+ * and NULL is returned if the allocation fails.
+ */
+char *
+r_basename(const char *path, char *buf, size_t bufsize)
+{
+	const char *endp, *comp;
+	size_t len;
+
+	/*
+	 * NULL or empty path means ".".  This is perhaps overly
+	 * forgiving but matches libc basename_r(), and avoids
+	 * breaking the code below.
+	 */
+	if (path == NULL || *path == '\0') {
+		comp = ".";
+		len = 1;
+	} else {
+		/*
+		 * Back up over any trailing slashes.  If we reach
+		 * the top of the path and it's still a trailing
+		 * slash, it's also a leading slash and the entire
+		 * path is just "/" (or "//", or "///", etc).
+		 */
+		endp = path + strlen(path) - 1;
+		while (*endp == '/' && endp > path)
+			endp--;
+		/* Invariant: *endp != '/' || endp == path */
+		if (*endp == '/') {
+			/* then endp==path and hence entire path is "/" */
+			comp = "/";
+			len = 1;
+		} else {
+			/*
+			 * We handled empty strings earlier, and
+			 * we just proved *endp != '/'.  Hence
+			 * we have a non-empty basename, ending
+			 * at endp.
+			 *
+			 * Back up one path name component.  The
+			 * part between these two is the basename.
+			 *
+			 * Note that we only stop backing up when
+			 * either comp==path, or comp[-1] is '/'.
+			 *
+			 * Suppose path[0] is '/'.  Then, since *endp
+			 * is *not* '/', we had comp>path initially, and
+			 * stopped backing up because we found a '/'
+			 * (perhaps path[0], perhaps a later '/').
+			 *
+			 * Or, suppose path[0] is NOT '/'.  Then,
+			 * either there are no '/'s at all and
+			 * comp==path, or comp[-1] is '/'.
+			 *
+			 * In all cases, we want all bytes from *comp
+			 * to *endp, inclusive.
+			 */
+			comp = endp;
+			while (comp > path && comp[-1] != '/')
+				comp--;
+			len = (size_t)(endp - comp + 1);
+		}
+	}
+	if (buf == NULL) {
+		buf = malloc(len + 1);
+		if (buf == NULL)
+			return (NULL);
+	} else {
+		if (len >= bufsize) {
+			errno = ENAMETOOLONG;
+			return (NULL);
+		}
+	}
+	memcpy(buf, comp, len);
+	buf[len] = '\0';
+	return (buf);
+}
+
+/*
+ * This is much like POSIX dirname(), but is reentrant.
+ *
+ * We examine a path, find the directory portion, and copy that
+ * to a user supplied buffer <buf> of the given size <bufsize>.
+ *
+ * Note that dirname("/foo/bar/") is "/foo", dirname("/foo") is "/",
+ * and dirname("////") is "/".  However, dirname("////foo/bar") is
+ * "////foo" (we do not resolve these leading slashes away -- this
+ * matches the BSD libc behavior).
+ *
+ * The return value is your supplied buffer <buf>, or NULL if
+ * the length of the dirname of the supplied <path> equals or
+ * exceeds your indicated <bufsize> (errno is set to ENAMETOOLONG
+ * in that case).
+ *
+ * As a special but useful case, if you supply NULL for the <buf>
+ * argument, we allocate the buffer dynamically to match the
+ * dirname, i.e., the result is basically strdup()ed for you.
+ * In this case <bufsize> is ignored (recommended: pass 0 here),
+ * and NULL is returned if the allocation fails.
+ */
+char *
+r_dirname(const char *path, char *buf, size_t bufsize)
+{
+	const char *endp, *dirpart;
+	size_t len;
+
+	/*
+	 * NULL or empty path means ".".  This is perhaps overly
+	 * forgiving but matches libc dirname(), and avoids breaking
+	 * the code below.
+	 */
+	if (path == NULL || *path == '\0') {
+		dirpart = ".";
+		len = 1;
+	} else {
+		/*
+		 * Back up over any trailing slashes, then back up
+		 * one path name.  In all cases, stop as soon as
+		 * endp==path so that we do not back out of the
+		 * buffer entirely.
+		 *
+		 * The first loop takes care of trailing slashes
+		 * in names like "/foo/bar//" (where the dirname
+		 * part is to be "/foo"), the second strips out
+		 * the non-dir-name part.
+		 */
+		endp = path + strlen(path) - 1;
+		while (endp > path && *endp == '/')
+			endp--;
+		while (endp > path && *endp != '/')
+			endp--;
+
+		if (*endp != '/') {
+			/*
+			 * We hit the start of the path without finding
+			 * a separator ("foo", "foo/", etc), so there
+			 * is no directory part at all.
+			 */
+			dirpart = ".";
+			len = 1;
+		} else {
+			/*
+			 * endp is on the slash that ends the directory
+			 * part.  Back up over it and any more slashes
+			 * before it; for "/foo" or "//foo" this stops
+			 * on the leading "/", which is what we want.
+			 *
+			 * The no-separator test above must come before
+			 * this loop: afterwards "a/b" (dirname "a") and
+			 * "ab" (dirname ".") would both leave endp on a
+			 * non-slash at path[0] and could no longer be
+			 * told apart.
+			 */
+			while (endp > path && *endp == '/')
+				endp--;
+			dirpart = path;
+			len = (size_t)(endp - path + 1);
+		}
+	}
+	if (buf == NULL) {
+		buf = malloc(len + 1);
+		if (buf == NULL)
+			return (NULL);
+	} else {
+		if (len >= bufsize) {
+			errno = ENAMETOOLONG;
+			return (NULL);
+		}
+	}
+	memcpy(buf, dirpart, len);
+	buf[len] = '\0';
+	return (buf);
+}
+
+/*
+ * Reset <pg> to "no buffer yet".  The first r_pgexpand() call
+ * then allocates twice the size recorded here.
+ */
+static void
+r_pginit(struct r_pgdata *pg)
+{
+
+	/* Note: init to half size since the first thing we do is double it */
+	pg->r_pgbufsize = 1 << 9;
+	pg->r_pgbuf = NULL;	/* note that realloc(NULL) == malloc */
+}
+
+/*
+ * Double the scratch buffer in <pg>.
+ *
+ * Returns 0 on success, or ENOMEM if the new size would reach
+ * 1 MiB or the allocation fails.  On failure the previous buffer
+ * (if any) and its recorded size are left in place, so that
+ * r_pgfree() still releases it.
+ */
+static int
+r_pgexpand(struct r_pgdata *pg)
+{
+	char *nbuf;
+	size_t nsize;
+
+	nsize = pg->r_pgbufsize << 1;
+	if (nsize >= (1 << 20))
+		return (ENOMEM);
+	nbuf = realloc(pg->r_pgbuf, nsize);
+	if (nbuf == NULL)
+		return (ENOMEM);
+	pg->r_pgbuf = nbuf;
+	/*
+	 * Record the new size: the callers pass r_pgbufsize to the
+	 * get*_r functions, and retrying after ERANGE only makes
+	 * progress if that length actually grows.
+	 */
+	pg->r_pgbufsize = nsize;
+	return (0);
+}
+
+/*
+ * Release the scratch buffer held by <pg>.
+ */
+void
+r_pgfree(struct r_pgdata *pg)
+{
+
+	free(pg->r_pgbuf);
+}
+
+/*
+ * Look up <uid> with getpwuid_r(), growing the scratch buffer in
+ * <pg> until the call stops failing with ERANGE.
+ *
+ * Returns the result reported by getpwuid_r() (which may be NULL),
+ * or NULL on any error.  <pg> is initialized here; the caller
+ * releases it with r_pgfree().
+ */
+struct passwd *
+r_getpwuid(uid_t uid, struct r_pgdata *pg)
+{
+	struct passwd *result = NULL;
+	int error;
+
+	r_pginit(pg);
+	do {
+		error = r_pgexpand(pg);
+		if (error == 0)
+			error = getpwuid_r(uid, &pg->r_pgun.un_pw,
+			    pg->r_pgbuf, pg->r_pgbufsize, &result);
+	} while (error == ERANGE);
+
+	return (error ? NULL : result);
+}
+
+/*
+ * Same as r_getpwuid(), but for group entries via getgrgid_r().
+ */
+struct group *
+r_getgrgid(gid_t gid, struct r_pgdata *pg)
+{
+	struct group *result = NULL;
+	int error;
+
+	r_pginit(pg);
+	do {
+		error = r_pgexpand(pg);
+		if (error == 0)
+			error = getgrgid_r(gid, &pg->r_pgun.un_gr,
+			    pg->r_pgbuf, pg->r_pgbufsize, &result);
+	} while (error == ERANGE);
+
+	return (error ? NULL : result);
+}
+
+#if defined(WITH_CASPER)
+/*
+ * Same as r_getpwuid(), but the lookup goes through the Casper
+ * channel <cap> using cap_getpwuid_r().
+ */
+struct passwd *
+r_cap_getpwuid(cap_channel_t *cap, uid_t uid, struct r_pgdata *pg)
+{
+	struct passwd *result = NULL;
+	int error;
+
+	r_pginit(pg);
+	do {
+		error = r_pgexpand(pg);
+		if (error == 0)
+			error = cap_getpwuid_r(cap, uid, &pg->r_pgun.un_pw,
+			    pg->r_pgbuf, pg->r_pgbufsize, &result);
+	} while (error == ERANGE);
+
+	return (error ? NULL : result);
+}
+
+/*
+ * Same as r_getgrgid(), but the lookup goes through the Casper
+ * channel <cap> using cap_getgrgid_r().
+ */
+struct group *
+r_cap_getgrgid(cap_channel_t *cap, gid_t gid, struct r_pgdata *pg)
+{
+	struct group *result = NULL;
+	int error;
+
+	r_pginit(pg);
+	do {
+		error = r_pgexpand(pg);
+		if (error == 0)
+			error = cap_getgrgid_r(cap, gid, &pg->r_pgun.un_gr,
+			    pg->r_pgbuf, pg->r_pgbufsize, &result);
+	} while (error == ERANGE);
+
+	return (error ? NULL : result);
+}
+#endif
Index: lib/lib9p/sbuf/sbuf.h
===================================================================
--- /dev/null
+++ lib/lib9p/sbuf/sbuf.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2016 Jakub Klama
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * Minimal libsbuf reimplementation for Mac OS X. + */ + +#ifndef LIB9P_SBUF_H +#define LIB9P_SBUF_H + +#include + +struct sbuf +{ + char *s_buf; + int s_size; + int s_capacity; + int s_position; +}; + +struct sbuf *sbuf_new_auto(void); +int sbuf_cat(struct sbuf *s, const char *str); +int sbuf_printf(struct sbuf *s, const char *fmt, ...); +int sbuf_vprintf(struct sbuf *s, const char *fmt, va_list args); +int sbuf_done(struct sbuf *s); +void sbuf_delete(struct sbuf *s); +int sbuf_finish(struct sbuf *s); +char *sbuf_data(struct sbuf *s); + +#endif /* LIB9P_SBUF_H */ + Index: lib/lib9p/sbuf/sbuf.c =================================================================== --- /dev/null +++ lib/lib9p/sbuf/sbuf.c @@ -0,0 +1,127 @@ +/* + * Copyright 2016 Jakub Klama + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * Minimal libsbuf reimplementation for Mac OS X.
+ */
+
+/*
+ * NOTE(review): the header names on the #include lines below were
+ * lost in this copy of the patch; the list is what the code in this
+ * file uses -- confirm against the original.
+ */
+#include <limits.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "sbuf.h"
+
+#define SBUF_INITIAL_SIZE 128
+
+/*
+ * Make sure the buffer can take <extra> more bytes plus the
+ * terminating NUL.  Returns 0 on success and -1 if the request
+ * is out of range or memory cannot be obtained; on failure the
+ * existing buffer and its bookkeeping are left untouched.
+ */
+static int
+sbuf_reserve(struct sbuf *s, int extra)
+{
+	char *nbuf;
+	int need;
+
+	if (extra < 0 || s->s_size > INT_MAX - extra - 1)
+		return (-1);
+	need = s->s_size + extra + 1;
+	if (s->s_buf != NULL && need <= s->s_capacity)
+		return (0);
+
+	/* Go through a temporary so a failed realloc loses nothing. */
+	nbuf = realloc(s->s_buf, (size_t)need);
+	if (nbuf == NULL)
+		return (-1);
+	s->s_buf = nbuf;
+	s->s_capacity = need;
+	return (0);
+}
+
+/*
+ * Allocate a new, empty, automatically growing string buffer.
+ *
+ * Returns NULL if the sbuf itself cannot be allocated.  If only
+ * the initial data buffer cannot be allocated, the sbuf is still
+ * returned with no buffer and zero capacity, and the first append
+ * tries the allocation again.
+ */
+struct sbuf *
+sbuf_new_auto(void)
+{
+	struct sbuf *s;
+
+	s = malloc(sizeof(*s));
+	if (s == NULL)
+		return (NULL);
+	s->s_buf = calloc(1, SBUF_INITIAL_SIZE + 1);
+	s->s_capacity = s->s_buf != NULL ? SBUF_INITIAL_SIZE : 0;
+	s->s_size = 0;
+	s->s_position = 0;
+
+	return (s);
+}
+
+/*
+ * Append the NUL-terminated string <str>.
+ * Returns 0 on success, -1 if the buffer could not be grown.
+ */
+int
+sbuf_cat(struct sbuf *s, const char *str)
+{
+	size_t len = strlen(str);
+
+	if (len > (size_t)INT_MAX || sbuf_reserve(s, (int)len) != 0)
+		return (-1);
+
+	/* len + 1: copy the terminating NUL along with the text */
+	memcpy(s->s_buf + s->s_size, str, len + 1);
+	s->s_size += (int)len;
+
+	return (0);
+}
+
+/*
+ * Append printf-style formatted text; see sbuf_vprintf().
+ */
+int
+sbuf_printf(struct sbuf *s, const char *fmt, ...)
+{
+	int ret;
+	va_list ap;
+
+	va_start(ap, fmt);
+	ret = sbuf_vprintf(s, fmt, ap);
+	va_end(ap);
+
+	return (ret);
+}
+
+/*
+ * Append text formatted according to <fmt> and <args>.
+ *
+ * The text is formatted twice: once into no buffer at all to learn
+ * its length, then for real once the buffer is known to be large
+ * enough.  Returns 0 on success, -1 if formatting fails or the
+ * buffer could not be grown.
+ */
+int
+sbuf_vprintf(struct sbuf *s, const char *fmt, va_list args)
+{
+	va_list copy;
+	int req;
+
+	/* <args> is needed again below, so measure using a copy */
+	va_copy(copy, args);
+	req = vsnprintf(NULL, 0, fmt, copy);
+	va_end(copy);
+
+	/* vsnprintf reports failure with a negative value */
+	if (req < 0 || sbuf_reserve(s, req) != 0)
+		return (-1);
+
+	req = vsnprintf(s->s_buf + s->s_size, (size_t)req + 1, fmt, args);
+	if (req < 0)
+		return (-1);
+	s->s_size += req;
+
+	return (0);
+}
+
+/*
+ * Return the buffer's contents (NULL if no buffer was ever
+ * allocated).
+ */
+char *
+sbuf_data(struct sbuf *s)
+{
+	return (s->s_buf);
+}
+
+/*
+ * NUL-terminate the contents.  Always returns 0.
+ */
+int
+sbuf_finish(struct sbuf *s)
+{
+	if (s->s_buf != NULL)
+		s->s_buf[s->s_size] = '\0';
+	return (0);
+}
+
+/*
+ * Free the data buffer and the sbuf itself.
+ */
+void
+sbuf_delete(struct sbuf *s)
+{
+	free(s->s_buf);
+	free(s);
+}
Index: lib/lib9p/threadpool.h
===================================================================
--- /dev/null
+++ lib/lib9p/threadpool.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2016 Jakub Klama
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef LIB9P_THREADPOOL_H +#define LIB9P_THREADPOOL_H + +#include +#include +#include +#include "lib9p.h" + +STAILQ_HEAD(l9p_request_queue, l9p_request); + +/* + * Most of the workers in the threadpool run requests. + * + * One distinguished worker delivers responses from the + * response queue. The reason this worker exists is to + * guarantee response order, so that flush responses go + * after their flushed requests. + */ +struct l9p_threadpool { + struct l9p_connection * ltp_conn; /* the connection */ + struct l9p_request_queue ltp_workq; /* requests awaiting a worker */ + struct l9p_request_queue ltp_replyq; /* requests that are done */ + pthread_mutex_t ltp_mtx; /* locks queues and cond vars */ + pthread_cond_t ltp_work_cv; /* to signal regular workers */ + pthread_cond_t ltp_reply_cv; /* to signal reply-worker */ + LIST_HEAD(, l9p_worker) ltp_workers; /* list of all workers */ +}; + +/* + * All workers, including the responder, use this as their + * control structure. (The only thing that distinguishes the + * responder is that it runs different code and waits on the + * reply_cv.) + */ +struct l9p_worker { + struct l9p_threadpool * ltw_tp; + pthread_t ltw_thread; + bool ltw_exiting; + bool ltw_responder; + LIST_ENTRY(l9p_worker) ltw_link; +}; + +/* + * Each request has a "work state" telling where the request is, + * in terms of workers working on it. 
That is, this tells us
+ * which threadpool queue, if any, the request is in now or would
+ * go in, or what's happening with it.
+ */
+enum l9p_workstate {
+	L9P_WS_NOTSTARTED,		/* not yet started */
+	L9P_WS_IMMEDIATE,		/* Tflush being done sans worker */
+	L9P_WS_INPROGRESS,		/* worker is working on it */
+	L9P_WS_RESPQUEUED,		/* worker is done, response queued */
+	L9P_WS_REPLYING,		/* responder is in final reply path */
+};
+
+/*
+ * Each request has a "flush state", initially NONE meaning no
+ * Tflush affected the request.
+ *
+ * If a Tflush comes in before we ever assign a work thread,
+ * the flush state goes to FLUSH_REQUESTED_PRE_START.
+ *
+ * If a Tflush comes in after we assign a work thread, the
+ * flush state goes to FLUSH_REQUESTED_POST_START.  The flush
+ * request may be too late: the request might finish anyway.
+ * Or it might be soon enough to abort.  In all cases, though, the
+ * operation requesting the flush (the "flusher") must wait for
+ * the other request (the "flushee") to go through the respond
+ * path.  The respond routine gets to decide whether to send a
+ * normal response, send an error, or drop the request
+ * entirely.
+ *
+ * There's one especially annoying case: what if a Tflush comes in
+ * *while* we're sending a response?  In this case it's too late:
+ * the flush just waits for the fully-composed response.
+ */
+enum l9p_flushstate {
+	L9P_FLUSH_NONE = 0,		/* must be zero */
+	L9P_FLUSH_REQUESTED_PRE_START,	/* not even started before flush */
+	L9P_FLUSH_REQUESTED_POST_START,	/* started, then someone said flush */
+	L9P_FLUSH_TOOLATE		/* too late, already responding */
+};
+
+void	l9p_threadpool_flushee_done(struct l9p_request *); /* NOTE(review):
+	 * no definition appears in threadpool.c as added by this patch;
+	 * confirm one exists elsewhere or drop this prototype. */
+int	l9p_threadpool_init(struct l9p_threadpool *, int);
+void	l9p_threadpool_run(struct l9p_threadpool *, struct l9p_request *);
+int	l9p_threadpool_shutdown(struct l9p_threadpool *);
+int	l9p_threadpool_tflush(struct l9p_request *);
+
+#endif	/* LIB9P_THREADPOOL_H */
Index: lib/lib9p/threadpool.c
===================================================================
--- /dev/null
+++ lib/lib9p/threadpool.c
@@ -0,0 +1,422 @@
+/*
+ * Copyright 2016 Jakub Klama
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#if defined(__FreeBSD__)
+#include <pthread_np.h>
+#endif
+#include <sys/queue.h>
+#include "lib9p.h"
+#include "threadpool.h"
+
+static void l9p_threadpool_rflush(struct l9p_threadpool *tp,
+    struct l9p_request *req);
+
+static void *
+l9p_responder(void *arg)
+{
+	struct l9p_threadpool *tp;
+	struct l9p_worker *worker = arg;
+	struct l9p_request *req;
+
+	tp = worker->ltw_tp;
+	for (;;) {
+		/* get next reply to send */
+		pthread_mutex_lock(&tp->ltp_mtx);
+		while (STAILQ_EMPTY(&tp->ltp_replyq) && !worker->ltw_exiting)
+			pthread_cond_wait(&tp->ltp_reply_cv, &tp->ltp_mtx);
+		if (worker->ltw_exiting) {
+			pthread_mutex_unlock(&tp->ltp_mtx);
+			break;
+		}
+
+		/* off reply queue */
+		req = STAILQ_FIRST(&tp->ltp_replyq);
+		STAILQ_REMOVE_HEAD(&tp->ltp_replyq, lr_worklink);
+
+		/* request is now in final glide path, can't be Tflush-ed */
+		req->lr_workstate = L9P_WS_REPLYING;
+
+		/* any flushers waiting for this request can go now */
+		if (req->lr_flushstate != L9P_FLUSH_NONE)
+			l9p_threadpool_rflush(tp, req);
+
+		pthread_mutex_unlock(&tp->ltp_mtx);
+
+		/* send response */
+		l9p_respond(req, false, true);
+	}
+	return (NULL);
+}
+
+static void *
+l9p_worker(void *arg)
+{
+	struct l9p_threadpool *tp;
+	struct l9p_worker *worker = arg;
+	struct l9p_request *req;
+
+	tp = worker->ltw_tp;
+	pthread_mutex_lock(&tp->ltp_mtx);
+	for (;;) {
+		while (STAILQ_EMPTY(&tp->ltp_workq) && !worker->ltw_exiting)
+			pthread_cond_wait(&tp->ltp_work_cv, &tp->ltp_mtx);
+		if (worker->ltw_exiting)
+			break;
+
+		/* off work queue; now work-in-progress, by us */
+		req = STAILQ_FIRST(&tp->ltp_workq);
+		STAILQ_REMOVE_HEAD(&tp->ltp_workq, lr_worklink);
+		req->lr_workstate = L9P_WS_INPROGRESS;
+		req->lr_worker = worker;
+		pthread_mutex_unlock(&tp->ltp_mtx);
+
+		/* actually try the request */
+		req->lr_error = l9p_dispatch_request(req);
+
+		/* move to responder queue, updating work-state */
+		pthread_mutex_lock(&tp->ltp_mtx);
+		req->lr_workstate = L9P_WS_RESPQUEUED;
+		req->lr_worker = NULL;
+		STAILQ_INSERT_TAIL(&tp->ltp_replyq, req, lr_worklink);
+
+		/* signal the responder */
+		pthread_cond_signal(&tp->ltp_reply_cv);
+	}
+	pthread_mutex_unlock(&tp->ltp_mtx);
+	return (NULL);
+}
+
+/*
+ * Just before finally replying to a request that got touched by
+ * a Tflush request, we enqueue its flushers (requests of type
+ * Tflush, which are now on the flushee's lr_flushq) onto the
+ * response queue.
+ */
+static void
+l9p_threadpool_rflush(struct l9p_threadpool *tp, struct l9p_request *req)
+{
+	struct l9p_request *flusher;
+
+	/*
+	 * https://swtch.com/plan9port/man/man9/flush.html says:
+	 *
+	 * "Should multiple Tflushes be received for a pending
+	 * request, they must be answered in order.  A Rflush for
+	 * any of the multiple Tflushes implies an answer for all
+	 * previous ones.  Therefore, should a server receive a
+	 * request and then multiple flushes for that request, it
+	 * need respond only to the last flush."  This means
+	 * we could march through the queue of flushers here,
+	 * marking all but the last one as "to be dropped" rather
+	 * than "to be replied-to".
+	 *
+	 * However, we'll leave that for later, if ever -- it
+	 * should be harmless to respond to each, in order.
+	 */
+	STAILQ_FOREACH(flusher, &req->lr_flushq, lr_flushlink) {
+		flusher->lr_workstate = L9P_WS_RESPQUEUED;
+#ifdef notdef
+		if (not the last) {
+			flusher->lr_flushstate = L9P_FLUSH_NOT_RUN;
+			/* or, flusher->lr_drop = true ? */
+		}
+#endif
+		STAILQ_INSERT_TAIL(&tp->ltp_replyq, flusher, lr_worklink);
+	}
+}
+
+/*
+ * Set up the pool's mutex, condition variables and queues, then
+ * start one responder thread followed by up to "size" worker threads.
+ *
+ * Returns 0 once the responder and at least one worker are running
+ * (fewer workers than requested is not treated as an error).
+ * Returns EINVAL if size is not positive, otherwise the errno-style
+ * value from the pthread call or allocation that failed; in that
+ * case everything that was set up is torn down again.
+ */
+int
+l9p_threadpool_init(struct l9p_threadpool *tp, int size)
+{
+	struct l9p_worker *worker;
+#if defined(__FreeBSD__)
+	char threadname[16];
+#endif
+	int error;
+	int i, nworkers, nresponders;
+
+	if (size <= 0)
+		return (EINVAL);
+	error = pthread_mutex_init(&tp->ltp_mtx, NULL);
+	if (error)
+		return (error);
+	error = pthread_cond_init(&tp->ltp_work_cv, NULL);
+	if (error)
+		goto fail_work_cv;
+	error = pthread_cond_init(&tp->ltp_reply_cv, NULL);
+	if (error)
+		goto fail_reply_cv;
+
+	STAILQ_INIT(&tp->ltp_workq);
+	STAILQ_INIT(&tp->ltp_replyq);
+	LIST_INIT(&tp->ltp_workers);
+
+	nresponders = 0;
+	nworkers = 0;
+	/* iteration 0 creates the responder, 1..size the workers */
+	for (i = 0; i <= size; i++) {
+		worker = calloc(1, sizeof(*worker));
+		if (worker == NULL) {
+			/* treat like a failed pthread_create() */
+			error = ENOMEM;
+			break;
+		}
+		worker->ltw_tp = tp;
+		worker->ltw_responder = i == 0;
+		error = pthread_create(&worker->ltw_thread, NULL,
+		    worker->ltw_responder ? l9p_responder : l9p_worker,
+		    (void *)worker);
+		if (error) {
+			free(worker);
+			break;
+		}
+		if (worker->ltw_responder)
+			nresponders++;
+		else
+			nworkers++;
+
+#if defined(__FreeBSD__)
+		if (worker->ltw_responder) {
+			pthread_set_name_np(worker->ltw_thread, "9p-responder");
+		} else {
+			/* bounded: a large index must not overrun the name */
+			(void) snprintf(threadname, sizeof(threadname),
+			    "9p-worker:%d", i - 1);
+			pthread_set_name_np(worker->ltw_thread, threadname);
+		}
+#endif
+
+		LIST_INSERT_HEAD(&tp->ltp_workers, worker, ltw_link);
+	}
+	if (nresponders == 0 || nworkers == 0) {
+		/* need the one responder, and at least one worker */
+		l9p_threadpool_shutdown(tp);
+		return (error);
+	}
+	return (0);
+
+	/*
+	 * We could avoid these labels by having multiple destroy
+	 * paths (one for each error case), or by having booleans
+	 * for which variables were initialized.  Neither is very
+	 * appealing...
+	 */
+fail_reply_cv:
+	pthread_cond_destroy(&tp->ltp_work_cv);
+fail_work_cv:
+	pthread_mutex_destroy(&tp->ltp_mtx);
+
+	return (error);
+}
+
+/*
+ * Run a request, usually by queueing it.
+ */
+void
+l9p_threadpool_run(struct l9p_threadpool *tp, struct l9p_request *req)
+{
+
+	/*
+	 * Flush requests must be handled specially, since they
+	 * can cancel / kill off regular requests.  (But we can
+	 * run them through the regular dispatch mechanism.)
+	 */
+	if (req->lr_req.hdr.type == L9P_TFLUSH) {
+		/* not on a work queue yet so we can touch state */
+		req->lr_workstate = L9P_WS_IMMEDIATE;
+		(void) l9p_dispatch_request(req);
+	} else {
+		pthread_mutex_lock(&tp->ltp_mtx);
+		req->lr_workstate = L9P_WS_NOTSTARTED;
+		STAILQ_INSERT_TAIL(&tp->ltp_workq, req, lr_worklink);
+		pthread_cond_signal(&tp->ltp_work_cv);
+		pthread_mutex_unlock(&tp->ltp_mtx);
+	}
+}
+
+/*
+ * Run a Tflush request.  Called via l9p_dispatch_request() since
+ * it has some debug code in it, but not called from worker thread.
+ */
+int
+l9p_threadpool_tflush(struct l9p_request *req)
+{
+	struct l9p_connection *conn;
+	struct l9p_threadpool *tp;
+	struct l9p_request *flushee;
+	uint16_t oldtag;
+	enum l9p_flushstate nstate;
+
+	/*
+	 * Find what we're supposed to flush (the flushee, as it were).
+	 */
+	req->lr_error = 0;	/* Tflush always succeeds */
+	conn = req->lr_conn;
+	tp = &conn->lc_tp;
+	oldtag = req->lr_req.tflush.oldtag;
+	ht_wrlock(&conn->lc_requests);
+	flushee = ht_find_locked(&conn->lc_requests, oldtag);
+	if (flushee == NULL) {
+		/*
+		 * Nothing to flush!  The old request must have
+		 * been done and gone already.  Just queue this
+		 * Tflush for a success reply.
+		 */
+		ht_unlock(&conn->lc_requests);
+		pthread_mutex_lock(&tp->ltp_mtx);
+		goto done;
+	}
+
+	/*
+	 * Found the original request.  We'll need to inspect its
+	 * work-state to figure out what to do.
+	 */
+	pthread_mutex_lock(&tp->ltp_mtx);
+	ht_unlock(&conn->lc_requests);
+
+	switch (flushee->lr_workstate) {
+
+	case L9P_WS_NOTSTARTED:
+		/*
+		 * Flushee is on work queue, but not yet being
+		 * handled by a worker.
+		 *
+		 * The documentation -- see
+		 * http://ericvh.github.io/9p-rfc/rfc9p2000.html
+		 * https://swtch.com/plan9port/man/man9/flush.html
+		 * -- says that "the server should answer the
+		 * flush message immediately".  However, Linux
+		 * sends flush requests for operations that
+		 * must finish, such as Tclunk, and it's not
+		 * possible to *answer* the flush request until
+		 * it has been handled (if necessary) or aborted
+		 * (if allowed).
+		 *
+		 * We therefore now just mark the original request
+		 * and let the request-handler do whatever is
+		 * appropriate.  NOTE: we could have a table of
+		 * "requests that can be aborted without being
+		 * run" vs "requests that must be run to be
+		 * aborted", but for now that seems like an
+		 * unnecessary complication.
+		 */
+		nstate = L9P_FLUSH_REQUESTED_PRE_START;
+		break;
+
+	case L9P_WS_IMMEDIATE:
+		/*
+		 * This state only applies to Tflush requests, and
+		 * flushing a Tflush is illegal.  But we'll do nothing
+		 * special here, which will make us act like a flush
+		 * request for the flushee that arrived too late to
+		 * do anything about the flushee.
+		 */
+		nstate = L9P_FLUSH_REQUESTED_POST_START;
+		break;
+
+	case L9P_WS_INPROGRESS:
+		/*
+		 * Worker thread flushee->lr_worker is working on it.
+		 * Kick it to get it out of blocking system calls.
+		 * (This requires that it carefully set up some
+		 * signal handlers, and may be FreeBSD-dependent,
+		 * it probably cannot be handled this way on MacOS.)
+		 */
+#ifdef notyet
+		pthread_kill(...);
+#endif
+		nstate = L9P_FLUSH_REQUESTED_POST_START;
+		break;
+
+	case L9P_WS_RESPQUEUED:
+		/*
+		 * The flushee is already in the response queue.
+		 * We'll just mark it as having had some flush
+		 * action applied.
+		 */
+		nstate = L9P_FLUSH_TOOLATE;
+		break;
+
+	case L9P_WS_REPLYING:
+	default:
+		/*
+		 * Although we found the flushee, it's too late to
+		 * make us depend on it: it's already heading out
+		 * the door as a reply.
+		 *
+		 * We don't want to do anything to the flushee.
+		 * Instead, we want to work the same way as if
+		 * we had never found the tag.
+		 *
+		 * An unrecognized work state is handled the same
+		 * way, so that nstate is never used uninitialized.
+		 */
+		goto done;
+	}
+
+	/*
+	 * Now add us to the list of Tflush-es that are waiting
+	 * for the flushee (creating the list if needed, i.e., if
+	 * this is the first Tflush for the flushee).  We (req)
+	 * will get queued for reply later, when the responder
+	 * processes the flushee and calls l9p_threadpool_rflush().
+	 */
+	if (flushee->lr_flushstate == L9P_FLUSH_NONE)
+		STAILQ_INIT(&flushee->lr_flushq);
+	flushee->lr_flushstate = nstate;
+	STAILQ_INSERT_TAIL(&flushee->lr_flushq, req, lr_flushlink);
+
+	pthread_mutex_unlock(&tp->ltp_mtx);
+
+	return (0);
+
+done:
+	/*
+	 * This immediate op is ready to be replied-to now, so just
+	 * stick it onto the reply queue.
+	 */
+	req->lr_workstate = L9P_WS_RESPQUEUED;
+	STAILQ_INSERT_TAIL(&tp->ltp_replyq, req, lr_worklink);
+	pthread_mutex_unlock(&tp->ltp_mtx);
+	pthread_cond_signal(&tp->ltp_reply_cv);
+	return (0);
+}
+
+/*
+ * Stop and join every thread on ltp_workers, one at a time, then
+ * destroy the pool's condition variables and mutex.  Always returns 0.
+ */
+int
+l9p_threadpool_shutdown(struct l9p_threadpool *tp)
+{
+	struct l9p_worker *worker, *tmp;
+
+	LIST_FOREACH_SAFE(worker, &tp->ltp_workers, ltw_link, tmp) {
+		pthread_mutex_lock(&tp->ltp_mtx);
+		worker->ltw_exiting = true;
+		if (worker->ltw_responder)
+			pthread_cond_signal(&tp->ltp_reply_cv);
+		else
+			pthread_cond_broadcast(&tp->ltp_work_cv);
+		pthread_mutex_unlock(&tp->ltp_mtx);
+		pthread_join(worker->ltw_thread, NULL);
+		LIST_REMOVE(worker, ltw_link);
+		free(worker);
+	}
+	pthread_cond_destroy(&tp->ltp_reply_cv);
+	pthread_cond_destroy(&tp->ltp_work_cv);
+	pthread_mutex_destroy(&tp->ltp_mtx);
+
+	return (0);
+}
Index: lib/lib9p/transport/socket.h
===================================================================
--- /dev/null
+++ lib/lib9p/transport/socket.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2016 Jakub Klama
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1.
Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LIB9P_SOCKET_H
+#define LIB9P_SOCKET_H
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include "../lib9p.h"
+
+int l9p_start_server(struct l9p_server *server, const char *host,
+    const char *port);
+void l9p_socket_accept(struct l9p_server *server, int conn_fd,
+    struct sockaddr *client_addr, socklen_t client_addr_len);
+
+#endif	/* LIB9P_SOCKET_H */
Index: lib/lib9p/transport/socket.c
===================================================================
--- /dev/null
+++ lib/lib9p/transport/socket.c
@@ -0,0 +1,363 @@
+/*
+ * Copyright 2016 Jakub Klama
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2.
Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <pthread.h>
+#include <assert.h>
+#include <sys/types.h>
+#ifdef __APPLE__
+# include "../apple_endian.h"
+#else
+# include <sys/endian.h>
+#endif
+#include <sys/socket.h>
+#include <sys/event.h>
+#include <sys/uio.h>
+#include <netdb.h>
+#include "../lib9p.h"
+#include "../lib9p_impl.h"
+#include "../log.h"
+#include "socket.h"
+
+struct l9p_socket_softc
+{
+	struct l9p_connection *ls_conn;
+	struct sockaddr ls_sockaddr;
+	socklen_t ls_socklen;
+	pthread_t ls_thread;
+	int ls_fd;
+};
+
+static int l9p_socket_readmsg(struct l9p_socket_softc *, void **, size_t *);
+static int l9p_socket_get_response_buffer(struct l9p_request *,
+    struct iovec *, size_t *, void *);
+static int l9p_socket_send_response(struct l9p_request *, const struct iovec *,
+    const size_t, const size_t, void *);
+static void l9p_socket_drop_response(struct l9p_request *, const struct iovec *,
+    size_t, void *);
+static void *l9p_socket_thread(void *);
+static ssize_t xread(int, void *, size_t);
+static ssize_t xwrite(int, void *, size_t);
+
+/*
+ * Bind and listen on the addresses that host/port resolve to (at
+ * most two, the capacity of the local arrays), then accept
+ * connections forever, handing each one to l9p_socket_accept().
+ *
+ * Only returns on failure, with -1; the listening sockets and the
+ * kqueue descriptor are closed before returning.
+ */
+int
+l9p_start_server(struct l9p_server *server, const char *host, const char *port)
+{
+	struct addrinfo *res, *res0, hints;
+	struct kevent kev[2];
+	struct kevent event[2];
+	int err, kq = -1, i, val, evs, nsockets = 0;
+	int sockets[2];
+	const int maxsockets = (int)(sizeof(sockets) / sizeof(sockets[0]));
+
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_family = PF_UNSPEC;
+	hints.ai_socktype = SOCK_STREAM;
+	err = getaddrinfo(host, port, &hints, &res0);
+
+	if (err)
+		return (-1);
+
+	/* stop at maxsockets: kev[], event[] and sockets[] hold no more */
+	for (res = res0; res != NULL && nsockets < maxsockets;
+	    res = res->ai_next) {
+		int s = socket(res->ai_family, res->ai_socktype,
+		    res->ai_protocol);
+
+		if (s < 0)
+			continue;
+
+		val = 1;
+		(void) setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &val,
+		    sizeof(val));
+
+		if (bind(s, res->ai_addr, res->ai_addrlen) < 0 ||
+		    listen(s, 10) < 0) {
+			close(s);
+			continue;
+		}
+
+		sockets[nsockets] = s;
+		EV_SET(&kev[nsockets++], s, EVFILT_READ, EV_ADD | EV_ENABLE, 0,
+		    0, 0);
+	}
+	freeaddrinfo(res0);
+
+	if (nsockets < 1) {
+		L9P_LOG(L9P_ERROR, "bind(): %s", strerror(errno));
+		return(-1);
+	}
+
+	kq = kqueue();
+	if (kq < 0) {
+		L9P_LOG(L9P_ERROR, "kqueue(): %s", strerror(errno));
+		goto fail;
+	}
+
+	if (kevent(kq, kev, nsockets, NULL, 0, NULL) < 0) {
+		L9P_LOG(L9P_ERROR, "kevent(): %s", strerror(errno));
+		goto fail;
+	}
+
+	for (;;) {
+		evs = kevent(kq, NULL, 0, event, nsockets, NULL);
+		if (evs < 0) {
+			if (errno == EINTR)
+				continue;
+
+			L9P_LOG(L9P_ERROR, "kevent(): %s", strerror(errno));
+			goto fail;
+		}
+
+		for (i = 0; i < evs; i++) {
+			/* sockaddr_storage: big enough for IPv6 peers too */
+			struct sockaddr_storage client_addr;
+			socklen_t client_addr_len = sizeof(client_addr);
+			int news = accept((int)event[i].ident,
+			    (struct sockaddr *)&client_addr,
+			    &client_addr_len);
+
+			if (news < 0) {
+				L9P_LOG(L9P_WARNING, "accept(): %s",
+				    strerror(errno));
+				continue;
+			}
+
+			l9p_socket_accept(server, news,
+			    (struct sockaddr *)&client_addr,
+			    client_addr_len);
+		}
+	}
+
+fail:
+	if (kq >= 0)
+		close(kq);
+	for (i = 0; i < nsockets; i++)
+		close(sockets[i]);
+	return (-1);
+}
+
+/*
+ * Set up a 9P connection on an already-accepted socket and start
+ * the thread that reads requests from it.  On success the new
+ * thread owns conn_fd; on failure conn_fd is closed here, since
+ * the void return gives the caller no way to learn of the failure.
+ */
+void
+l9p_socket_accept(struct l9p_server *server, int conn_fd,
+    struct sockaddr *client_addr, socklen_t client_addr_len)
+{
+	struct l9p_socket_softc *sc;
+	struct l9p_connection *conn;
+	char host[NI_MAXHOST + 1];
+	char serv[NI_MAXSERV + 1];
+	int err;
+
+	err = getnameinfo(client_addr, client_addr_len, host, NI_MAXHOST, serv,
+	    NI_MAXSERV, NI_NUMERICHOST | NI_NUMERICSERV);
+
+	if (err != 0) {
+		L9P_LOG(L9P_WARNING, "cannot look up client name: %s",
+		    gai_strerror(err));
+		/* both are logged again below; give them defined contents */
+		host[0] = '?';
+		host[1] = '\0';
+		serv[0] = '?';
+		serv[1] = '\0';
+	} else {
+		L9P_LOG(L9P_INFO, "new connection from %s:%s", host, serv);
+	}
+
+	if (l9p_connection_init(server, &conn) != 0) {
+		L9P_LOG(L9P_ERROR, "cannot create new connection");
+		close(conn_fd);
+		return;
+	}
+
+	sc = l9p_calloc(1, sizeof(*sc));
+	sc->ls_conn = conn;
+	sc->ls_fd = conn_fd;
+
+	/*
+	 * Fill in transport handler functions and aux argument.
+	 */
+	conn->lc_lt.lt_aux = sc;
+	conn->lc_lt.lt_get_response_buffer = l9p_socket_get_response_buffer;
+	conn->lc_lt.lt_send_response = l9p_socket_send_response;
+	conn->lc_lt.lt_drop_response = l9p_socket_drop_response;
+
+	err = pthread_create(&sc->ls_thread, NULL, l9p_socket_thread, sc);
+	if (err) {
+		L9P_LOG(L9P_ERROR,
+		    "pthread_create (for connection from %s:%s): error %s",
+		    host, serv, strerror(err));
+		l9p_connection_close(sc->ls_conn);
+		close(conn_fd);
+		free(sc);
+	}
+}
+
+/*
+ * Per-connection thread: read whole messages and feed them to
+ * l9p_connection_recv() until the read fails or hits EOF, then
+ * close the connection, its socket, and free the softc.
+ */
+static void *
+l9p_socket_thread(void *arg)
+{
+	struct l9p_socket_softc *sc = (struct l9p_socket_softc *)arg;
+	struct iovec iov;
+	void *buf;
+	size_t length;
+
+	/* nobody joins this thread; let it release itself on exit */
+	(void) pthread_detach(pthread_self());
+
+	for (;;) {
+		if (l9p_socket_readmsg(sc, &buf, &length) != 0)
+			break;
+
+		iov.iov_base = buf;
+		iov.iov_len = length;
+		l9p_connection_recv(sc->ls_conn, &iov, 1, NULL);
+		free(buf);
+	}
+
+	L9P_LOG(L9P_INFO, "connection closed");
+	l9p_connection_close(sc->ls_conn);
+	/* close only after the connection is down; replies use this fd */
+	close(sc->ls_fd);
+	free(sc);
+	return (NULL);
+}
+
+/*
+ * Read one message: a 32-bit little-endian total size followed by
+ * the rest of the message.  On success returns 0 and hands the
+ * buffer (size prefix included) to the caller, who must free it.
+ * On any failure, including EOF, returns -1 and frees the buffer.
+ */
+static int
+l9p_socket_readmsg(struct l9p_socket_softc *sc, void **buf, size_t *size)
+{
+	uint32_t msize;
+	size_t toread;
+	ssize_t ret;
+	void *buffer;
+	int fd = sc->ls_fd;
+
+	assert(fd >= 0);
+
+	buffer = l9p_malloc(sizeof(uint32_t));
+
+	ret = xread(fd, buffer, sizeof(uint32_t));
+	if (ret < 0) {
+		L9P_LOG(L9P_ERROR, "read(): %s", strerror(errno));
+		goto fail;
+	}
+
+	if ((size_t)ret != sizeof(uint32_t)) {
+		if (ret == 0)
+			L9P_LOG(L9P_DEBUG, "%p: EOF", (void *)sc->ls_conn);
+		else
+			L9P_LOG(L9P_ERROR,
+			    "short read: %zd bytes of %zu expected",
+			    ret, sizeof(uint32_t));
+		goto fail;
+	}
+
+	msize = le32toh(*(uint32_t *)buffer);
+	if (msize < sizeof(uint32_t)) {
+		/* size counts itself; smaller would underflow toread */
+		L9P_LOG(L9P_ERROR, "bad message size %u", (unsigned)msize);
+		goto fail;
+	}
+	toread = msize - sizeof(uint32_t);
+	buffer = l9p_realloc(buffer, msize);
+
+	ret = xread(fd, (char *)buffer + sizeof(uint32_t), toread);
+	if (ret < 0) {
+		L9P_LOG(L9P_ERROR, "read(): %s", strerror(errno));
+		goto fail;
+	}
+
+	if (ret != (ssize_t)toread) {
+		L9P_LOG(L9P_ERROR, "short read: %zd bytes of %zu expected",
+		    ret, toread);
+		goto fail;
+	}
+
+	*size = msize;
+	*buf = buffer;
+	L9P_LOG(L9P_INFO, "%p: read complete message, buf=%p size=%u",
+	    (void *)sc->ls_conn, buffer, (unsigned)msize);
+
+	return (0);
+
+fail:
+	free(buffer);
+	return (-1);
+}
+
+static int
+l9p_socket_get_response_buffer(struct l9p_request *req, struct iovec *iov,
+    size_t *niovp, void *arg __unused)
+{
+	size_t size = req->lr_conn->lc_msize;
+	void *buf;
+
+	buf = l9p_malloc(size);
+	iov[0].iov_base = buf;
+	iov[0].iov_len = size;
+
+	*niovp = 1;
+	return (0);
+}
+
+static int
+l9p_socket_send_response(struct l9p_request *req __unused,
+    const struct iovec *iov, const size_t niov __unused, const size_t iolen,
+    void *arg)
+{
+	struct l9p_socket_softc *sc = (struct l9p_socket_softc *)arg;
+
+	assert(sc->ls_fd >= 0);
+
+	L9P_LOG(L9P_DEBUG, "%p: sending reply, buf=%p, size=%zu", arg,
+	    iov[0].iov_base, iolen);
+
+	if (xwrite(sc->ls_fd, iov[0].iov_base, iolen) != (ssize_t)iolen) {
+		/*
+		 * NOTE(review): the buffer is not freed on this path.
+		 * Whether the caller drops it after a failed send is not
+		 * visible here; confirm before adding a free().
+		 */
+		L9P_LOG(L9P_ERROR, "short write: %s", strerror(errno));
+		return (-1);
+	}
+
+	free(iov[0].iov_base);
+	return (0);
+}
+
+static void
+l9p_socket_drop_response(struct l9p_request *req __unused,
+    const struct iovec *iov, size_t niov __unused, void *arg)
+{
+
+	L9P_LOG(L9P_DEBUG, "%p: drop buf=%p", arg, iov[0].iov_base);
+	free(iov[0].iov_base);
+}
+
+static ssize_t
+xread(int fd, void *buf, size_t count)
+{
+	size_t done = 0;
+	ssize_t ret;
+
+	while (done < count) {
+		ret = read(fd, (char *)buf + done, count - done);
+		if (ret < 0) {
+			if (errno == EINTR)
+				continue;
+
+			return (-1);
+		}
+
+		if (ret == 0)
+			return ((ssize_t)done);
+
+		done += (size_t)ret;
+	}
+
+	return ((ssize_t)done);
+}
+
+static ssize_t
+xwrite(int fd, void *buf, size_t count)
+{
+	size_t done = 0;
+	ssize_t ret;
+
+	while (done < count) {
+		ret = write(fd, (char *)buf + done, count - done);
+		if (ret < 0) {
+			if (errno == EINTR)
+				continue;
+
+			return (-1);
+		}
+
+		if (ret == 0)
+			return ((ssize_t)done);
+
+		done += (size_t)ret;
+	}
+
+	return ((ssize_t)done);
+}
Index: lib/lib9p/utils.c
===================================================================
--- /dev/null
+++ lib/lib9p/utils.c
@@ -0,0 +1,1268 @@
+/*
+ * Copyright 2016 Jakub Klama
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if defined(__FreeBSD__) +#include +#else +#include "sbuf/sbuf.h" +#endif +#include "lib9p.h" +#include "fcall.h" +#include "linux_errno.h" + +#ifdef __APPLE__ + #define GETGROUPS_GROUP_TYPE_IS_INT +#endif + +#define N(ary) (sizeof(ary) / sizeof(*ary)) + +/* See l9p_describe_bits() below. */ +struct descbits { + uint64_t db_mask; /* mask value */ + uint64_t db_match; /* match value */ + const char *db_name; /* name for matched value */ +}; + + +static bool l9p_describe_bits(const char *, uint64_t, const char *, + const struct descbits *, struct sbuf *); +static void l9p_describe_fid(const char *, uint32_t, struct sbuf *); +static void l9p_describe_mode(const char *, uint32_t, struct sbuf *); +static void l9p_describe_name(const char *, char *, struct sbuf *); +static void l9p_describe_perm(const char *, uint32_t, struct sbuf *); +static void l9p_describe_lperm(const char *, uint32_t, struct sbuf *); +static void l9p_describe_qid(const char *, struct l9p_qid *, struct sbuf *); +static void l9p_describe_l9stat(const char *, struct l9p_stat *, + enum l9p_version, struct sbuf *); +static void l9p_describe_statfs(const char *, struct l9p_statfs *, + struct sbuf *); +static void l9p_describe_time(struct sbuf *, const char *, uint64_t, uint64_t); +static void l9p_describe_readdir(struct sbuf *, struct l9p_f_io *); +static void l9p_describe_size(const char *, uint64_t, struct sbuf *); +static void l9p_describe_ugid(const char *, uint32_t, struct sbuf *); +static void l9p_describe_getattr_mask(uint64_t, struct sbuf *); +static void l9p_describe_unlinkat_flags(const char *, uint32_t, struct sbuf *); +static const char *lookup_linux_errno(uint32_t); + +/* + * Using indexed initializers, we can have these occur in any order. + * Using adjacent-string concatenation ("T" #name, "R" #name), we + * get both Tfoo and Rfoo strings with one copy of the name. 
+ * Alas, there is no stupid cpp trick to lowercase-ify, so we
+ * have to write each name twice.  In which case we might as well
+ * make the second one a string in the first place and not bother
+ * with the stringizing.
+ *
+ * This table should have entries for each enum value in fcall.h.
+ */
+#define X(NAME, name)	[L9P_T##NAME - L9P__FIRST] = "T" name, \
+			[L9P_R##NAME - L9P__FIRST] = "R" name
+static const char *ftype_names[] = {
+	X(VERSION,	"version"),
+	X(AUTH,		"auth"),
+	X(ATTACH,	"attach"),
+	X(ERROR,	"error"),
+	X(LERROR,	"lerror"),
+	X(FLUSH,	"flush"),
+	X(WALK,		"walk"),
+	X(OPEN,		"open"),
+	X(CREATE,	"create"),
+	X(READ,		"read"),
+	X(WRITE,	"write"),
+	X(CLUNK,	"clunk"),
+	X(REMOVE,	"remove"),
+	X(STAT,		"stat"),
+	X(WSTAT,	"wstat"),
+	X(STATFS,	"statfs"),
+	X(LOPEN,	"lopen"),
+	X(LCREATE,	"lcreate"),
+	X(SYMLINK,	"symlink"),
+	X(MKNOD,	"mknod"),
+	X(RENAME,	"rename"),
+	X(READLINK,	"readlink"),
+	X(GETATTR,	"getattr"),
+	X(SETATTR,	"setattr"),
+	X(XATTRWALK,	"xattrwalk"),
+	X(XATTRCREATE,	"xattrcreate"),
+	X(READDIR,	"readdir"),
+	X(FSYNC,	"fsync"),
+	X(LOCK,		"lock"),
+	X(GETLOCK,	"getlock"),
+	X(LINK,		"link"),
+	X(MKDIR,	"mkdir"),
+	X(RENAMEAT,	"renameat"),
+	X(UNLINKAT,	"unlinkat"),
+};
+#undef X
+
+void
+l9p_seek_iov(struct iovec *iov1, size_t niov1, struct iovec *iov2,
+    size_t *niov2, size_t seek)
+{
+	size_t remainder = 0;
+	size_t left = seek;
+	size_t i, j;
+
+	for (i = 0; i < niov1; i++) {
+		size_t toseek = MIN(left, iov1[i].iov_len);
+		left -= toseek;
+
+		if (toseek == iov1[i].iov_len)
+			continue;
+
+		if (left == 0) {
+			remainder = toseek;
+			break;
+		}
+	}
+
+	for (j = i; j < niov1; j++) {
+		iov2[j - i].iov_base = (char *)iov1[j].iov_base + remainder;
+		iov2[j - i].iov_len = iov1[j].iov_len - remainder;
+		remainder = 0;
+	}
+
+	*niov2 = j - i;
+}
+
+size_t
+l9p_truncate_iov(struct iovec *iov, size_t niov, size_t length)
+{
+	size_t i, done = 0;
+
+	for (i = 0; i < niov; i++) {
+		size_t toseek = MIN(length - done, iov[i].iov_len);
+		done += toseek;
+
+		if (toseek < iov[i].iov_len) {
+			iov[i].iov_len = toseek;
+			return (i + 1);
+		}
+	}
+
+	return (niov);
+}
+
+/*
+ * This is a wrapper for getgrouplist() that returns malloc'ed
+ * memory (the caller frees it), and papers over FreeBSD vs Mac
+ * differences in the getgrouplist() argument types.
+ *
+ * Note that this function guarantees that *either*:
+ *     return value != NULL and *angroups has been set
+ * or: return value == NULL and *angroups is 0
+ *
+ * At most NGROUPS_MAX groups are returned; any further groups the
+ * user belongs to are silently left out.
+ */
+gid_t *
+l9p_getgrlist(const char *name, gid_t basegid, int *angroups)
+{
+#ifdef GETGROUPS_GROUP_TYPE_IS_INT
+	int i, *int_groups;
+#endif
+	gid_t *groups;
+	int ngroups;
+
+	/*
+	 * Todo, perhaps: while getgrouplist() returns -1, expand.
+	 * For now just use NGROUPS_MAX.
+	 */
+	ngroups = NGROUPS_MAX;
+	groups = malloc((size_t)ngroups * sizeof(*groups));
+#ifdef GETGROUPS_GROUP_TYPE_IS_INT
+	int_groups = groups ? malloc((size_t)ngroups * sizeof(*int_groups)) :
+	    NULL;
+	if (int_groups == NULL) {
+		free(groups);
+		groups = NULL;
+	}
+#endif
+	if (groups == NULL) {
+		*angroups = 0;
+		return (NULL);
+	}
+#ifdef GETGROUPS_GROUP_TYPE_IS_INT
+	(void) getgrouplist(name, (int)basegid, int_groups, &ngroups);
+#else
+	(void) getgrouplist(name, basegid, groups, &ngroups);
+#endif
+	/*
+	 * When the list does not fit, getgrouplist() may report the
+	 * full group count in ngroups.  Never claim more entries than
+	 * the buffers can hold.
+	 */
+	if (ngroups > NGROUPS_MAX)
+		ngroups = NGROUPS_MAX;
+	if (ngroups < 0)
+		ngroups = 0;
+#ifdef GETGROUPS_GROUP_TYPE_IS_INT
+	for (i = 0; i < ngroups; i++)
+		groups[i] = (gid_t)int_groups[i];
+	/* the int-typed scratch copy is no longer needed */
+	free(int_groups);
+#endif
+	*angroups = ngroups;
+	return (groups);
+}
+
+/*
+ * For the various debug describe ops: decode bits in a bit-field-y
+ * value.  For example, we might produce:
+ *     value=0x3c[FOO,BAR,QUUX,?0x20]
+ * when FOO is bit 0x10, BAR is 0x08, and QUUX is 0x04 (as defined
+ * by the table).  This leaves 0x20 (bit 5) as a mystery, while bits
+ * 4, 3, and 2 were decoded.  (Bits 0 and 1 were 0 on input hence
+ * were not attempted here.)
+ *
+ * For general use we take a uint64_t value.  The bit description
+ * table is an array of {mask, match, str} values ending with
+ * {0, 0, NULL}.
+ *
+ * If str is non-NULL we'll print it and the value as well (if
+ * str is NULL we'll print neither).
The value is always printed in
+ * hex at the moment.  See undec description too.
+ *
+ * For convenience, you can use a mask-and-match value, e.g., to
+ * decode a 2-bit field in bits 0 and 1 you can mask against 3 and
+ * match the values 0, 1, 2, and 3.  To handle this, make sure that
+ * all masks-with-same-match are sequential.
+ *
+ * If there are any nonzero undecoded bits, print them after
+ * all the decode-able bits have been handled.
+ *
+ * The oc argument defines the open and close bracket characters,
+ * typically "[]", that surround the entire string.  If NULL, no
+ * brackets are added, else oc[0] goes in the front and oc[1] at
+ * the end, after printing any undecoded (?0x...) part.  The
+ * brackets appear only when at least one item was printed.
+ *
+ * Returns true if it printed anything (other than the implied
+ * str-and-value, that is).
+ */
+static bool
+l9p_describe_bits(const char *str, uint64_t value, const char *oc,
+    const struct descbits *db, struct sbuf *sb)
+{
+	const char *sep;
+	char bracketbuf[2] = "";
+	bool printed = false;
+
+	if (str != NULL)
+		sbuf_printf(sb, "%s0x%" PRIx64, str, value);
+
+	if (oc != NULL)
+		bracketbuf[0] = oc[0];
+	sep = bracketbuf;	/* first item is preceded by the open bracket */
+	for (; db->db_name != NULL; db++) {
+		if ((value & db->db_mask) == db->db_match) {
+			sbuf_printf(sb, "%s%s", sep, db->db_name);
+			sep = ",";
+			printed = true;
+
+			/*
+			 * Clear the field, and make sure we
+			 * won't match a zero-valued field with
+			 * this same mask.
+			 */
+			value &= ~db->db_mask;
+			while (db[1].db_mask == db->db_mask &&
+			    db[1].db_name != NULL)
+				db++;
+		}
+	}
+	if (value != 0) {
+		sbuf_printf(sb, "%s?0x%" PRIx64, sep, value);
+		printed = true;
+	}
+	if (printed && oc != NULL) {
+		bracketbuf[0] = oc[1];
+		sbuf_cat(sb, bracketbuf);
+	}
+	return (printed);
+}
+
+/*
+ * Show file ID.
+ */
+static void
+l9p_describe_fid(const char *str, uint32_t fid, struct sbuf *sb)
+{
+
+	sbuf_printf(sb, "%s%" PRIu32, str, fid);
+}
+
+/*
+ * Show user or group ID.
+ */ +static void +l9p_describe_ugid(const char *str, uint32_t ugid, struct sbuf *sb) +{ + + sbuf_printf(sb, "%s%" PRIu32, str, ugid); +} + +/* + * Show file mode (O_RDWR, O_RDONLY, etc). The argument is + * an l9p_omode, not a Linux flags mode. Linux flags are + * decoded with l9p_describe_lflags. + */ +static void +l9p_describe_mode(const char *str, uint32_t mode, struct sbuf *sb) +{ + static const struct descbits bits[] = { + { L9P_OACCMODE, L9P_OREAD, "OREAD" }, + { L9P_OACCMODE, L9P_OWRITE, "OWRITE" }, + { L9P_OACCMODE, L9P_ORDWR, "ORDWR" }, + { L9P_OACCMODE, L9P_OEXEC, "OEXEC" }, + + { L9P_OCEXEC, L9P_OCEXEC, "OCEXEC" }, + { L9P_ODIRECT, L9P_ODIRECT, "ODIRECT" }, + { L9P_ORCLOSE, L9P_ORCLOSE, "ORCLOSE" }, + { L9P_OTRUNC, L9P_OTRUNC, "OTRUNC" }, + { 0, 0, NULL } + }; + + (void) l9p_describe_bits(str, mode, "[]", bits, sb); +} + +/* + * Show Linux mode/flags. + */ +static void +l9p_describe_lflags(const char *str, uint32_t flags, struct sbuf *sb) +{ + static const struct descbits bits[] = { + { L9P_OACCMODE, L9P_OREAD, "O_READ" }, + { L9P_OACCMODE, L9P_OWRITE, "O_WRITE" }, + { L9P_OACCMODE, L9P_ORDWR, "O_RDWR" }, + { L9P_OACCMODE, L9P_OEXEC, "O_EXEC" }, + + { L9P_L_O_APPEND, L9P_L_O_APPEND, "O_APPEND" }, + { L9P_L_O_CLOEXEC, L9P_L_O_CLOEXEC, "O_CLOEXEC" }, + { L9P_L_O_CREAT, L9P_L_O_CREAT, "O_CREAT" }, + { L9P_L_O_DIRECT, L9P_L_O_DIRECT, "O_DIRECT" }, + { L9P_L_O_DIRECTORY, L9P_L_O_DIRECTORY, "O_DIRECTORY" }, + { L9P_L_O_DSYNC, L9P_L_O_DSYNC, "O_DSYNC" }, + { L9P_L_O_EXCL, L9P_L_O_EXCL, "O_EXCL" }, + { L9P_L_O_FASYNC, L9P_L_O_FASYNC, "O_FASYNC" }, + { L9P_L_O_LARGEFILE, L9P_L_O_LARGEFILE, "O_LARGEFILE" }, + { L9P_L_O_NOATIME, L9P_L_O_NOATIME, "O_NOATIME" }, + { L9P_L_O_NOCTTY, L9P_L_O_NOCTTY, "O_NOCTTY" }, + { L9P_L_O_NOFOLLOW, L9P_L_O_NOFOLLOW, "O_NOFOLLOW" }, + { L9P_L_O_NONBLOCK, L9P_L_O_NONBLOCK, "O_NONBLOCK" }, + { L9P_L_O_PATH, L9P_L_O_PATH, "O_PATH" }, + { L9P_L_O_SYNC, L9P_L_O_SYNC, "O_SYNC" }, + { L9P_L_O_TMPFILE, L9P_L_O_TMPFILE, "O_TMPFILE" }, + { 
L9P_L_O_TMPFILE, L9P_L_O_TMPFILE, "O_TMPFILE" },
+		{ L9P_L_O_TRUNC, L9P_L_O_TRUNC, "O_TRUNC" },
+		{ 0, 0, NULL }
+	};
+
+	(void) l9p_describe_bits(str, flags, "[]", bits, sb);
+}
+
+/*
+ * Show file name or other similar, potentially-very-long string.
+ * Actual strings get quotes, a NULL name (if it occurs) gets
+ * <null> (no quotes), so you can tell the difference.
+ *
+ * Names longer than 32 bytes are cut to their first 29 bytes
+ * followed by "...".
+ */
+static void
+l9p_describe_name(const char *str, char *name, struct sbuf *sb)
+{
+	size_t len;
+
+	if (name == NULL) {
+		/* Unquoted marker, so NULL is distinct from "". */
+		sbuf_printf(sb, "%s<null>", str);
+		return;
+	}
+
+	len = strlen(name);
+
+	if (len > 32)
+		sbuf_printf(sb, "%s\"%.*s...\"", str, 32 - 3, name);
+	else
+		sbuf_printf(sb, "%s\"%.*s\"", str, (int)len, name);
+}
+
+/*
+ * Show permissions (rwx etc).  Prints the value in hex only if
+ * the rwx bits do not cover the entire value.
+ */
+static void
+l9p_describe_perm(const char *str, uint32_t mode, struct sbuf *sb)
+{
+	char pbuf[12];
+
+	strmode(mode & 0777, pbuf);
+	if ((mode & ~(uint32_t)0777) != 0)
+		sbuf_printf(sb, "%s0x%" PRIx32 "<%.9s>", str, mode, pbuf + 1);
+	else
+		sbuf_printf(sb, "%s<%.9s>", str, pbuf + 1);
+}
+
+/*
+ * Show "extended" permissions: regular permissions, but also the
+ * various DM* extension bits from 9P2000.u.
+ */ +static void +l9p_describe_ext_perm(const char *str, uint32_t mode, struct sbuf *sb) +{ + static const struct descbits bits[] = { + { L9P_DMDIR, L9P_DMDIR, "DMDIR" }, + { L9P_DMAPPEND, L9P_DMAPPEND, "DMAPPEND" }, + { L9P_DMEXCL, L9P_DMEXCL, "DMEXCL" }, + { L9P_DMMOUNT, L9P_DMMOUNT, "DMMOUNT" }, + { L9P_DMAUTH, L9P_DMAUTH, "DMAUTH" }, + { L9P_DMTMP, L9P_DMTMP, "DMTMP" }, + { L9P_DMSYMLINK, L9P_DMSYMLINK, "DMSYMLINK" }, + { L9P_DMDEVICE, L9P_DMDEVICE, "DMDEVICE" }, + { L9P_DMNAMEDPIPE, L9P_DMNAMEDPIPE, "DMNAMEDPIPE" }, + { L9P_DMSOCKET, L9P_DMSOCKET, "DMSOCKET" }, + { L9P_DMSETUID, L9P_DMSETUID, "DMSETUID" }, + { L9P_DMSETGID, L9P_DMSETGID, "DMSETGID" }, + { 0, 0, NULL } + }; + bool need_sep; + + sbuf_printf(sb, "%s[", str); + need_sep = l9p_describe_bits(NULL, mode & ~(uint32_t)0777, NULL, + bits, sb); + l9p_describe_perm(need_sep ? "," : "", mode & 0777, sb); + sbuf_cat(sb, "]"); +} + +/* + * Show Linux-specific permissions: regular permissions, but also + * the S_IFMT field. + */ +static void +l9p_describe_lperm(const char *str, uint32_t mode, struct sbuf *sb) +{ + static const struct descbits bits[] = { + { S_IFMT, S_IFIFO, "S_IFIFO" }, + { S_IFMT, S_IFCHR, "S_IFCHR" }, + { S_IFMT, S_IFDIR, "S_IFDIR" }, + { S_IFMT, S_IFBLK, "S_IFBLK" }, + { S_IFMT, S_IFREG, "S_IFREG" }, + { S_IFMT, S_IFLNK, "S_IFLNK" }, + { S_IFMT, S_IFSOCK, "S_IFSOCK" }, + { 0, 0, NULL } + }; + bool need_sep; + + sbuf_printf(sb, "%s[", str); + need_sep = l9p_describe_bits(NULL, mode & ~(uint32_t)0777, NULL, + bits, sb); + l9p_describe_perm(need_sep ? "," : "", mode & 0777, sb); + sbuf_cat(sb, "]"); +} + +/* + * Show qid ( tuple). + */ +static void +l9p_describe_qid(const char *str, struct l9p_qid *qid, struct sbuf *sb) +{ + static const struct descbits bits[] = { + /* + * NB: L9P_QTFILE is 0, i.e., is implied by no + * other bits being set. 
We get this produced + * when we mask against 0xff and compare for + * L9P_QTFILE, but we must do it first so that + * we mask against the original (not-adjusted) + * value. + */ + { 0xff, L9P_QTFILE, "FILE" }, + { L9P_QTDIR, L9P_QTDIR, "DIR" }, + { L9P_QTAPPEND, L9P_QTAPPEND, "APPEND" }, + { L9P_QTEXCL, L9P_QTEXCL, "EXCL" }, + { L9P_QTMOUNT, L9P_QTMOUNT, "MOUNT" }, + { L9P_QTAUTH, L9P_QTAUTH, "AUTH" }, + { L9P_QTTMP, L9P_QTTMP, "TMP" }, + { L9P_QTSYMLINK, L9P_QTSYMLINK, "SYMLINK" }, + { 0, 0, NULL } + }; + + assert(qid != NULL); + + sbuf_cat(sb, str); + (void) l9p_describe_bits("<", qid->type, "[]", bits, sb); + sbuf_printf(sb, ",%" PRIu32 ",0x%016" PRIx64 ">", + qid->version, qid->path); +} + +/* + * Show size. + */ +static void +l9p_describe_size(const char *str, uint64_t size, struct sbuf *sb) +{ + + sbuf_printf(sb, "%s%" PRIu64, str, size); +} + +/* + * Show l9stat (including 9P2000.u extensions if appropriate). + */ +static void +l9p_describe_l9stat(const char *str, struct l9p_stat *st, + enum l9p_version version, struct sbuf *sb) +{ + bool dotu = version >= L9P_2000U; + + assert(st != NULL); + + sbuf_printf(sb, "%stype=0x%04" PRIx32 " dev=0x%08" PRIx32, str, + st->type, st->dev); + l9p_describe_qid(" qid=", &st->qid, sb); + l9p_describe_ext_perm(" mode=", st->mode, sb); + if (st->atime != (uint32_t)-1) + sbuf_printf(sb, " atime=%" PRIu32, st->atime); + if (st->mtime != (uint32_t)-1) + sbuf_printf(sb, " mtime=%" PRIu32, st->mtime); + if (st->length != (uint64_t)-1) + sbuf_printf(sb, " length=%" PRIu64, st->length); + l9p_describe_name(" name=", st->name, sb); + /* + * It's pretty common to have NULL name+gid+muid. They're + * just noise if NULL *and* dot-u; decode only if non-null + * or not-dot-u. 
+	 */
+	if (st->uid != NULL || !dotu)
+		l9p_describe_name(" uid=", st->uid, sb);
+	if (st->gid != NULL || !dotu)
+		l9p_describe_name(" gid=", st->gid, sb);
+	if (st->muid != NULL || !dotu)
+		l9p_describe_name(" muid=", st->muid, sb);
+	if (dotu) {
+		if (st->extension != NULL)
+			l9p_describe_name(" extension=", st->extension, sb);
+		sbuf_printf(sb,
+		    " n_uid=%" PRIu32 " n_gid=%" PRIu32 " n_muid=%" PRIu32,
+		    st->n_uid, st->n_gid, st->n_muid);
+	}
+}
+
+static void
+l9p_describe_statfs(const char *str, struct l9p_statfs *st, struct sbuf *sb)
+{
+
+	assert(st != NULL);
+
+	sbuf_printf(sb, "%stype=0x%04lx bsize=%lu blocks=%" PRIu64
+	    " bfree=%" PRIu64 " bavail=%" PRIu64 " files=%" PRIu64
+	    " ffree=%" PRIu64 " fsid=0x%" PRIx64 " namelen=%" PRIu32 ">",
+	    str, (u_long)st->type, (u_long)st->bsize, st->blocks,
+	    st->bfree, st->bavail, st->files,
+	    st->ffree, st->fsid, st->namelen);
+}
+
+/*
+ * Decode a timestamp.
+ *
+ * Perhaps should use asctime_r.  For now, raw values.  A
+ * nanoseconds value above 999999999 is not a valid fraction of a
+ * second, so it is shown as an explicitly marked raw number
+ * rather than as zero-padded decimal digits.
+ */
+static void
+l9p_describe_time(struct sbuf *sb, const char *s, uint64_t sec, uint64_t nsec)
+{
+
+	sbuf_cat(sb, s);
+	if (nsec > 999999999)
+		sbuf_printf(sb, "%" PRIu64 ".<invalid nsec %" PRIu64 ">",
+		    sec, nsec);
+	else
+		sbuf_printf(sb, "%" PRIu64 ".%09" PRIu64, sec, nsec);
+}
+
+/*
+ * Decode readdir data (.L format, variable length names).
+ */
+static void
+l9p_describe_readdir(struct sbuf *sb, struct l9p_f_io *io)
+{
+	uint32_t count;
+#ifdef notyet
+	int i;
+	struct l9p_message msg;
+	struct l9p_dirent de;
+#endif
+
+	if ((count = io->count) == 0) {
+		sbuf_printf(sb, " EOF (count=0)");
+		return;
+	}
+
+	/*
+	 * Can't do this yet because we do not have the original
+	 * req.
+	 */
+#ifdef notyet
+	sbuf_printf(sb, " count=%" PRIu32 " [", count);
+
+	l9p_init_msg(&msg, req, L9P_UNPACK);
+	for (i = 0; msg.lm_size < count; i++) {
+		if (l9p_pudirent(&msg, &de) < 0) {
+			sbuf_printf(sb, " bad count");
+			break;
+		}
+
+		sbuf_printf(sb, i ?
", " : " "); + l9p_describe_qid(" qid=", &de.qid, sb); + sbuf_printf(sb, " offset=%" PRIu64 " type=%d", + de.offset, de.type); + l9p_describe_name(" name=", de.name); + free(de.name); + } + sbuf_printf(sb, "]=%d dir entries", i); +#else /* notyet */ + sbuf_printf(sb, " count=%" PRIu32, count); +#endif +} + +/* + * Decode Tgetattr request_mask field. + */ +static void +l9p_describe_getattr_mask(uint64_t request_mask, struct sbuf *sb) +{ + static const struct descbits bits[] = { + /* + * Note: ALL and BASIC must occur first and second. + * This is a little dirty: it depends on the way the + * describe_bits code clears the values. If we + * match ALL, we clear all those bits and do not + * match BASIC; if we match BASIC, we clear all + * those bits and do not match individual bits. Thus + * if we have BASIC but not all the additional bits, + * we'll see, e.g., [BASIC,BTIME,GEN]; if we have + * all the additional bits too, we'll see [ALL]. + * + * Since is true below, we'll also spot any + * bits added to the protocol since we made this table. 
+ */ + { L9PL_GETATTR_ALL, L9PL_GETATTR_ALL, "ALL" }, + { L9PL_GETATTR_BASIC, L9PL_GETATTR_BASIC, "BASIC" }, + + /* individual bits in BASIC */ + { L9PL_GETATTR_MODE, L9PL_GETATTR_MODE, "MODE" }, + { L9PL_GETATTR_NLINK, L9PL_GETATTR_NLINK, "NLINK" }, + { L9PL_GETATTR_UID, L9PL_GETATTR_UID, "UID" }, + { L9PL_GETATTR_GID, L9PL_GETATTR_GID, "GID" }, + { L9PL_GETATTR_RDEV, L9PL_GETATTR_RDEV, "RDEV" }, + { L9PL_GETATTR_ATIME, L9PL_GETATTR_ATIME, "ATIME" }, + { L9PL_GETATTR_MTIME, L9PL_GETATTR_MTIME, "MTIME" }, + { L9PL_GETATTR_CTIME, L9PL_GETATTR_CTIME, "CTIME" }, + { L9PL_GETATTR_INO, L9PL_GETATTR_INO, "INO" }, + { L9PL_GETATTR_SIZE, L9PL_GETATTR_SIZE, "SIZE" }, + { L9PL_GETATTR_BLOCKS, L9PL_GETATTR_BLOCKS, "BLOCKS" }, + + /* additional bits in ALL */ + { L9PL_GETATTR_BTIME, L9PL_GETATTR_BTIME, "BTIME" }, + { L9PL_GETATTR_GEN, L9PL_GETATTR_GEN, "GEN" }, + { L9PL_GETATTR_DATA_VERSION, L9PL_GETATTR_DATA_VERSION, + "DATA_VERSION" }, + { 0, 0, NULL } + }; + + (void) l9p_describe_bits(" request_mask=", request_mask, "[]", bits, + sb); +} + +/* + * Decode Tunlinkat flags. + */ +static void +l9p_describe_unlinkat_flags(const char *str, uint32_t flags, struct sbuf *sb) +{ + static const struct descbits bits[] = { + { L9PL_AT_REMOVEDIR, L9PL_AT_REMOVEDIR, "AT_REMOVEDIR" }, + { 0, 0, NULL } + }; + + (void) l9p_describe_bits(str, flags, "[]", bits, sb); +} + +static const char * +lookup_linux_errno(uint32_t linux_errno) +{ + static char unknown[50]; + + /* + * Error numbers in the "base" range (1..ERANGE) are common + * across BSD, MacOS, Linux, and Plan 9. + * + * Error numbers outside that range require translation. 
+ */ + const char *const table[] = { +#define X0(name) [name] = name ## _STR +#define X(name) [name] = name ## _STR + X(LINUX_EAGAIN), + X(LINUX_EDEADLK), + X(LINUX_ENAMETOOLONG), + X(LINUX_ENOLCK), + X(LINUX_ENOSYS), + X(LINUX_ENOTEMPTY), + X(LINUX_ELOOP), + X(LINUX_ENOMSG), + X(LINUX_EIDRM), + X(LINUX_ECHRNG), + X(LINUX_EL2NSYNC), + X(LINUX_EL3HLT), + X(LINUX_EL3RST), + X(LINUX_ELNRNG), + X(LINUX_EUNATCH), + X(LINUX_ENOCSI), + X(LINUX_EL2HLT), + X(LINUX_EBADE), + X(LINUX_EBADR), + X(LINUX_EXFULL), + X(LINUX_ENOANO), + X(LINUX_EBADRQC), + X(LINUX_EBADSLT), + X(LINUX_EBFONT), + X(LINUX_ENOSTR), + X(LINUX_ENODATA), + X(LINUX_ETIME), + X(LINUX_ENOSR), + X(LINUX_ENONET), + X(LINUX_ENOPKG), + X(LINUX_EREMOTE), + X(LINUX_ENOLINK), + X(LINUX_EADV), + X(LINUX_ESRMNT), + X(LINUX_ECOMM), + X(LINUX_EPROTO), + X(LINUX_EMULTIHOP), + X(LINUX_EDOTDOT), + X(LINUX_EBADMSG), + X(LINUX_EOVERFLOW), + X(LINUX_ENOTUNIQ), + X(LINUX_EBADFD), + X(LINUX_EREMCHG), + X(LINUX_ELIBACC), + X(LINUX_ELIBBAD), + X(LINUX_ELIBSCN), + X(LINUX_ELIBMAX), + X(LINUX_ELIBEXEC), + X(LINUX_EILSEQ), + X(LINUX_ERESTART), + X(LINUX_ESTRPIPE), + X(LINUX_EUSERS), + X(LINUX_ENOTSOCK), + X(LINUX_EDESTADDRREQ), + X(LINUX_EMSGSIZE), + X(LINUX_EPROTOTYPE), + X(LINUX_ENOPROTOOPT), + X(LINUX_EPROTONOSUPPORT), + X(LINUX_ESOCKTNOSUPPORT), + X(LINUX_EOPNOTSUPP), + X(LINUX_EPFNOSUPPORT), + X(LINUX_EAFNOSUPPORT), + X(LINUX_EADDRINUSE), + X(LINUX_EADDRNOTAVAIL), + X(LINUX_ENETDOWN), + X(LINUX_ENETUNREACH), + X(LINUX_ENETRESET), + X(LINUX_ECONNABORTED), + X(LINUX_ECONNRESET), + X(LINUX_ENOBUFS), + X(LINUX_EISCONN), + X(LINUX_ENOTCONN), + X(LINUX_ESHUTDOWN), + X(LINUX_ETOOMANYREFS), + X(LINUX_ETIMEDOUT), + X(LINUX_ECONNREFUSED), + X(LINUX_EHOSTDOWN), + X(LINUX_EHOSTUNREACH), + X(LINUX_EALREADY), + X(LINUX_EINPROGRESS), + X(LINUX_ESTALE), + X(LINUX_EUCLEAN), + X(LINUX_ENOTNAM), + X(LINUX_ENAVAIL), + X(LINUX_EISNAM), + X(LINUX_EREMOTEIO), + X(LINUX_EDQUOT), + X(LINUX_ENOMEDIUM), + X(LINUX_EMEDIUMTYPE), + X(LINUX_ECANCELED), + 
X(LINUX_ENOKEY),
+		X(LINUX_EKEYEXPIRED),
+		X(LINUX_EKEYREVOKED),
+		X(LINUX_EKEYREJECTED),
+		X(LINUX_EOWNERDEAD),
+		X(LINUX_ENOTRECOVERABLE),
+		X(LINUX_ERFKILL),
+		X(LINUX_EHWPOISON),
+#undef X0
+#undef X
+	};
+	if ((size_t)linux_errno < N(table) && table[linux_errno] != NULL)
+		return (table[linux_errno]);
+	if (linux_errno <= ERANGE)
+		return (strerror((int)linux_errno));
+	(void) snprintf(unknown, sizeof(unknown),
+	    "Unknown error %d", linux_errno);
+	return (unknown);
+}
+
+/*
+ * Append a textual description of <fcall> to <sb>: the message
+ * type name (or an "unknown request/response" marker when the type
+ * has no entry in ftype_names) and the tag, followed by the fields
+ * decoded for that message type.  The 9P2000.u-only fields
+ * (n_uname, extension, and the extra stat fields) are shown only
+ * when <version> is at least L9P_2000U.
+ *
+ * Both <fcall> and <sb> must be non-NULL.
+ */
+void
+l9p_describe_fcall(union l9p_fcall *fcall, enum l9p_version version,
+    struct sbuf *sb)
+{
+	uint64_t mask;
+	uint8_t type;
+	int i;
+
+	assert(fcall != NULL);
+	assert(sb != NULL);
+	assert(version <= L9P_2000L && version >= L9P_INVALID_VERSION);
+
+	type = fcall->hdr.type;
+
+	if (type < L9P__FIRST || type >= L9P__LAST_PLUS_1 ||
+	    ftype_names[type - L9P__FIRST] == NULL) {
+		const char *rr;
+
+		/*
+		 * Can't say for sure that this distinction --
+		 * an even number is a request, an odd one is
+		 * a response -- will be maintained forever,
+		 * but it's good enough for now.
+		 */
+		rr = (type & 1) != 0 ? "response" : "request";
+		/* One conversion per argument: kind, raw type, tag. */
+		sbuf_printf(sb, "<unknown %s %d> tag=%d", rr, type,
+		    fcall->hdr.tag);
+	} else {
+		sbuf_printf(sb, "%s tag=%d", ftype_names[type - L9P__FIRST],
+		    fcall->hdr.tag);
+	}
+
+	switch (type) {
+	case L9P_TVERSION:
+	case L9P_RVERSION:
+		sbuf_printf(sb, " version=\"%s\" msize=%d", fcall->version.version,
+		    fcall->version.msize);
+		return;
+
+	case L9P_TAUTH:
+		l9p_describe_fid(" afid=", fcall->hdr.fid, sb);
+		sbuf_printf(sb, " uname=\"%s\" aname=\"%s\"",
+		    fcall->tauth.uname, fcall->tauth.aname);
+		return;
+
+	case L9P_TATTACH:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_fid(" afid=", fcall->tattach.afid, sb);
+		sbuf_printf(sb, " uname=\"%s\" aname=\"%s\"",
+		    fcall->tattach.uname, fcall->tattach.aname);
+		if (version >= L9P_2000U)
+			sbuf_printf(sb, " n_uname=%d", fcall->tattach.n_uname);
+		return;
+
+	case L9P_RATTACH:
+		l9p_describe_qid(" ", &fcall->rattach.qid, sb);
+		return;
+
+	case L9P_RERROR:
+		sbuf_printf(sb, " ename=\"%s\" errnum=%d", fcall->error.ename,
+		    fcall->error.errnum);
+		return;
+
+	case L9P_RLERROR:
+		sbuf_printf(sb, " errnum=%d (%s)", fcall->error.errnum,
+		    lookup_linux_errno(fcall->error.errnum));
+		return;
+
+	case L9P_TFLUSH:
+		sbuf_printf(sb, " oldtag=%d", fcall->tflush.oldtag);
+		return;
+
+	case L9P_RFLUSH:
+		return;
+
+	case L9P_TWALK:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_fid(" newfid=", fcall->twalk.newfid, sb);
+		if (fcall->twalk.nwname) {
+			sbuf_cat(sb, " wname=\"");
+			for (i = 0; i < fcall->twalk.nwname; i++)
+				sbuf_printf(sb, "%s%s", i == 0 ? "" : "/",
+				    fcall->twalk.wname[i]);
+			sbuf_cat(sb, "\"");
+		}
+		return;
+
+	case L9P_RWALK:
+		sbuf_printf(sb, " wqid=[");
+		for (i = 0; i < fcall->rwalk.nwqid; i++)
+			l9p_describe_qid(i == 0 ? "" : ",",
+			    &fcall->rwalk.wqid[i], sb);
+		sbuf_cat(sb, "]");
+		return;
+
+	case L9P_TOPEN:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_mode(" mode=", fcall->tcreate.mode, sb);
+		return;
+
+	case L9P_ROPEN:
+		l9p_describe_qid(" qid=", &fcall->ropen.qid, sb);
+		sbuf_printf(sb, " iounit=%d", fcall->ropen.iounit);
+		return;
+
+	case L9P_TCREATE:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->tcreate.name, sb);
+		l9p_describe_ext_perm(" perm=", fcall->tcreate.perm, sb);
+		l9p_describe_mode(" mode=", fcall->tcreate.mode, sb);
+		if (version >= L9P_2000U && fcall->tcreate.extension != NULL)
+			l9p_describe_name(" extension=",
+			    fcall->tcreate.extension, sb);
+		return;
+
+	case L9P_RCREATE:
+		l9p_describe_qid(" qid=", &fcall->rcreate.qid, sb);
+		sbuf_printf(sb, " iounit=%d", fcall->rcreate.iounit);
+		return;
+
+	case L9P_TREAD:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		sbuf_printf(sb, " offset=%" PRIu64 " count=%" PRIu32,
+		    fcall->io.offset, fcall->io.count);
+		return;
+
+	case L9P_RREAD:
+	case L9P_RWRITE:
+		sbuf_printf(sb, " count=%" PRIu32, fcall->io.count);
+		return;
+
+	case L9P_TWRITE:
+	case L9P_TREADDIR:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		sbuf_printf(sb, " offset=%" PRIu64 " count=%" PRIu32,
+		    fcall->io.offset, fcall->io.count);
+		return;
+
+	case L9P_TCLUNK:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		return;
+
+	case L9P_RCLUNK:
+		return;
+
+	case L9P_TREMOVE:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		return;
+
+	case L9P_RREMOVE:
+		return;
+
+	case L9P_TSTAT:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		return;
+
+	case L9P_RSTAT:
+		l9p_describe_l9stat(" ", &fcall->rstat.stat, version, sb);
+		return;
+
+	case L9P_TWSTAT:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_l9stat(" ", &fcall->twstat.stat, version, sb);
+		return;
+
+	case L9P_RWSTAT:
+		return;
+
+	case L9P_TSTATFS:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		return;
+
+	case L9P_RSTATFS:
+		l9p_describe_statfs(" ", &fcall->rstatfs.statfs, sb);
+		return;
+
+	case L9P_TLOPEN:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_lflags(" flags=", fcall->tlcreate.flags, sb);
+		return;
+
+	case L9P_RLOPEN:
+		l9p_describe_qid(" qid=", &fcall->rlopen.qid, sb);
+		sbuf_printf(sb, " iounit=%d", fcall->rlopen.iounit);
+		return;
+
+	case L9P_TLCREATE:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->tlcreate.name, sb);
+		/* confusing: "flags" is open-mode, "mode" is permissions */
+		l9p_describe_lflags(" flags=", fcall->tlcreate.flags, sb);
+		/* TLCREATE mode/permissions have S_IFREG (0x8000) set */
+		l9p_describe_lperm(" mode=", fcall->tlcreate.mode, sb);
+		l9p_describe_ugid(" gid=", fcall->tlcreate.gid, sb);
+		return;
+
+	case L9P_RLCREATE:
+		l9p_describe_qid(" qid=", &fcall->rlcreate.qid, sb);
+		sbuf_printf(sb, " iounit=%d", fcall->rlcreate.iounit);
+		return;
+
+	case L9P_TSYMLINK:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->tsymlink.name, sb);
+		l9p_describe_name(" symtgt=", fcall->tsymlink.symtgt, sb);
+		l9p_describe_ugid(" gid=", fcall->tsymlink.gid, sb);
+		return;
+
+	case L9P_RSYMLINK:
+		l9p_describe_qid(" qid=", &fcall->ropen.qid, sb);
+		return;
+
+	case L9P_TMKNOD:
+		l9p_describe_fid(" dfid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->tmknod.name, sb);
+		/*
+		 * TMKNOD mode/permissions have S_IFBLK/S_IFCHR/S_IFIFO
+		 * bits.  The major and minor values are only meaningful
+		 * for S_IFBLK and S_IFCHR, but just decode always here.
+		 */
+		l9p_describe_lperm(" mode=", fcall->tmknod.mode, sb);
+		sbuf_printf(sb, " major=%u minor=%u",
+		    fcall->tmknod.major, fcall->tmknod.minor);
+		l9p_describe_ugid(" gid=", fcall->tmknod.gid, sb);
+		return;
+
+	case L9P_RMKNOD:
+		l9p_describe_qid(" qid=", &fcall->rmknod.qid, sb);
+		return;
+
+	case L9P_TRENAME:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_fid(" dfid=", fcall->trename.dfid, sb);
+		l9p_describe_name(" name=", fcall->trename.name, sb);
+		return;
+
+	case L9P_RRENAME:
+		return;
+
+	case L9P_TREADLINK:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		return;
+
+	case L9P_RREADLINK:
+		l9p_describe_name(" target=", fcall->rreadlink.target, sb);
+		return;
+
+	case L9P_TGETATTR:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_getattr_mask(fcall->tgetattr.request_mask, sb);
+		return;
+
+	case L9P_RGETATTR:
+		/* Don't need to decode bits: they're implied by the output */
+		mask = fcall->rgetattr.valid;
+		sbuf_printf(sb, " valid=0x%016" PRIx64, mask);
+		l9p_describe_qid(" qid=", &fcall->rgetattr.qid, sb);
+		if (mask & L9PL_GETATTR_MODE)
+			l9p_describe_lperm(" mode=", fcall->rgetattr.mode, sb);
+		if (mask & L9PL_GETATTR_UID)
+			l9p_describe_ugid(" uid=", fcall->rgetattr.uid, sb);
+		if (mask & L9PL_GETATTR_GID)
+			l9p_describe_ugid(" gid=", fcall->rgetattr.gid, sb);
+		if (mask & L9PL_GETATTR_NLINK)
+			sbuf_printf(sb, " nlink=%" PRIu64,
+			    fcall->rgetattr.nlink);
+		if (mask & L9PL_GETATTR_RDEV)
+			sbuf_printf(sb, " rdev=0x%" PRIx64,
+			    fcall->rgetattr.rdev);
+		if (mask & L9PL_GETATTR_SIZE)
+			l9p_describe_size(" size=", fcall->rgetattr.size, sb);
+		if (mask & L9PL_GETATTR_BLOCKS)
+			sbuf_printf(sb, " blksize=%" PRIu64 " blocks=%" PRIu64,
+			    fcall->rgetattr.blksize, fcall->rgetattr.blocks);
+		if (mask & L9PL_GETATTR_ATIME)
+			l9p_describe_time(sb, " atime=",
+			    fcall->rgetattr.atime_sec,
+			    fcall->rgetattr.atime_nsec);
+		if (mask & L9PL_GETATTR_MTIME)
+			l9p_describe_time(sb, " mtime=",
+			    fcall->rgetattr.mtime_sec,
+			    fcall->rgetattr.mtime_nsec);
+		if (mask & L9PL_GETATTR_CTIME)
+			l9p_describe_time(sb, " ctime=",
+			    fcall->rgetattr.ctime_sec,
+			    fcall->rgetattr.ctime_nsec);
+		if (mask & L9PL_GETATTR_BTIME)
+			l9p_describe_time(sb, " btime=",
+			    fcall->rgetattr.btime_sec,
+			    fcall->rgetattr.btime_nsec);
+		if (mask & L9PL_GETATTR_GEN)
+			sbuf_printf(sb, " gen=0x%" PRIx64, fcall->rgetattr.gen);
+		if (mask & L9PL_GETATTR_DATA_VERSION)
+			sbuf_printf(sb, " data_version=0x%" PRIx64,
+			    fcall->rgetattr.data_version);
+		return;
+
+	case L9P_TSETATTR:
+		/* As with RGETATTR, we'll imply decode via output. */
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		mask = fcall->tsetattr.valid;
+		/* NB: tsetattr valid mask is only 32 bits, hence %08x */
+		sbuf_printf(sb, " valid=0x%08" PRIx64, mask);
+		if (mask & L9PL_SETATTR_MODE)
+			l9p_describe_lperm(" mode=", fcall->tsetattr.mode, sb);
+		if (mask & L9PL_SETATTR_UID)
+			l9p_describe_ugid(" uid=", fcall->tsetattr.uid, sb);
+		/* The group ID is labelled gid, as in RGETATTR above. */
+		if (mask & L9PL_SETATTR_GID)
+			l9p_describe_ugid(" gid=", fcall->tsetattr.gid, sb);
+		if (mask & L9PL_SETATTR_SIZE)
+			l9p_describe_size(" size=", fcall->tsetattr.size, sb);
+		if (mask & L9PL_SETATTR_ATIME) {
+			if (mask & L9PL_SETATTR_ATIME_SET)
+				l9p_describe_time(sb, " atime=",
+				    fcall->tsetattr.atime_sec,
+				    fcall->tsetattr.atime_nsec);
+			else
+				sbuf_cat(sb, " atime=now");
+		}
+		if (mask & L9PL_SETATTR_MTIME) {
+			if (mask & L9PL_SETATTR_MTIME_SET)
+				l9p_describe_time(sb, " mtime=",
+				    fcall->tsetattr.mtime_sec,
+				    fcall->tsetattr.mtime_nsec);
+			else
+				sbuf_cat(sb, " mtime=now");
+		}
+		if (mask & L9PL_SETATTR_CTIME)
+			sbuf_cat(sb, " ctime=now");
+		return;
+
+	case L9P_RSETATTR:
+		return;
+
+	case L9P_TXATTRWALK:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_fid(" newfid=", fcall->txattrwalk.newfid, sb);
+		l9p_describe_name(" name=", fcall->txattrwalk.name, sb);
+		return;
+
+	case L9P_RXATTRWALK:
+		l9p_describe_size(" size=", fcall->rxattrwalk.size, sb);
+		return;
+
+	case L9P_TXATTRCREATE:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->txattrcreate.name, sb);
+		l9p_describe_size(" size=", fcall->txattrcreate.attr_size, sb);
+		sbuf_printf(sb, " flags=%" PRIu32, fcall->txattrcreate.flags);
+		return;
+
+	case L9P_RXATTRCREATE:
+		return;
+
+	case L9P_RREADDIR:
+		l9p_describe_readdir(sb, &fcall->io);
+		return;
+
+	case L9P_TFSYNC:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		return;
+
+	case L9P_RFSYNC:
+		return;
+
+	case L9P_TLOCK:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		/* decode better later */
+		sbuf_printf(sb, " type=%d flags=0x%" PRIx32
+		    " start=%" PRIu64 " length=%" PRIu64
+		    " proc_id=0x%" PRIx32 " client_id=\"%s\"",
+		    fcall->tlock.type, fcall->tlock.flags,
+		    fcall->tlock.start, fcall->tlock.length,
+		    fcall->tlock.proc_id, fcall->tlock.client_id);
+		return;
+
+	case L9P_RLOCK:
+		sbuf_printf(sb, " status=%d", fcall->rlock.status);
+		return;
+
+	case L9P_TGETLOCK:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		/* FALLTHROUGH */
+
+	case L9P_RGETLOCK:
+		/* decode better later */
+		sbuf_printf(sb, " type=%d "
+		    " start=%" PRIu64 " length=%" PRIu64
+		    " proc_id=0x%" PRIx32 " client_id=\"%s\"",
+		    fcall->getlock.type,
+		    fcall->getlock.start, fcall->getlock.length,
+		    fcall->getlock.proc_id, fcall->getlock.client_id);
+		return;
+
+	case L9P_TLINK:
+		l9p_describe_fid(" dfid=", fcall->tlink.dfid, sb);
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->tlink.name, sb);
+		return;
+
+	case L9P_RLINK:
+		return;
+
+	case L9P_TMKDIR:
+		l9p_describe_fid(" fid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->tmkdir.name, sb);
+		/* TMKDIR mode/permissions have S_IFDIR set */
+		l9p_describe_lperm(" mode=", fcall->tmkdir.mode, sb);
+		l9p_describe_ugid(" gid=", fcall->tmkdir.gid, sb);
+		return;
+
+	case L9P_RMKDIR:
+		l9p_describe_qid(" qid=", &fcall->rmkdir.qid, sb);
+		return;
+
+	case L9P_TRENAMEAT:
+		l9p_describe_fid(" olddirfid=", fcall->hdr.fid, sb);
+		l9p_describe_name(" oldname=", fcall->trenameat.oldname,
+		    sb);
+		l9p_describe_fid(" newdirfid=", fcall->trenameat.newdirfid, sb);
+		l9p_describe_name(" newname=", fcall->trenameat.newname,
+		    sb);
+		return;
+
+	case L9P_RRENAMEAT:
+		return;
+
+	case L9P_TUNLINKAT:
+		l9p_describe_fid(" dirfd=", fcall->hdr.fid, sb);
+		l9p_describe_name(" name=", fcall->tunlinkat.name, sb);
+		l9p_describe_unlinkat_flags(" flags=",
+		    fcall->tunlinkat.flags, sb);
+		return;
+
+	case L9P_RUNLINKAT:
+		return;
+
+	default:
+		/* A named type with no case above: flag the gap. */
+		sbuf_printf(sb, " <missing case in %s()>", __func__);
+	}
+}
Index: share/mk/bsd.libnames.mk =================================================================== --- share/mk/bsd.libnames.mk +++ share/mk/bsd.libnames.mk @@ -17,6 +17,7 @@ LIBCRT0?= ${LIBDESTDIR}${LIBDIR_BASE}/crt0.o LIB80211?= ${LIBDESTDIR}${LIBDIR_BASE}/lib80211.a +LIB9P?= ${LIBDESTDIR}${LIBDIR_BASE}/lib9p.a LIBALIAS?= ${LIBDESTDIR}${LIBDIR_BASE}/libalias.a LIBARCHIVE?= ${LIBDESTDIR}${LIBDIR_BASE}/libarchive.a LIBASN1?= ${LIBDESTDIR}${LIBDIR_BASE}/libasn1.a Index: share/mk/src.libnames.mk =================================================================== --- share/mk/src.libnames.mk +++ share/mk/src.libnames.mk @@ -69,6 +69,7 @@ ${_INTERNALLIBS} \ ${LOCAL_LIBRARIES} \ 80211 \ + 9p \ alias \ archive \ asn1 \ @@ -239,6 +240,7 @@ # Each library's LIBADD needs to be duplicated here for static linkage of # 2nd+ order consumers. Auto-generating this would be better. 
_DP_80211= sbuf bsdxml +_DP_9p= sbuf _DP_archive= z bz2 lzma bsdxml zstd _DP_zstd= pthread .if ${MK_BLACKLIST} != "no" Index: usr.sbin/bhyve/Makefile =================================================================== --- usr.sbin/bhyve/Makefile +++ usr.sbin/bhyve/Makefile @@ -3,6 +3,7 @@ # .include +CFLAGS+=-I${SRCTOP}/lib/lib9p CFLAGS+=-I${SRCTOP}/sys .PATH: ${SRCTOP}/sys/cam/ctl @@ -46,6 +47,7 @@ pci_lpc.c \ pci_nvme.c \ pci_passthru.c \ + pci_virtio_9p.c \ pci_virtio_block.c \ pci_virtio_console.c \ pci_virtio_net.c \ @@ -74,7 +76,7 @@ .PATH: ${BHYVE_SYSDIR}/sys/amd64/vmm SRCS+= vmm_instruction_emul.c -LIBADD= vmmapi md pthread z util sbuf cam +LIBADD= vmmapi md pthread z util sbuf cam 9p casper cap_pwd cap_grp .if ${MK_INET_SUPPORT} != "no" CFLAGS+=-DINET Index: usr.sbin/bhyve/bhyve.8 =================================================================== --- usr.sbin/bhyve/bhyve.8 +++ usr.sbin/bhyve/bhyve.8 @@ -223,6 +223,8 @@ Virtio block storage interface. .It Li virtio-scsi Virtio SCSI interface. +.It Li virtio-9p +Virtio 9p (VirtFS) interface. .It Li virtio-rnd Virtio RNG interface. .It Li virtio-console @@ -312,6 +314,19 @@ The default value is 0. .El .Pp +9P devices: +.Bl -tag -width 10n +.It Pa sharename=/path/to/share[,9p-device-options] +.El +.Pp +The +.Ar 9p-device-options +are: +.Bl -tag -width 10n +.It Li ro +Expose the share in read-only mode. +.El +.Pp TTY devices: .Bl -tag -width 10n .It Li stdio Index: usr.sbin/bhyve/pci_virtio_9p.c =================================================================== --- /dev/null +++ usr.sbin/bhyve/pci_virtio_9p.c @@ -0,0 +1,344 @@ +/*- + * Copyright (c) 2015 iXsystems Inc. + * Copyright (c) 2017-2018 Jakub Klama + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * VirtIO filesystem passthrough using 9p protocol. 
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * NOTE(review): the header names on the #include <...> lines of this file
+ * were missing from the reviewed copy of the patch.  They are restored here
+ * from the identifiers the file uses, keeping the original 1/4/8/2 grouping;
+ * confirm against the author's tree.
+ */
+#include <sys/param.h>
+#include <sys/linker_set.h>
+#include <sys/uio.h>
+#include <sys/capsicum.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <pthread.h>
+
+#include <lib9p.h>
+#include <backend/fs.h>
+
+#include "bhyverun.h"
+#include "pci_emul.h"
+#include "virtio.h"
+
+#define	VT9P_MAX_IOV	128	/* max descriptors accepted per chain */
+#define	VT9P_RINGSZ	256	/* virtqueue size set in pci_vt9p_init() */
+#define	VT9P_MAXTAGSZ	256	/* max share name (mount tag) length */
+#define	VT9P_CONFIGSPACESZ	(VT9P_MAXTAGSZ + sizeof(uint16_t))
+
+static int pci_vt9p_debug;
+/*
+ * Both macros take a parenthesised printf argument list:
+ * DPRINTF(("fmt", args)).  DPRINTF is wrapped in do/while so it is a single
+ * statement and cannot capture a following "else".
+ */
+#define	DPRINTF(params) do {						\
+	if (pci_vt9p_debug)						\
+		printf params;						\
+} while (0)
+#define	WPRINTF(params) printf params
+
+/*
+ * Per-device softc
+ */
+struct pci_vt9p_softc {
+	struct virtio_softc	vsc_vs;		/* virtio core state */
+	struct vqueue_info	vsc_vq;		/* the single virtqueue */
+	pthread_mutex_t		vsc_mtx;	/* also installed as vs_mtx */
+	uint64_t		vsc_cfg;	/* not referenced in this file */
+	uint64_t		vsc_features;	/* negotiated feature bits */
+	char *			vsc_rootpath;	/* never assigned in this file */
+	struct pci_vt9p_config *vsc_config;	/* bytes served by cfgread */
+	struct l9p_backend *	vsc_fs_backend;	/* lib9p objects, created */
+	struct l9p_server *	vsc_server;	/* in pci_vt9p_init() */
+	struct l9p_connection *	vsc_conn;
+};
+
+/*
+ * One in-flight 9p request.  Allocated in pci_vt9p_notify() together with
+ * its iovec array (a single allocation) and freed by pci_vt9p_send() or
+ * pci_vt9p_drop().
+ */
+struct pci_vt9p_request {
+	struct pci_vt9p_softc *	vsr_sc;		/* owning device */
+	struct iovec *		vsr_iov;	/* descriptor chain */
+	size_t			vsr_niov;	/* entries in vsr_iov */
+	size_t			vsr_respidx;	/* first writable entry */
+	size_t			vsr_iolen;	/* response length */
+	uint16_t		vsr_idx;	/* chain head for vq_relchain */
+};
+
+/*
+ * Device-specific config space: a 16-bit tag length followed by the tag
+ * bytes (no NUL terminator is stored or required).
+ */
+struct pci_vt9p_config {
+	uint16_t tag_len;
+	char tag[];
+} __attribute__((packed));
+
+static int pci_vt9p_send(struct l9p_request *, const struct iovec *,
+    const size_t, const size_t, void *);
+static void pci_vt9p_drop(struct l9p_request *, const struct iovec *, size_t,
+    void *);
+static void pci_vt9p_reset(void *);
+static void pci_vt9p_notify(void *, struct vqueue_info *);
+static int pci_vt9p_cfgread(void *, int, int, uint32_t *);
+static void pci_vt9p_neg_features(void *, uint64_t);
+
+/*
+ * NOTE(review): capability bit 0 is presumably the 9p "mount tag" feature
+ * (VIRTIO_9P_MOUNT_TAG in Linux's virtio_9p.h) -- confirm and consider
+ * giving it a name.
+ */
+static struct virtio_consts vt9p_vi_consts = {
+	"vt9p",			/* our name */
+	1,			/* we support 1 virtqueue */
+	VT9P_CONFIGSPACESZ,	/* config reg size */
+	pci_vt9p_reset,		/* reset */
+	pci_vt9p_notify,	/* device-wide qnotify */
+	pci_vt9p_cfgread,	/* read virtio config */
+	NULL,			/* write virtio config */
+	pci_vt9p_neg_features,	/* apply negotiated features */
+	(1 << 0),		/* our capabilities */
+};
+
+
+/*
+ * Reset callback: logs (when debugging is on) and resets the virtio core
+ * state.  vsc is the device softc.
+ */
+static void
+pci_vt9p_reset(void *vsc)
+{
+	struct pci_vt9p_softc *sc;
+
+	sc = vsc;
+
+	DPRINTF(("vt9p: device reset requested !\n"));
+	vi_reset_dev(&sc->vsc_vs);
+}
+
+/*
+ * Feature-negotiation callback: remembers the negotiated feature bits in
+ * the softc.  Nothing else in this file reads them back.
+ */
+static void
+pci_vt9p_neg_features(void *vsc, uint64_t negotiated_features)
+{
+	struct pci_vt9p_softc *sc = vsc;
+
+	sc->vsc_features = negotiated_features;
+}
+
+/*
+ * Config-space read callback: copies `size' bytes starting at `offset' of
+ * the tag_len/tag structure into *retval.  Always returns 0.
+ *
+ * No bounds check is done here.  NOTE(review): this presumably relies on the
+ * virtio core validating offset/size against VT9P_CONFIGSPACESZ (and size
+ * against sizeof(*retval)) before calling -- confirm.
+ */
+static int
+pci_vt9p_cfgread(void *vsc, int offset, int size, uint32_t *retval)
+{
+	struct pci_vt9p_softc *sc = vsc;
+	void *ptr;
+
+	ptr = (uint8_t *)sc->vsc_config + offset;
+	memcpy(retval, ptr, size);
+	return (0);
+}
+
+/*
+ * lib9p "get response buffer" hook: hands back the tail of the descriptor
+ * chain starting at vsr_respidx (the first device-writable descriptor).
+ * The iovec entries are copied into `iov' and their count stored in *niov.
+ * `arg' is unused.  Always returns 0.
+ *
+ * NOTE(review): assumes the caller's `iov' array has room for
+ * vsr_niov - vsr_respidx entries -- confirm against lib9p.
+ */
+static int
+pci_vt9p_get_buffer(struct l9p_request *req, struct iovec *iov, size_t *niov,
+    void *arg)
+{
+	struct pci_vt9p_request *preq = req->lr_aux;
+	size_t n = preq->vsr_niov - preq->vsr_respidx;
+
+	memcpy(iov, preq->vsr_iov + preq->vsr_respidx,
+	    n * sizeof(struct iovec));
+	*niov = n;
+	return (0);
+}
+
+/*
+ * lib9p "send response" hook: records the response length, returns the
+ * descriptor chain to the guest with that length, and frees the request
+ * (which also frees its iovec copy).  iov, niov and arg are unused.
+ * Always returns 0.
+ *
+ * NOTE(review): this takes vsc_mtx, which pci_vt9p_init() also installs as
+ * the virtio softc's vs_mtx.  If the virtio core calls pci_vt9p_notify()
+ * with that mutex held and lib9p completes the request synchronously, this
+ * re-locks a mutex the thread already owns -- confirm the locking model.
+ */
+static int
+pci_vt9p_send(struct l9p_request *req, const struct iovec *iov,
+    const size_t niov, const size_t iolen, void *arg)
+{
+	struct pci_vt9p_request *preq = req->lr_aux;
+	struct pci_vt9p_softc *sc = preq->vsr_sc;
+
+	preq->vsr_iolen = iolen;
+
+	pthread_mutex_lock(&sc->vsc_mtx);
+	vq_relchain(&sc->vsc_vq, preq->vsr_idx, preq->vsr_iolen);
+	vq_endchains(&sc->vsc_vq, 1);
+	pthread_mutex_unlock(&sc->vsc_mtx);
+	free(preq);
+	return (0);
+}
+
+/*
+ * lib9p "drop response" hook: same as pci_vt9p_send() but the chain is
+ * returned with a length of 0.  iov, niov and arg are unused.
+ */
+static void
+pci_vt9p_drop(struct l9p_request *req, const struct iovec *iov, size_t niov,
+    void *arg)
+{
+	struct pci_vt9p_request *preq = req->lr_aux;
+	struct pci_vt9p_softc *sc = preq->vsr_sc;
+
+	pthread_mutex_lock(&sc->vsc_mtx);
+	vq_relchain(&sc->vsc_vq, preq->vsr_idx, 0);
+	vq_endchains(&sc->vsc_vq, 1);
+	pthread_mutex_unlock(&sc->vsc_mtx);
+	free(preq);
+}
+
+/*
+ * Queue-notify callback: for every available descriptor chain, build a
+ * request and pass the guest-readable part of the chain to lib9p.
+ *
+ * The request and a private copy of the chain's iovecs live in a single
+ * heap allocation, so the hooks above never look at this function's stack
+ * array (which is overwritten on every loop iteration).
+ */
+static void
+pci_vt9p_notify(void *vsc, struct vqueue_info *vq)
+{
+	struct iovec iov[VT9P_MAX_IOV];
+	uint16_t flags[VT9P_MAX_IOV];
+	struct pci_vt9p_softc *sc;
+	struct pci_vt9p_request *preq;
+	uint16_t idx;
+	int i, n;
+
+	sc = vsc;
+
+	while (vq_has_descs(vq)) {
+		/*
+		 * NOTE(review): vq_getchain() is declared elsewhere; it is
+		 * assumed to return a signed descriptor count (<= 0 on
+		 * failure) -- confirm.  A bad count must not be used to
+		 * index iov[]/flags[].
+		 */
+		n = vq_getchain(vq, &idx, iov, VT9P_MAX_IOV, flags);
+		if (n < 1 || n > VT9P_MAX_IOV) {
+			WPRINTF(("vt9p: bad descriptor chain (%d)\n", n));
+			return;
+		}
+
+		preq = calloc(1, sizeof(*preq) +
+		    (size_t)n * sizeof(struct iovec));
+		if (preq == NULL) {
+			/* Hand the chain straight back, as a drop would. */
+			WPRINTF(("vt9p: out of memory, dropping request\n"));
+			vq_relchain(vq, idx, 0);
+			vq_endchains(vq, 1);
+			continue;
+		}
+		preq->vsr_sc = sc;
+		preq->vsr_idx = idx;
+		preq->vsr_iov = (struct iovec *)(preq + 1);
+		memcpy(preq->vsr_iov, iov, (size_t)n * sizeof(struct iovec));
+		preq->vsr_niov = n;
+		preq->vsr_respidx = 0;
+
+		/* Count readable descriptors */
+		for (i = 0; i < n; i++) {
+			if (flags[i] & VRING_DESC_F_WRITE)
+				break;
+
+			preq->vsr_respidx++;
+		}
+
+		for (i = 0; i < n; i++) {
+			DPRINTF(("vt9p: vt9p_notify(): desc%d base=%p, "
+			    "len=%zu, flags=0x%04x\r\n", i, iov[i].iov_base,
+			    iov[i].iov_len, flags[i]));
+		}
+
+		/* preq may already be freed by the time this returns. */
+		l9p_connection_recv(sc->vsc_conn, preq->vsr_iov,
+		    preq->vsr_respidx, preq);
+	}
+}
+
+
+/*
+ * Device init callback.
+ *
+ * opts is a comma-separated list: exactly one "sharename=rootpath" pair and
+ * an optional "ro" flag.  The string is modified in place by strsep().
+ * ctx is unused.
+ *
+ * Opens rootpath as a directory, limits the descriptor's Capsicum rights,
+ * creates the lib9p backend/server/connection on top of it, and registers
+ * the virtio device with a single queue.
+ *
+ * Returns 0 on success, -1 if rootpath cannot be opened, 1 on any other
+ * failure.
+ */
+static int
+pci_vt9p_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
+{
+	struct pci_vt9p_softc *sc;
+	char *opt;
+	char *sharename = NULL;
+	char *rootpath = NULL;
+	size_t taglen;
+	int error;
+	int rootfd;
+	bool ro = false;
+	cap_rights_t rootcap;
+
+	if (opts == NULL) {
+		printf("virtio-9p: share name and path required\n");
+		return (1);
+	}
+
+	while ((opt = strsep(&opts, ",")) != NULL) {
+		if (strchr(opt, '=') != NULL) {
+			if (sharename != NULL) {
+				printf("virtio-9p: more than one share name "
+				    "given\n");
+				return (1);
+			}
+
+			sharename = strsep(&opt, "=");
+			rootpath = opt;
+			continue;
+		}
+
+		if (strcmp(opt, "ro") == 0) {
+			DPRINTF(("read-only mount requested\r\n"));
+			ro = true;
+			continue;
+		}
+
+		printf("virtio-9p: invalid option '%s'\n", opt);
+		return (1);
+	}
+
+	/* e.g. opts == "ro": no name=path pair was ever seen. */
+	if (sharename == NULL || rootpath == NULL) {
+		printf("virtio-9p: share name and path required\n");
+		return (1);
+	}
+
+	taglen = strlen(sharename);
+	if (taglen > VT9P_MAXTAGSZ) {
+		printf("virtio-9p: share name too long\n");
+		return (1);
+	}
+
+	rootfd = open(rootpath, O_DIRECTORY);
+	if (rootfd < 0) {
+		printf("virtio-9p: cannot open '%s': %s\n", rootpath,
+		    strerror(errno));
+		return (-1);
+	}
+
+	sc = calloc(1, sizeof(*sc));
+	if (sc == NULL) {
+		error = ENOMEM;
+		goto fail_fd;
+	}
+	sc->vsc_config = calloc(1, sizeof(struct pci_vt9p_config) +
+	    VT9P_MAXTAGSZ);
+	if (sc->vsc_config == NULL) {
+		error = ENOMEM;
+		goto fail_sc;
+	}
+
+	pthread_mutex_init(&sc->vsc_mtx, NULL);
+
+	cap_rights_init(&rootcap,
+	    CAP_LOOKUP, CAP_ACL_CHECK, CAP_ACL_DELETE, CAP_ACL_GET,
+	    CAP_ACL_SET, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_FSTAT,
+	    CAP_CREATE, CAP_FCHMODAT, CAP_FCHOWNAT, CAP_FTRUNCATE,
+	    CAP_LINKAT_SOURCE, CAP_LINKAT_TARGET, CAP_MKDIRAT, CAP_MKNODAT,
+	    CAP_PREAD, CAP_PWRITE, CAP_RENAMEAT_SOURCE, CAP_RENAMEAT_TARGET,
+	    CAP_SYMLINKAT, CAP_UNLINKAT, CAP_EXTATTR_DELETE,
+	    CAP_EXTATTR_GET, CAP_EXTATTR_LIST, CAP_EXTATTR_SET,
+	    CAP_FUTIMES, CAP_FSTATFS, CAP_FSYNC, CAP_FPATHCONF);
+
+	if (cap_rights_limit(rootfd, &rootcap) != 0) {
+		error = errno;
+		goto fail_mtx;
+	}
+
+	sc->vsc_config->tag_len = (uint16_t)taglen;
+	memcpy(sc->vsc_config->tag, sharename, taglen);
+
+	if (l9p_backend_fs_init(&sc->vsc_fs_backend, rootfd, ro) != 0) {
+		error = ENXIO;
+		goto fail_mtx;
+	}
+	/*
+	 * The backend has been given rootfd.  Whether it now owns the
+	 * descriptor is not visible from this file, so stop closing it on
+	 * later failures.  lib9p objects created from here on are likewise
+	 * not torn down on failure: their release functions are not visible
+	 * here either.
+	 */
+	rootfd = -1;
+
+	if (l9p_server_init(&sc->vsc_server, sc->vsc_fs_backend) != 0) {
+		error = ENXIO;
+		goto fail_mtx;
+	}
+
+	if (l9p_connection_init(sc->vsc_server, &sc->vsc_conn) != 0) {
+		error = EIO;
+		goto fail_mtx;
+	}
+
+	sc->vsc_conn->lc_msize = L9P_MAX_IOV * PAGE_SIZE;
+	sc->vsc_conn->lc_lt.lt_get_response_buffer = pci_vt9p_get_buffer;
+	sc->vsc_conn->lc_lt.lt_send_response = pci_vt9p_send;
+	sc->vsc_conn->lc_lt.lt_drop_response = pci_vt9p_drop;
+
+	vi_softc_linkup(&sc->vsc_vs, &vt9p_vi_consts, sc, pi, &sc->vsc_vq);
+	sc->vsc_vs.vs_mtx = &sc->vsc_mtx;
+	sc->vsc_vq.vq_qsize = VT9P_RINGSZ;
+
+	/* initialize config space */
+	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_9P);
+	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
+	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
+	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_9P);
+	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
+
+	/*
+	 * sc was handed to vi_softc_linkup() above, so it is deliberately
+	 * not freed if interrupt setup fails.
+	 */
+	if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix()))
+		return (1);
+	vi_set_io_bar(&sc->vsc_vs, 0);
+
+	return (0);
+
+fail_mtx:
+	pthread_mutex_destroy(&sc->vsc_mtx);
+	free(sc->vsc_config);
+fail_sc:
+	free(sc);
+fail_fd:
+	if (rootfd >= 0)
+		close(rootfd);
+	/* Set last: the cleanup calls above may overwrite errno. */
+	errno = error;
+	return (1);
+}
+
+/* Registers the emulation under the name "virtio-9p". */
+struct pci_devemu pci_de_v9p = {
+	.pe_emu =	"virtio-9p",
+	.pe_init =	pci_vt9p_init,
+	.pe_barwrite =	vi_pci_write,
+	.pe_barread =	vi_pci_read
+};
+PCI_EMUL_SET(pci_de_v9p);
Index: usr.sbin/bhyve/virtio.h
===================================================================
--- usr.sbin/bhyve/virtio.h
+++ usr.sbin/bhyve/virtio.h
@@ -216,6 +216,7 @@
 #define	VIRTIO_DEV_CONSOLE	0x1003
 #define	VIRTIO_DEV_RANDOM	0x1005
 #define	VIRTIO_DEV_SCSI		0x1008
+#define	VIRTIO_DEV_9P		0x1009
/* * PCI config space constants.