diff --git a/share/man/man5/Makefile b/share/man/man5/Makefile --- a/share/man/man5/Makefile +++ b/share/man/man5/Makefile @@ -47,6 +47,7 @@ nsswitch.conf.5 \ nullfs.5 \ os-release.5 \ + p9fs.5 \ passwd.5 \ pbm.5 \ periodic.conf.5 \ diff --git a/share/man/man5/p9fs.5 b/share/man/man5/p9fs.5 new file mode 100644 --- /dev/null +++ b/share/man/man5/p9fs.5 @@ -0,0 +1,118 @@ +.\" +.\" Copyright (c) 2022-present Doug Rabson +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS DOCUMENTATION IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +.\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+.\" +.Dd December 7, 2022 +.Dt P9FS 5 +.Os +.Sh NAME +.Nm p9fs +.Nd "9P file system" +.Sh SYNOPSIS +To use this filesystem, +either add the following to the kernel config: +.Bd -ragged -offset indent +.Cd "options P9FS" +.Cd "device virtio_p9fs" +.Ed +.Pp +Alternatively, load the driver as a kernel module, +either at boot time by adding the following to +.Xr loader.conf 5 : +.Bd -literal -offset indent +virtio_p9fs_load="YES" +.Ed +.Pp +or on system startup using the command: +.Pp +.Dl "# sysrc kld_list+=virtio_p9fs" +.Sh DESCRIPTION +The +.Nm +filesystem uses the 9P protocol to mount a host file system directory +into a +.Xr bhyve 8 +guest. +Multiple host directories can be accessed using the +.Xr bhyve 8 +virtio-9p virtual PCI device. +Each device is configured with a share name and a host directory path. +The share name can be used with +.Xr mount 8 +to mount the host directory in the guest: +.Pp +.Dl "# mount -t p9fs mysharename /mnt" +.Pp +Host directories can be mounted on system startup using +.Xr fstab 5 +like this: +.Pp +.Bd -literal -offset indent +mysharename /mnt p9fs rw 0 0 +.Ed +.Pp +Using +.Nm +as a root file system is supported by adding the following to +.Xr loader.conf 5 : +.Bd -literal -offset indent +vfs.root.mountfrom="p9fs:mysharename" +.Ed +.Sh LIMITATIONS +The 9P protocol relies on stateful file opens +which map protocol-level FIDs to host file descriptors. +The FreeBSD vnode interface doesn't support this and +.Nm +uses heuristics to guess the right FID to use for file operations. +.Pp +This can be confused by privilege lowering and +does not guarantee that the FID created for a +given file open is always used for file operations, +even if the calling process is using the file descriptor from +the original open call. +.Pp +In particular, accessing unlinked files using open file descriptor +may not work correctly. 
+If +.Nm +is the root filesystem, +it is recommended to use it with +.Xr tmpfs 5 +to ensure that temporary files created in +.Pa /tmp +or +.Pa /var/tmp +have the expected semantics. +.Sh SEE ALSO +.Xr fstab 5 +.Sh HISTORY +The 9P protocol first appeared in the Plan 9 operating system. +More recently, the protocol has been widely used with virtual machines +to allow the use of host file resources inside a guest VM. +.Sh AUTHORS +This is derived from software released by Juniper Networks, Inc. +with many improvements and fixes from +.An Steve Wills . +.Pp +This manual page was written by +.An Doug Rabson Aq Mt dfr@FreeBSD.org . diff --git a/sys/conf/files b/sys/conf/files --- a/sys/conf/files +++ b/sys/conf/files @@ -3406,6 +3406,7 @@ dev/virtio/scsi/virtio_scsi.c optional virtio_scsi dev/virtio/random/virtio_random.c optional virtio_random dev/virtio/console/virtio_console.c optional virtio_console +dev/virtio/p9fs/virtio_p9fs.c optional virtio_p9fs dev/vkbd/vkbd.c optional vkbd dev/vmgenc/vmgenc_acpi.c optional acpi dev/vmware/vmxnet3/if_vmx.c optional vmx @@ -3552,6 +3553,12 @@ fs/nullfs/null_subr.c optional nullfs fs/nullfs/null_vfsops.c optional nullfs fs/nullfs/null_vnops.c optional nullfs +fs/p9fs/p9_client.c optional p9fs +fs/p9fs/p9_protocol.c optional p9fs +fs/p9fs/p9_transport.c optional p9fs +fs/p9fs/p9fs_subr.c optional p9fs +fs/p9fs/p9fs_vfsops.c optional p9fs +fs/p9fs/p9fs_vnops.c optional p9fs fs/procfs/procfs.c optional procfs fs/procfs/procfs_dbregs.c optional procfs fs/procfs/procfs_fpregs.c optional procfs diff --git a/sys/conf/options b/sys/conf/options --- a/sys/conf/options +++ b/sys/conf/options @@ -281,6 +281,7 @@ UDF opt_dontuse.h UNIONFS opt_dontuse.h ZFS opt_dontuse.h +P9FS opt_dontuse.h # Pseudofs debugging PSEUDOFS_TRACE opt_pseudofs.h diff --git a/sys/dev/virtio/p9fs/virtio_p9fs.h b/sys/dev/virtio/p9fs/virtio_p9fs.h new file mode 100644 --- /dev/null +++ b/sys/dev/virtio/p9fs/virtio_p9fs.h @@ -0,0 +1,39 @@ +/*- + * Copyright (c) 2017 
Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef __VIRTIO_9P_CONFIG__
+#define __VIRTIO_9P_CONFIG__
+
+/* Feature flag: the device config space carries a mount tag. */
+#define VIRTIO_9PNET_F_MOUNT_TAG 1
+
+struct virtio_9pnet_config {
+	/* Length in bytes of the tag that follows. */
+	uint16_t mount_tag_len;
+	/* Tag bytes, NOT NUL-terminated; C99 flexible array, accessed via offsetof(). */
+	uint8_t mount_tag[];
+};
+#endif /* __VIRTIO_9P_CONFIG__ */
diff --git a/sys/dev/virtio/p9fs/virtio_p9fs.c b/sys/dev/virtio/p9fs/virtio_p9fs.c
new file mode 100644
--- /dev/null
+++ b/sys/dev/virtio/p9fs/virtio_p9fs.c
@@ -0,0 +1,494 @@
+/*-
+ * Copyright (c) 2017 Juniper Networks, Inc.
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +/* + * The Virtio 9P transport driver. This file contains all functions related to + * the virtqueue infrastructure which include creating the virtqueue, host + * interactions, interrupts etc. 
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#define VT9P_MTX(_sc) (&(_sc)->vt9p_mtx)
+#define VT9P_LOCK(_sc) mtx_lock(VT9P_MTX(_sc))
+#define VT9P_UNLOCK(_sc) mtx_unlock(VT9P_MTX(_sc))
+#define VT9P_LOCK_INIT(_sc) mtx_init(VT9P_MTX(_sc), \
+    "VIRTIO 9P CHAN lock", NULL, MTX_DEF)
+#define VT9P_LOCK_DESTROY(_sc) mtx_destroy(VT9P_MTX(_sc))
+#define MAX_SUPPORTED_SGS 20
+static MALLOC_DEFINE(M_P9FS_MNTTAG, "p9fs_mount_tag", "P9fs Mounttag");
+
+struct vt9p_softc {
+	device_t vt9p_dev;
+	struct mtx vt9p_mtx;
+	struct sglist *vt9p_sglist;
+	struct cv submit_cv;
+	bool busy;
+	struct virtqueue *vt9p_vq;
+	int max_nsegs;
+	uint16_t mount_tag_len;
+	char *mount_tag;
+	STAILQ_ENTRY(vt9p_softc) chan_next;
+};
+
+/* Global channel list, Each channel will correspond to a mount point */
+static STAILQ_HEAD( ,vt9p_softc) global_chan_list;
+struct mtx global_chan_list_mtx;
+
+static struct virtio_feature_desc virtio_9p_feature_desc[] = {
+	{ VIRTIO_9PNET_F_MOUNT_TAG, "9PMountTag" },
+	{ 0, NULL }
+};
+
+static void
+global_chan_list_init(void)
+{
+
+	mtx_init(&global_chan_list_mtx, "9pglobal",
+	    NULL, MTX_DEF);
+	STAILQ_INIT(&global_chan_list);
+}
+SYSINIT(global_chan_list_init, SI_SUB_KLD, SI_ORDER_FIRST,
+    global_chan_list_init, NULL);
+
+/* We don't currently allow canceling of virtio requests */
+static int
+vt9p_cancel(void *handle, struct p9_req_t *req)
+{
+
+	return (1);
+}
+
+SYSCTL_NODE(_vfs, OID_AUTO, 9p, CTLFLAG_RW, 0, "9P File System Protocol");
+
+/*
+ * Maximum number of seconds vt9p_request thread sleep waiting for an
+ * ack from the host, before exiting
+ */
+static unsigned int vt9p_ackmaxidle = 120;
+
+SYSCTL_UINT(_vfs_9p, OID_AUTO, ackmaxidle, CTLFLAG_RW, &vt9p_ackmaxidle, 0,
+    "Maximum time request thread waits for ack from host");
+
+/*
+ * Request handler. This is called for every request submitted to the host
+ * It basically maps the tc/rc buffers to sg lists and submits the requests
+ * into the virtqueue. Since we have implemented a synchronous version, the
+ * submission thread sleeps until the ack in the interrupt wakes it up. Once
+ * it wakes up, it returns back to the P9fs layer. The rc buffer is then
+ * processed and completed to its upper layers.
+ *
+ * Returns 0 once the reply for req has been dequeued, the sglist_append()
+ * error if the buffers cannot be mapped, or EIO if the enqueue fails or no
+ * ack arrives within vt9p_ackmaxidle seconds. The channel lock is taken on
+ * entry and is dropped again on every return path.
+ */
+static int
+vt9p_request(void *handle, struct p9_req_t *req)
+{
+	int error;
+	struct vt9p_softc *chan;
+	struct p9_req_t *curreq;
+	int readable, writable;
+	struct sglist *sg;
+	struct virtqueue *vq;
+
+	chan = handle;
+	sg = chan->vt9p_sglist;
+	vq = chan->vt9p_vq;
+
+	P9_DEBUG(TRANS, "%s: req=%p\n", __func__, req);
+
+	/* Grab the channel lock*/
+	VT9P_LOCK(chan);
+	sglist_reset(sg);
+	/* Handle out VirtIO ring buffers */
+	error = sglist_append(sg, req->tc->sdata, req->tc->size);
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: sglist append failed\n", __func__);
+		goto fail;
+	}
+	readable = sg->sg_nseg;
+
+	error = sglist_append(sg, req->rc->sdata, req->rc->capacity);
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: sglist append failed\n", __func__);
+		goto fail;
+	}
+	writable = sg->sg_nseg - readable;
+
+req_retry:
+	error = virtqueue_enqueue(vq, req, sg, readable, writable);
+
+	if (error != 0) {
+		if (error == ENOSPC) {
+			/*
+			 * Condvar for the submit queue. Unlock the chan
+			 * since wakeup needs one.
+			 */
+			cv_wait(&chan->submit_cv, VT9P_MTX(chan));
+			P9_DEBUG(TRANS, "%s: retry virtio request\n", __func__);
+			goto req_retry;
+		} else {
+			P9_DEBUG(ERROR, "%s: virtio enuqueue failed \n", __func__);
+			error = EIO;
+			goto fail;
+		}
+	}
+
+	/* We have to notify */
+	virtqueue_notify(vq);
+
+	do {
+		curreq = virtqueue_dequeue(vq, NULL);
+		if (curreq == NULL) {
+			/* Nothing to dequeue, sleep until we have something */
+			if (msleep(chan, VT9P_MTX(chan), 0, "chan lock",
+			    vt9p_ackmaxidle * hz)) {
+				/*
+				 * Waited for vt9p_ackmaxidle seconds. No
+				 * response from host. Can't wait for ever..
+				 *
+				 * NOTE(review): the request is still queued in
+				 * the virtqueue at this point - confirm the
+				 * caller does not reuse its buffers.
+				 */
+				P9_DEBUG(ERROR, "%s: timeout after waiting %u seconds "
+				    "for an ack from host\n", __func__, vt9p_ackmaxidle);
+				error = EIO;
+				goto fail;
+			}
+		} else {
+			cv_signal(&chan->submit_cv);
+			/* We dequeued something, update the reply tag */
+			curreq->rc->tag = curreq->tc->tag;
+		}
+	} while (req->rc->tag == P9_NOTAG);
+
+	VT9P_UNLOCK(chan);
+
+	P9_DEBUG(TRANS, "%s: virtio request kicked\n", __func__);
+
+	return (0);
+
+fail:
+	/* Common failure exit: never return with the channel lock held. */
+	VT9P_UNLOCK(chan);
+	return (error);
+}
+
+/*
+ * Completion of the request from the virtqueue. This interrupt handler is
+ * setup at initialization and is called for every completing request. It
+ * just wakes up the sleeping submission thread.
+ */
+static void
+vt9p_intr_complete(void *xsc)
+{
+	struct vt9p_softc *chan;
+	struct virtqueue *vq;
+
+	chan = (struct vt9p_softc *)xsc;
+	vq = chan->vt9p_vq;
+
+	P9_DEBUG(TRANS, "%s: completing\n", __func__);
+
+	VT9P_LOCK(chan);
+	virtqueue_enable_intr(vq);
+	wakeup(chan);
+	VT9P_UNLOCK(chan);
+}
+
+/*
+ * Allocation of the virtqueue with interrupt complete routines.
+ */
+static int
+vt9p_alloc_virtqueue(struct vt9p_softc *sc)
+{
+	struct vq_alloc_info vq_info;
+	device_t dev;
+
+	dev = sc->vt9p_dev;
+
+	VQ_ALLOC_INFO_INIT(&vq_info, sc->max_nsegs,
+	    vt9p_intr_complete, sc, &sc->vt9p_vq,
+	    "%s request", device_get_nameunit(dev));
+
+	return (virtio_alloc_virtqueues(dev, 1, &vq_info));
+}
+
+/* Probe for existence of 9P virtio channels */
+static int
+vt9p_probe(device_t dev)
+{
+
+	/* If the virtio device type is a 9P device, then we claim and attach it */
+	if (virtio_get_device_type(dev) != VIRTIO_ID_9P)
+		return (ENXIO);
+	device_set_desc(dev, "VirtIO 9P Transport");
+
+	return (BUS_PROBE_DEFAULT);
+}
+
+/*
+ * Quiesce the device. This is also reached from the attach error path,
+ * where the virtqueue may not have been allocated yet, so only touch the
+ * queue when it exists.
+ */
+static void
+vt9p_stop(struct vt9p_softc *sc)
+{
+
+	/* Device specific stops .*/
+	if (sc->vt9p_vq != NULL)
+		virtqueue_disable_intr(sc->vt9p_vq);
+	virtio_stop(sc->vt9p_dev);
+}
+
+/*
+ * Detach the 9P virtio PCI device.
+ *
+ * vt9p_attach() also calls this to unwind a partially completed attach, so
+ * every step has to tolerate state that was never set up. Always returns 0.
+ */
+static int
+vt9p_detach(device_t dev)
+{
+	struct vt9p_softc *sc, *iter;
+
+	sc = device_get_softc(dev);
+	VT9P_LOCK(sc);
+	vt9p_stop(sc);
+	VT9P_UNLOCK(sc);
+
+	/*
+	 * Unlink the channel before releasing anything it owns: vt9p_create()
+	 * compares mount_tag of every listed channel. STAILQ_REMOVE() must
+	 * only be used on an element that is on the list, which is not the
+	 * case when attach failed before inserting it.
+	 */
+	mtx_lock(&global_chan_list_mtx);
+	STAILQ_FOREACH(iter, &global_chan_list, chan_next) {
+		if (iter == sc) {
+			STAILQ_REMOVE(&global_chan_list, sc, vt9p_softc,
+			    chan_next);
+			break;
+		}
+	}
+	mtx_unlock(&global_chan_list_mtx);
+
+	if (sc->vt9p_sglist) {
+		sglist_free(sc->vt9p_sglist);
+		sc->vt9p_sglist = NULL;
+	}
+	if (sc->mount_tag) {
+		free(sc->mount_tag, M_P9FS_MNTTAG);
+		sc->mount_tag = NULL;
+	}
+
+	VT9P_LOCK_DESTROY(sc);
+	cv_destroy(&sc->submit_cv);
+
+	return (0);
+}
+
+/*
+ * Attach the 9P virtio PCI device: read the mount tag from the device
+ * config, set up the request virtqueue and its interrupt, and publish the
+ * channel on the global list. Returns 0 on success; on failure the partial
+ * state is unwound through vt9p_detach() and an errno value is returned.
+ */
+static int
+vt9p_attach(device_t dev)
+{
+	struct sysctl_ctx_list *ctx;
+	struct sysctl_oid *tree;
+	struct vt9p_softc *chan;
+	char *mount_tag;
+	int error;
+	uint16_t mount_tag_len;
+
+	chan = device_get_softc(dev);
+	chan->vt9p_dev = dev;
+
+	/* Init the channel lock. */
+	VT9P_LOCK_INIT(chan);
+	/* Initialize the condition variable */
+	cv_init(&chan->submit_cv, "Conditional variable for submit queue" );
+	chan->max_nsegs = MAX_SUPPORTED_SGS;
+	chan->vt9p_sglist = sglist_alloc(chan->max_nsegs, M_NOWAIT);
+
+	/* Negotiate the features from the host */
+	virtio_set_feature_desc(dev, virtio_9p_feature_desc);
+	virtio_negotiate_features(dev, VIRTIO_9PNET_F_MOUNT_TAG);
+
+	/*
+	 * If mount tag feature is supported read the mount tag
+	 * from device config
+	 */
+	if (virtio_with_feature(dev, VIRTIO_9PNET_F_MOUNT_TAG))
+		mount_tag_len = virtio_read_dev_config_2(dev,
+		    offsetof(struct virtio_9pnet_config, mount_tag_len));
+	else {
+		error = EINVAL;
+		P9_DEBUG(ERROR, "%s: Mount tag feature not supported by host\n", __func__);
+		goto out;
+	}
+	/* One extra zeroed byte so the tag can be used as a C string. */
+	mount_tag = malloc(mount_tag_len + 1, M_P9FS_MNTTAG,
+	    M_WAITOK | M_ZERO);
+
+	virtio_read_device_config(dev,
+	    offsetof(struct virtio_9pnet_config, mount_tag),
+	    mount_tag, mount_tag_len);
+
+	device_printf(dev, "Mount tag: %s\n", mount_tag);
+
+	mount_tag_len++;
+	chan->mount_tag_len = mount_tag_len;
+	chan->mount_tag = mount_tag;
+
+	ctx = device_get_sysctl_ctx(dev);
+	tree = device_get_sysctl_tree(dev);
+	SYSCTL_ADD_STRING(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "p9fs_mount_tag",
+	    CTLFLAG_RD, chan->mount_tag, 0, "Mount tag");
+
+	if (chan->vt9p_sglist == NULL) {
+		error = ENOMEM;
+		P9_DEBUG(ERROR, "%s: Cannot allocate sglist\n", __func__);
+		goto out;
+	}
+
+	/* We expect one virtqueue, for requests. */
+	error = vt9p_alloc_virtqueue(chan);
+
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: Allocating the virtqueue failed \n", __func__);
+		goto out;
+	}
+
+	error = virtio_setup_intr(dev, INTR_TYPE_MISC|INTR_MPSAFE);
+
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: Cannot setup virtqueue interrupt\n", __func__);
+		goto out;
+	}
+	error = virtqueue_enable_intr(chan->vt9p_vq);
+
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: Cannot enable virtqueue interrupt\n", __func__);
+		goto out;
+	}
+
+	mtx_lock(&global_chan_list_mtx);
+	/* Insert the channel in global channel list */
+	STAILQ_INSERT_HEAD(&global_chan_list, chan, chan_next);
+	mtx_unlock(&global_chan_list_mtx);
+
+	return (0);
+out:
+	/* Something went wrong, detach the device */
+	vt9p_detach(dev);
+	return (error);
+}
+
+/*
+ * Allocate a new virtio channel. This sets up a transport channel
+ * for 9P communication
+ */
+static int
+vt9p_create(const char *mount_tag, void **handlep)
+{
+	struct vt9p_softc *sc, *chan;
+
+	chan = NULL;
+
+	/*
+	 * Find out the corresponding channel for a client from global list
+	 * of channels based on mount tag and attach it to client
+	 */
+	mtx_lock(&global_chan_list_mtx);
+	STAILQ_FOREACH(sc, &global_chan_list, chan_next) {
+		if (!strcmp(sc->mount_tag, mount_tag)) {
+			chan = sc;
+			break;
+		}
+	}
+	mtx_unlock(&global_chan_list_mtx);
+
+	/*
+	 * If chan is already attached to a client then it cannot be used for
+	 * another client.
+ */
+	if (chan != NULL) {
+		/*
+		 * Test and set the busy flag under the channel lock so two
+		 * concurrent callers cannot both claim the same channel.
+		 */
+		VT9P_LOCK(chan);
+		if (chan->busy) {
+			VT9P_UNLOCK(chan);
+			return (EBUSY);
+		}
+		chan->busy = TRUE;
+		VT9P_UNLOCK(chan);
+
+		*handlep = (void *)chan;
+		return (0);
+	}
+
+	/* If we dont have one, for now bail out.*/
+	P9_DEBUG(TRANS, "%s: No Global channel with mount_tag=%s\n",
+	    __func__, mount_tag);
+	return (EINVAL);
+}
+
+/* Release a channel claimed by vt9p_create() so it can be used again. */
+static void
+vt9p_close(void *handle)
+{
+	struct vt9p_softc *chan = handle;
+
+	/* Same lock as the test-and-set in vt9p_create(). */
+	VT9P_LOCK(chan);
+	chan->busy = FALSE;
+	VT9P_UNLOCK(chan);
+}
+
+/* Transport operations handed to the 9P client layer. */
+static struct p9_trans_module vt9p_trans = {
+	.name = "virtio",
+	.create = vt9p_create,
+	.close = vt9p_close,
+	.request = vt9p_request,
+	.cancel = vt9p_cancel,
+};
+
+static device_method_t vt9p_mthds[] = {
+	/* Device methods. */
+	DEVMETHOD(device_probe, vt9p_probe),
+	DEVMETHOD(device_attach, vt9p_attach),
+	DEVMETHOD(device_detach, vt9p_detach),
+	DEVMETHOD_END
+};
+
+static driver_t vt9p_drv = {
+	"virtio_p9fs",
+	vt9p_mthds,
+	sizeof(struct vt9p_softc)
+};
+
+/*
+ * Module event handler: sets up the 9P zones and registers this transport
+ * on load, destroys the zones on unload. Returns EOPNOTSUPP for any event
+ * other than load, unload and shutdown.
+ */
+static int
+vt9p_modevent(module_t mod, int type, void *unused)
+{
+	int error;
+
+	error = 0;
+
+	switch (type) {
+	case MOD_LOAD:
+		p9_init_zones();
+		p9_register_trans(&vt9p_trans);
+		break;
+	case MOD_UNLOAD:
+		/*
+		 * NOTE(review): the transport registered at MOD_LOAD is not
+		 * unregistered here - confirm whether that is intended.
+		 */
+		p9_destroy_zones();
+		break;
+	case MOD_SHUTDOWN:
+		break;
+	default:
+		error = EOPNOTSUPP;
+		break;
+	}
+	return (error);
+}
+
+DRIVER_MODULE(virtio_p9fs, virtio_pci, vt9p_drv, vt9p_modevent, 0);
+MODULE_VERSION(virtio_p9fs, 1);
+MODULE_DEPEND(virtio_p9fs, virtio, 1, 1, 1);
+MODULE_DEPEND(virtio_p9fs, p9fs, 1, 1, 1);
diff --git a/sys/fs/p9fs/p9_client.h b/sys/fs/p9fs/p9_client.h
new file mode 100644
--- /dev/null
+++ b/sys/fs/p9fs/p9_client.h
@@ -0,0 +1,168 @@
+/*-
+ * Copyright (c) 2017 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */
+
+/* 9P client definitions */
+
+#ifndef FS_P9FS_P9_CLIENT_H
+#define FS_P9FS_P9_CLIENT_H
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+/* 9P protocol versions */
+enum p9_proto_versions {
+	p9_proto_legacy, /* legacy version */
+	p9_proto_2000u, /* Unix version */
+	p9_proto_2000L, /* Linux version */
+};
+
+/* P9 Request exchanged between Host and Guest */
+struct p9_req_t {
+	struct p9_buffer *tc; /* request buffer */
+	struct p9_buffer *rc; /* response buffer */
+};
+
+/* 9P transport status */
+enum transport_status {
+	P9FS_CONNECT, /* transport is connected */
+	P9FS_BEGIN_DISCONNECT,/* transport has begun to disconnect */
+	P9FS_DISCONNECT, /* transport has been disconnected */
+};
+
+/* This is set by QEMU so we will oblige */
+#define P9FS_MTU 8192
+
+/*
+ * Even though we have an 8k buffer, QEMU is typically doing 8168
+ * because of a HDR of 24. Use that amount for transfers so that we don't
+ * drop anything.
+ */
+#define P9FS_IOUNIT (P9FS_MTU - 24)
+#define P9FS_DIRENT_LEN 256
+#define P9_NOTAG 0
+
+/* Client state information */
+struct p9_client {
+	struct p9_trans_module *ops; /* module API instantiated with this client */
+	void *handle; /* module-specific client handle */
+	struct mtx clnt_mtx; /* mutex to lock the client */
+	struct mtx req_mtx; /* mutex to lock the request buffer */
+	struct cv req_cv; /* condition variable on which to wake up thread */
+	unsigned int msize; /* maximum data size */
+	unsigned char proto_version; /* 9P version to use */
+	struct unrhdr fidpool; /* fid handle accounting for session */
+	struct unrhdr tagpool; /* transaction id accounting for session */
+	enum transport_status trans_status; /* transport instance state */
+};
+
+/* The main fid structure which keeps track of the file.*/
+struct p9_fid {
+	struct p9_client *clnt; /* the instantiating 9P client */
+	uint32_t fid; /* numeric identifier */
+	int mode; /* current mode of this fid */
+	struct p9_qid qid; /* server identifier */
+	uint32_t mtu; /* max transferable unit at a time */
+	uid_t uid; /* numeric uid of the local user who owns this handle */
+	int v_opens; /* keep count on the number of opens called with this file handle */
+	STAILQ_ENTRY(p9_fid) fid_next; /* points to next fid in the list */
+};
+
+/* Directory entry structure */
+struct p9_dirent {
+	struct p9_qid qid; /* 9P server qid for this dirent */
+	uint64_t d_off; /* offset to the next dirent */
+	unsigned char d_type; /* file type */
+	char d_name[P9FS_DIRENT_LEN]; /* file name */
+	int len;
+};
+
+void p9_init_zones(void);
+void p9_destroy_zones(void);
+
+/* Session and client Init Ops */
+struct p9_client *p9_client_create(struct mount *mp, int *error,
+    const char *mount_tag);
+void p9_client_destroy(struct p9_client *clnt);
+struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *fid,
+    const char *uname, uid_t n_uname, const char *aname, int *error);
+
+/* FILE OPS - These are individually called from the specific vop function */
+
+int p9_client_open(struct p9_fid *fid, int mode);
+int p9_client_close(struct p9_fid *fid);
+struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwnames,
+    char **wnames, int clone, int *error);
+struct p9_fid *p9_fid_create(struct p9_client *clnt);
+void p9_fid_destroy(struct p9_fid *fid);
+uint16_t p9_tag_create(struct p9_client *clnt);
+void p9_tag_destroy(struct p9_client *clnt, uint16_t tag);
+int p9_client_clunk(struct p9_fid *fid);
+int p9_client_version(struct p9_client *clnt);
+int p9_client_readdir(struct p9_fid *fid, char *data, uint64_t offset, uint32_t count);
+int p9_client_read(struct p9_fid *fid, uint64_t offset, uint32_t count, char *data);
+int p9_client_write(struct p9_fid *fid, uint64_t offset, uint32_t count, char *data);
+int p9_client_file_create(struct p9_fid *fid, char *name, uint32_t perm, int mode,
+    char *extension);
+int p9_client_remove(struct p9_fid *fid);
+int p9_dirent_read(struct p9_client *clnt, char *buf, int start, int len,
+    struct p9_dirent *dirent);
+int p9_client_statfs(struct p9_fid *fid, struct p9_statfs *stat);
+int p9_client_statread(struct p9_client *clnt, char *data, size_t len, struct p9_wstat *st);
+int p9_is_proto_dotu(struct p9_client *clnt);
+int p9_is_proto_dotl(struct p9_client *clnt);
+void p9_client_cb(struct p9_client *c, struct p9_req_t *req);
+int p9stat_read(struct p9_client *clnt, char *data, size_t len, struct p9_wstat *st);
+void p9_client_disconnect(struct p9_client *clnt);
+void p9_client_begin_disconnect(struct p9_client *clnt);
+int p9_create_symlink(struct p9_fid *fid, char *name, char *symtgt, gid_t gid);
+int p9_create_hardlink(struct p9_fid *dfid, struct p9_fid *oldfid, char *name);
+int p9_readlink(struct p9_fid *fid, char **target);
+int p9_client_renameat(struct p9_fid *oldfid, char *oldname, struct p9_fid *newfid, char *newname);
+int p9_client_getattr(struct p9_fid *fid, struct p9_stat_dotl *stat_dotl,
+    uint64_t request_mask);
+int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr);
+
+int p9_buf_vwritef(struct p9_buffer *buf, int proto_version, const char *fmt,
+    va_list ap);
+int p9_buf_readf(struct p9_buffer *buf, int proto_version, const char *fmt, ...);
+int p9_buf_prepare(struct p9_buffer *buf, int8_t type);
+int p9_buf_finalize(struct p9_client *clnt, struct p9_buffer *buf);
+void p9_buf_reset(struct p9_buffer *buf);
+
+#endif /* FS_P9FS_P9_CLIENT_H */
diff --git a/sys/fs/p9fs/p9_client.c b/sys/fs/p9fs/p9_client.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/p9fs/p9_client.c
@@ -0,0 +1,1319 @@
+/*-
+ * Copyright (c) 2017 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains 9P client functions which prepares message to be sent to
+ * the server. Every fileop typically has a function defined here to interact
+ * with the host.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#define QEMU_HEADER 7
+#define P9FS_MAX_FID_CNT (1024 * 1024 * 1024)
+#define P9FS_ROOT_FID_NO 2
+#define P9FS_MIN_TAG 1
+#define P9FS_MAX_TAG 65535
+#define WSTAT_SIZE 47
+#define WSTAT_EXTENSION_SIZE 14
+
+static MALLOC_DEFINE(M_P9CLNT, "p9_client", "p9fs client structure");
+static uma_zone_t p9fs_fid_zone;
+static uma_zone_t p9fs_req_zone;
+static uma_zone_t p9fs_buf_zone;
+
+SYSCTL_DECL(_vfs_p9fs);
+int p9_debug_level = 0;
+SYSCTL_INT(_vfs_p9fs, OID_AUTO, debug_level, CTLFLAG_RW,
+    &p9_debug_level, 0, "p9fs debug logging");
+
+static struct p9_req_t *p9_get_request(struct p9_client *c, int *error);
+static struct p9_req_t *p9_client_request(
+    struct p9_client *c, int8_t type, int *error, const char *fmt, ...);
+
+inline int
+p9_is_proto_dotl(struct p9_client *clnt)
+{
+
+	return (clnt->proto_version == p9_proto_2000L);
+}
+
+inline int
+p9_is_proto_dotu(struct p9_client *clnt)
+{
+
+	return (clnt->proto_version == p9_proto_2000u);
+}
+
+/* Parse mount options into client structure */
+static int
+p9_parse_opts(struct mount *mp, struct p9_client *clnt)
+{
+	int error, len;
+	char *trans;
+
+	/*
+	 * Default to virtio since thats the only transport we have for now.
+	 */
+	error = vfs_getopt(mp->mnt_optnew, "trans", (void **)&trans, &len);
+	if (error == ENOENT)
+		trans = "virtio";
+
+	/* These are defaults for now */
+	clnt->proto_version = p9_proto_2000L;
+	clnt->msize = 8192;
+
+	/* Get the default trans callback */
+	clnt->ops = p9_get_trans_by_name(trans);
+
+	return (0);
+}
+
+/* Allocate buffer for sending request and getting responses */
+static struct p9_buffer *
+p9_buffer_alloc(int alloc_msize)
+{
+	struct p9_buffer *fc;
+
+	fc = uma_zalloc(p9fs_buf_zone, M_WAITOK | M_ZERO);
+	fc->capacity = alloc_msize;
+	fc->offset = 0;
+	fc->size = 0;
+	fc->sdata = (char *)fc + sizeof(struct p9_buffer);
+
+	return (fc);
+}
+
+/* Free memory used by request and repsonse buffers */
+static void
+p9_buffer_free(struct p9_buffer **buf)
+{
+
+	/* Free the sdata buffers first, then the whole structure*/
+	uma_zfree(p9fs_buf_zone, *buf);
+	*buf = NULL;
+}
+
+/* Free the request */
+static void
+p9_free_req(struct p9_client *clnt, struct p9_req_t *req)
+{
+
+	if (req->tc != NULL) {
+		if (req->tc->tag != P9_NOTAG)
+			p9_tag_destroy(clnt, req->tc->tag);
+		p9_buffer_free(&req->tc);
+	}
+
+	if (req->rc != NULL)
+		p9_buffer_free(&req->rc);
+
+	uma_zfree(p9fs_req_zone, req);
+}
+
+/* Allocate a request by tag */
+static struct p9_req_t *
+p9_get_request(struct p9_client *clnt, int *error)
+{
+	struct p9_req_t *req;
+	int alloc_msize;
+	uint16_t tag;
+
+	alloc_msize = P9FS_MTU;
+
+	req = uma_zalloc(p9fs_req_zone, M_WAITOK | M_ZERO);
+	req->tc = p9_buffer_alloc(alloc_msize);
+	req->rc = p9_buffer_alloc(alloc_msize);
+
+	tag = p9_tag_create(clnt);
+	if (tag == P9_NOTAG) {
+		*error = EAGAIN;
+		req->tc->tag = P9_NOTAG;
+		p9_free_req(clnt, req);
+		return (NULL);
+	}
+	req->tc->tag = tag;
+	return (req);
+}
+
+/*
+ * Parse header arguments of the response buffer: size, type and tag are
+ * read from the start of buf and stored in buf->size, buf->id and buf->tag.
+ * Returns 0 on success or the p9_buf_readf() error.
+ */
+static int
+p9_parse_receive(struct p9_buffer *buf, struct p9_client *clnt)
+{
+	int8_t type;
+	/* Unsigned: tags go up to P9FS_MAX_TAG and p9_tag_create() is uint16_t. */
+	uint16_t tag;
+	int32_t size;
+	int error;
+
+	buf->offset = 0;
+
+	/* This value is set by QEMU for the header.*/
+	if (buf->size == 0)
+		buf->size = QEMU_HEADER;
+
+	/* This is the initial header. Parse size, type, and tag .*/
+	error = p9_buf_readf(buf, 0, "dbw", &size, &type, &tag);
+	if (error != 0)
+		goto out;
+
+	buf->size = size;
+	buf->id = type;
+	buf->tag = tag;
+	P9_DEBUG(TRANS, "%s: size=%d type: %d tag: %d\n",
+	    __func__, buf->size, buf->id, buf->tag);
+out:
+	return (error);
+}
+
+/*
+ * Check 9P response for any errors returned and process it.
+ *
+ * Returns 0 when the reply is not an error message, the error code carried
+ * by an RERROR/RLERROR reply, or the parse error if the reply cannot be
+ * decoded.
+ */
+static int
+p9_client_check_return(struct p9_client *c, struct p9_req_t *req)
+{
+	int error;
+	int ecode;
+	char *ename;
+
+	/*
+	 * Defined fallbacks in case the parser does not fill these in.
+	 * NOTE(review): presumably the "?d" part of the RERROR format is
+	 * skipped for the legacy protocol - confirm against p9_buf_readf().
+	 * An error reply must never be reported as success, hence EIO.
+	 */
+	ecode = EIO;
+	ename = NULL;
+
+	/* Check what we have in the receive bufer .*/
+	error = p9_parse_receive(req->rc, c);
+	if (error != 0)
+		goto out;
+
+	/*
+	 * No error, We are done with the preprocessing. Return to the caller
+	 * and process the actual data.
+	 */
+	if (req->rc->id != P9PROTO_RERROR && req->rc->id != P9PROTO_RLERROR)
+		return (0);
+
+	/*
+	 * Interpreting the error is done in different ways for Linux and
+	 * Unix version. Make sure you interpret it right. Only the two
+	 * error message types can reach this point.
+	 */
+	if (req->rc->id == P9PROTO_RERROR) {
+		error = p9_buf_readf(req->rc, c->proto_version, "s?d", &ename, &ecode);
+	} else {
+		error = p9_buf_readf(req->rc, c->proto_version, "d", &ecode);
+	}
+	if (error != 0)
+		goto out;
+
+	/* if there was an ecode error make this the err now */
+	error = ecode;
+
+	/*
+	 * Note this is still not completely an error, as lookups for files
+	 * not present can hit this and return. Hence it is made a debug print.
+	 */
+	if (error != 0) {
+		if (req->rc->id == P9PROTO_RERROR) {
+			P9_DEBUG(PROTO, "RERROR error %d ename %s\n",
+			    error, ename);
+		} else if (req->rc->id == P9PROTO_RLERROR) {
+			P9_DEBUG(PROTO, "RLERROR error %d\n", error);
+		}
+	}
+
+	if (req->rc->id == P9PROTO_RERROR) {
+		free(ename, M_TEMP);
+	}
+	return (error);
+
+out:
+	P9_DEBUG(ERROR, "couldn't parse receive buffer error%d\n", error);
+	return (error);
+}
+
+/* State machine changing helpers */
+void p9_client_disconnect(struct p9_client *clnt)
+{
+
+	P9_DEBUG(TRANS, "%s: clnt %p\n", __func__, clnt);
+	clnt->trans_status = P9FS_DISCONNECT;
+}
+
+void p9_client_begin_disconnect(struct p9_client *clnt)
+{
+
+	P9_DEBUG(TRANS, "%s: clnt %p\n", __func__, clnt);
+	clnt->trans_status = P9FS_BEGIN_DISCONNECT;
+}
+
+static struct p9_req_t *
+p9_client_prepare_req(struct p9_client *c, int8_t type,
+    int req_size, int *error, const char *fmt, __va_list ap)
+{
+	struct p9_req_t *req;
+
+	P9_DEBUG(TRANS, "%s: client %p op %d\n", __func__, c, type);
+
+	/*
+	 * Before we start with the request, check if its possible to finish
+	 * this request. We are allowed to submit the request only if there
+	 * are no close sessions happening or else there can be race. If the
+	 * status is Disconnected, we stop any requests coming in after that.
+	 */
+	if (c->trans_status == P9FS_DISCONNECT) {
+		*error = EIO;
+		return (NULL);
+	}
+
+	/* Allow only cleanup clunk messages once teardown has started.
*/ + if ((c->trans_status == P9FS_BEGIN_DISCONNECT) && + (type != P9PROTO_TCLUNK)) { + *error = EIO; + return (NULL); + } + + /* Allocate buffer for transferring and receiving data from host */ + req = p9_get_request(c, error); + if (*error != 0) { + P9_DEBUG(ERROR, "%s: request allocation failed.\n", __func__); + return (NULL); + } + + /* Marshall the data according to QEMU standards */ + *error = p9_buf_prepare(req->tc, type); + if (*error != 0) { + P9_DEBUG(ERROR, "%s: p9_buf_prepare failed: %d\n", + __func__, *error); + goto out; + } + + *error = p9_buf_vwritef(req->tc, c->proto_version, fmt, ap); + if (*error != 0) { + P9_DEBUG(ERROR, "%s: p9_buf_vwrite failed: %d\n", + __func__, *error); + goto out; + } + + *error = p9_buf_finalize(c, req->tc); + if (*error != 0) { + P9_DEBUG(ERROR, "%s: p9_buf_finalize failed: %d \n", + __func__, *error); + goto out; + } + + return (req); +out: + p9_free_req(c, req); + return (NULL); +} + +/* + * Issue a request and wait for response. The routine takes care of preparing + * the 9P request header to be sent, parsing and checking for error conditions + * in the received buffer. It returns the request structure. + */ +static struct p9_req_t * +p9_client_request(struct p9_client *c, int8_t type, int *error, + const char *fmt, ...) +{ + va_list ap; + struct p9_req_t *req; + + va_start(ap, fmt); + req = p9_client_prepare_req(c, type, c->msize, error, fmt, ap); + va_end(ap); + + /* Issue with allocation of request buffer */ + if (*error != 0) + return (NULL); + + /* Call into the transport for submission. */ + *error = c->ops->request(c->handle, req); + if (*error != 0) { + P9_DEBUG(ERROR, "%s: failed: %d\n", __func__, *error); + goto out; + } + + /* + * Before we return, pre process the header and the rc buffer before + * calling into the protocol infra to analyze the data in rc. 
+ */ + *error = p9_client_check_return(c, req); + if (*error != 0) + goto out; + + return (req); +out: + p9_free_req(c, req); + return (NULL); +} + +/* Setup tag contents and structure */ +uint16_t +p9_tag_create(struct p9_client *clnt) +{ + int tag; + + tag = alloc_unr(&clnt->tagpool); + P9_DEBUG(LPROTO, "%s: clnt %p: tag %d\n", __func__, clnt, tag); + + /* Alloc_unr returning -1 is an error for no units left */ + if (tag == -1) { + return (P9_NOTAG); + } + return (tag); +} + +/* Clean up tag structures */ +void +p9_tag_destroy(struct p9_client *clnt, uint16_t tag) +{ + + P9_DEBUG(LPROTO, "%s: clnt %p: tag %d\n", __func__, clnt, tag); + + /* Release to the pool */ + free_unr(&clnt->tagpool, tag); +} + +/* Allocate a new fid from the fidpool */ +struct p9_fid * +p9_fid_create(struct p9_client *clnt) +{ + struct p9_fid *fid; + + + fid = uma_zalloc(p9fs_fid_zone, M_WAITOK | M_ZERO); + fid->fid = alloc_unr(&clnt->fidpool); + P9_DEBUG(LPROTO, "%s: fid %d\n", __func__, fid->fid); + + /* Alloc_unr returning -1 is an error for no units left */ + if (fid->fid == -1) { + uma_zfree(p9fs_fid_zone, fid); + return (NULL); + } + fid->mode = -1; + fid->uid = -1; + fid->clnt = clnt; + + return (fid); +} + +/* Free the fid by releasing it to fidpool */ +void +p9_fid_destroy(struct p9_fid *fid) +{ + struct p9_client *clnt; + + P9_DEBUG(LPROTO, "%s: fid %d\n", __func__, fid->fid); + clnt = fid->clnt; + /* Release to the pool */ + free_unr(&clnt->fidpool, fid->fid); + uma_zfree(p9fs_fid_zone, fid); +} + +/* Request the version of 9P protocol */ +int +p9_client_version(struct p9_client *c) +{ + int error; + struct p9_req_t *req; + char *version; + int msize; + + error = 0; + + P9_DEBUG(PROTO, "TVERSION msize %d protocol %d\n", + c->msize, c->proto_version); + + switch (c->proto_version) { + case p9_proto_2000L: + req = p9_client_request(c, P9PROTO_TVERSION, &error, "ds", + c->msize, "9P2000.L"); + break; + case p9_proto_2000u: + req = p9_client_request(c, P9PROTO_TVERSION, &error, 
"ds", + c->msize, "9P2000.u"); + break; + case p9_proto_legacy: + req = p9_client_request(c, P9PROTO_TVERSION, &error, "ds", + c->msize, "9P2000"); + break; + default: + return (EINVAL); + } + + /* Always return the relevant error code */ + if (error != 0) + return (error); + + error = p9_buf_readf(req->rc, c->proto_version, "ds", &msize, &version); + if (error != 0) { + P9_DEBUG(ERROR, "%s: version error: %d\n", __func__, error); + goto out; + } + + P9_DEBUG(PROTO, "RVERSION msize %d %s\n", msize, version); + + if (!strncmp(version, "9P2000.L", 8)) + c->proto_version = p9_proto_2000L; + else if (!strncmp(version, "9P2000.u", 8)) + c->proto_version = p9_proto_2000u; + else if (!strncmp(version, "9P2000", 6)) + c->proto_version = p9_proto_legacy; + else { + error = ENOMEM; + goto out; + } + + /* limit the msize .*/ + if (msize < c->msize) + c->msize = msize; +out: + p9_free_req(c, req); + return (error); +} + +/* + * Initialize zones for different things. This is called from Init module + * so that we just have them initalized once. 
+ */ +void +p9_init_zones(void) +{ + + /* Create the request and the fid zones */ + p9fs_fid_zone = uma_zcreate("p9fs fid zone", + sizeof(struct p9_fid), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + + /* Create the request and the fid zones */ + p9fs_req_zone = uma_zcreate("p9fs req zone", + sizeof(struct p9_req_t), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + + /* Create the buffer zone */ + p9fs_buf_zone = uma_zcreate("p9fs buf zone", + sizeof(struct p9_buffer) + P9FS_MTU, NULL, NULL, + NULL, NULL, UMA_ALIGN_PTR, 0); +} + +void +p9_destroy_zones(void) +{ + + uma_zdestroy(p9fs_fid_zone); + uma_zdestroy(p9fs_req_zone); + uma_zdestroy(p9fs_buf_zone); +} + +/* Return the client to the session in the FS to hold it */ +struct p9_client * +p9_client_create(struct mount *mp, int *error, const char *mount_tag) +{ + struct p9_client *clnt; + + clnt = malloc(sizeof(struct p9_client), M_P9CLNT, M_WAITOK | M_ZERO); + mtx_init(&clnt->clnt_mtx, "p9clnt", NULL, MTX_DEF); + + /* Parse should have set trans_mod */ + *error = p9_parse_opts(mp, clnt); + if (*error != 0) + goto out; + + if (clnt->ops == NULL) { + *error = EINVAL; + P9_DEBUG(ERROR, "%s: no transport\n", __func__); + goto out; + } + + /* All the structures from here are protected by the lock clnt_mtx */ + init_unrhdr(&clnt->fidpool, P9FS_ROOT_FID_NO, P9FS_MAX_FID_CNT, + &clnt->clnt_mtx); + init_unrhdr(&clnt->tagpool, P9FS_MIN_TAG, P9FS_MAX_TAG, + &clnt->clnt_mtx); + + P9_DEBUG(TRANS, "%s: clnt %p trans %p msize %d protocol %d\n", + __func__, clnt, clnt->ops, clnt->msize, clnt->proto_version); + + *error = clnt->ops->create(mount_tag, &clnt->handle); + if (*error != 0) { + P9_DEBUG(ERROR, "%s: transport create failed .%d \n", + __func__, *error); + goto out; + } + clnt->trans_status = P9FS_CONNECT; + + *error = p9_client_version(clnt); + if (*error != 0) + goto out; + + P9_DEBUG(TRANS, "%s: client creation succeeded.\n", __func__); + return (clnt); +out: + free(clnt, M_P9CLNT); + return (NULL); +} + +/* Destroy the 
client by destroying associated fidpool and tagpool */ +void +p9_client_destroy(struct p9_client *clnt) +{ + + P9_DEBUG(TRANS, "%s: client %p\n", __func__, clnt); + clnt->ops->close(clnt->handle); + + P9_DEBUG(TRANS, "%s : Destroying fidpool\n", __func__); + clear_unrhdr(&clnt->fidpool); + + P9_DEBUG(TRANS, "%s : Destroying tagpool\n", __func__); + clear_unrhdr(&clnt->tagpool); + + free(clnt, M_P9CLNT); +} + +/* + * Attach a user to the filesystem. Create a fid for that user to access + * the root of the filesystem. + */ +struct p9_fid * +p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, + const char *uname, uid_t n_uname, const char *aname, int *error) +{ + struct p9_req_t *req; + struct p9_fid *fid; + struct p9_qid qid; + + P9_DEBUG(PROTO, "TATTACH uname=%s aname=%s, n_uname=%d\n", + uname, aname, n_uname); + fid = p9_fid_create(clnt); + if (fid == NULL) { + *error = ENOMEM; + return (NULL); + } + fid->uid = n_uname; + + req = p9_client_request(clnt, P9PROTO_TATTACH, error, "ddssd", fid->fid, + P9PROTO_NOFID, uname, aname, n_uname); + if (*error != 0) + goto out; + + *error = p9_buf_readf(req->rc, clnt->proto_version, "Q", &qid); + if (*error != 0) { + P9_DEBUG(ERROR, "%s: p9_buf_readf failed: %d \n", + __func__, *error); + goto out; + } + + P9_DEBUG(PROTO, "RATTACH qid %x.%llx.%x\n", + qid.type, (unsigned long long)qid.path, qid.version); + + memmove(&fid->qid, &qid, sizeof(struct p9_qid)); + p9_free_req(clnt, req); + + return (fid); +out: + if (req != NULL) + p9_free_req(clnt, req); + if (fid != NULL) + p9_fid_destroy(fid); + + return (NULL); +} + +/* Delete a file/directory. 
Corresponding fid will be cluncked too */ +int +p9_client_remove(struct p9_fid *fid) +{ + int error; + struct p9_client *clnt; + struct p9_req_t *req; + + P9_DEBUG(PROTO, "TREMOVE fid %d\n", fid->fid); + + error = 0; + clnt = fid->clnt; + + req = p9_client_request(clnt, P9PROTO_TREMOVE, &error, "d", fid->fid); + if (error != 0) { + P9_DEBUG(PROTO, "RREMOVE fid %d\n", fid->fid); + return (error); + } + + p9_free_req(clnt, req); + return (error); +} + +/* Inform the file server that the current file represented by fid is no longer + * needed by the client. Any allocated fid on the server needs a clunk to be + * destroyed. + */ +int +p9_client_clunk(struct p9_fid *fid) +{ + int error; + struct p9_client *clnt; + struct p9_req_t *req; + + error = 0; + + if (fid == NULL) { + P9_DEBUG(ERROR, "%s: clunk with NULL fid is bad\n", __func__); + return (0); + } + + P9_DEBUG(PROTO, "TCLUNK fid %d \n", fid->fid); + + clnt = fid->clnt; + req = p9_client_request(clnt, P9PROTO_TCLUNK, &error, "d", fid->fid); + if (req != NULL) { + P9_DEBUG(PROTO, "RCLUNK fid %d\n", fid->fid); + p9_free_req(clnt, req); + } + + p9_fid_destroy(fid); + return (error); +} + +/* + * Client_walk is for searching any component name in a directory. + * This is usually called on lookups. Also when we need a new open fid + * as 9p needs to have an open fid for every file to fileops, we call this + * validate the component of the file and return the newfid(openfid) created. + */ +struct p9_fid * +p9_client_walk(struct p9_fid *oldfid, uint16_t nwnames, char **wnames, + int clone, int *error) +{ + struct p9_client *clnt; + struct p9_fid *fid; + struct p9_qid *wqids; + struct p9_req_t *req; + uint16_t nwqids, count; + + clnt = oldfid->clnt; + wqids = NULL; + nwqids = 0; + + /* + * Before, we go and create fid, make sure we are not tearing + * down. Only then we create. + * Allow only cleanup clunk messages once we are starting to teardown. 
+ */ + if (clnt->trans_status != P9FS_CONNECT) { + *error = EIO; + return (NULL); + } + + if (clone) { + fid = p9_fid_create(clnt); + if (fid == NULL) { + *error = ENOMEM; + return (NULL); + } + fid->uid = oldfid->uid; + } else + fid = oldfid; + + P9_DEBUG(PROTO, "TWALK fids %d,%d nwnames %u wname %s\n", + oldfid->fid, fid->fid, nwnames, + wnames ? wnames[nwnames-1] : NULL); + + /* + * The newfid is for the component in search. We are preallocating as + * qemu on other side allocates or returns a fid if it sees a match + */ + req = p9_client_request(clnt, P9PROTO_TWALK, error, "ddT", oldfid->fid, + fid->fid, wnames, nwnames); + if (*error != 0) { + if (fid != oldfid) + p9_fid_destroy(fid); + return (NULL); + } + + *error = p9_buf_readf(req->rc, clnt->proto_version, "R", &nwqids, + &wqids); + if (*error != 0) + goto out; + + P9_DEBUG(PROTO, "RWALK nwqid %d:\n", nwqids); + + if (nwqids != nwnames) { + *error = ENOENT; + goto out; + } + + for (count = 0; count < nwqids; count++) + P9_DEBUG(TRANS, "%s: [%d] %x.%llx.%x\n", + __func__, count, wqids[count].type, + (unsigned long long)wqids[count].path, + wqids[count].version); + + if (nwnames) + memmove(&fid->qid, &wqids[nwqids - 1], sizeof(struct p9_qid)); + else + fid->qid = oldfid->qid; + + p9_free_req(clnt, req); + free(wqids, M_TEMP); + return (fid); + +out: + p9_free_req(clnt, req); + if (wqids) + free(wqids, M_TEMP); + if (fid && fid != oldfid) + p9_client_clunk(fid); + return (NULL); +} + +/* Open a file with given fid and mode */ +int +p9_client_open(struct p9_fid *fid, int mode) +{ + int error, mtu; + struct p9_client *clnt; + struct p9_req_t *req; + + error = 0; + clnt = fid->clnt; + mtu = 0; + + P9_DEBUG(PROTO, "%s fid %d mode %d\n", + p9_is_proto_dotl(clnt) ? 
"TLOPEN" : "TOPEN", + fid->fid, mode); + + if (fid->mode != -1) + return (EINVAL); + + if (p9_is_proto_dotl(clnt)) + req = p9_client_request(clnt, P9PROTO_TLOPEN, &error, "dd", + fid->fid, mode); + else + req = p9_client_request(clnt, P9PROTO_TOPEN, &error, "db", + fid->fid, mode); + + if (error != 0) + return (error); + + error = p9_buf_readf(req->rc, clnt->proto_version, "Qd", &fid->qid, + &mtu); + if (error != 0) + goto out; + + P9_DEBUG(PROTO, "%s qid %x.%llx.%x mtu %x\n", + p9_is_proto_dotl(clnt) ? "RLOPEN" : "ROPEN", + (fid->qid).type, (unsigned long long)(fid->qid).path, + (fid->qid).version, mtu); + + fid->mode = mode; + fid->mtu = mtu; +out: + p9_free_req(clnt, req); + return (error); +} + +/* Request to get directory entries */ +int +p9_client_readdir(struct p9_fid *fid, char *data, uint64_t offset, + uint32_t count) +{ + int error; + uint32_t rsize; + struct p9_client *clnt; + struct p9_req_t *req; + char *dataptr; + + P9_DEBUG(PROTO, "TREADDIR fid %d offset %llu count %d\n", + fid->fid, (unsigned long long) offset, count); + + error = 0; + rsize = fid->mtu; + clnt = fid->clnt; + + if (!rsize || rsize > clnt->msize) + rsize = clnt->msize; + + if (count < rsize) + rsize = count; + + req = p9_client_request(clnt, P9PROTO_TREADDIR, &error, "dqd", + fid->fid, offset, rsize); + + if (error != 0) { + P9_DEBUG(ERROR, "%s: couldn't allocate req in client_readdir\n", + __func__); + return (-error); + } + + error = p9_buf_readf(req->rc, clnt->proto_version, "D", &count, + &dataptr); + if (error != 0) { + P9_DEBUG(ERROR, "%s: p0_buf_readf failed: %d\n", + __func__, error); + p9_free_req(clnt, req); + return (-error); + } + + P9_DEBUG(PROTO, "RREADDIR count %u\n", count); + + /* Copy back the data into the input buffer. */ + memmove(data, dataptr, count); + p9_free_req(clnt, req); + return (count); +} + +/* + * Read count bytes from offset for the file fid into the character + * buffer data. This buffer is handed over to p9fs to process into user + * buffers. 
Note that this function typically returns the number of bytes read + * so in case of an error we return -error so that we can distinguish between + * error codes and bytes. + */ +int +p9_client_read(struct p9_fid *fid, uint64_t offset, uint32_t count, char *data) +{ + struct p9_client *clnt; + struct p9_req_t *req; + char *dataptr; + int error, rsize; + + clnt = fid->clnt; + rsize = fid->mtu; + error = 0; + + P9_DEBUG(PROTO, "TREAD fid %d offset %llu %u\n", + fid->fid, (unsigned long long) offset, count); + + if (!rsize || rsize > clnt->msize) + rsize = clnt->msize; + + if (count < rsize) + rsize = count; + + /* At this stage, we only have 8K buffers so only transfer */ + req = p9_client_request(clnt, P9PROTO_TREAD, &error, "dqd", fid->fid, + offset, rsize); + if (error != 0) { + P9_DEBUG(ERROR, "%s: failed allocate request\n", __func__); + return (-error); + } + + error = p9_buf_readf(req->rc, clnt->proto_version, "D", &count, + &dataptr); + if (error != 0) { + P9_DEBUG(ERROR, "%s: p9_buf_readf failed: %d\n", + __func__, error); + goto out; + } + + if (rsize < count) { + P9_DEBUG(PROTO, "RREAD count (%d > %d)\n", count, rsize); + count = rsize; + } + + P9_DEBUG(PROTO, "RREAD count %d\n", count); + + if (count == 0) { + error = -EIO; + P9_DEBUG(ERROR, "%s: EIO error in client_read \n", __func__); + goto out; + } + + /* Copy back the data into the input buffer. */ + memmove(data, dataptr, count); + p9_free_req(clnt, req); + return (count); +out: + p9_free_req(clnt, req); + return (-error); +} + +/* + * Write count bytes from buffer to the offset for the file fid + * Note that this function typically returns the number of bytes written + * so in case of an error we return -error so that we can distinguish between + * error codes and bytes. 
+ */ + +int +p9_client_write(struct p9_fid *fid, uint64_t offset, uint32_t count, char *data) +{ + struct p9_client *clnt; + struct p9_req_t *req; + int ret, error, rsize; + + clnt = fid->clnt; + rsize = fid->mtu; + ret = 0; + error = 0; + + P9_DEBUG(PROTO, "TWRITE fid %d offset %llu %u\n", + fid->fid, (unsigned long long) offset, count); + + if (!rsize || rsize > clnt->msize) + rsize = clnt->msize; + + /* Limit set by Qemu ,8168 */ + if (count > rsize) { + count = rsize; + } + + /* + * Doing the Data blob instead. If at all we add the zerocopy, we can + * change it to uio direct copy + */ + req = p9_client_request(clnt, P9PROTO_TWRITE, &error, "dqD", fid->fid, + offset, count, data); + if (error != 0) { + P9_DEBUG(ERROR, "%s: failed allocate request: %d\n", + __func__, error); + return (-error); + } + + error = p9_buf_readf(req->rc, clnt->proto_version, "d", &ret); + if (error != 0) { + P9_DEBUG(ERROR, "%s: p9_buf_readf error: %d\n", + __func__, error); + goto out; + } + + if (count < ret) { + P9_DEBUG(PROTO, "RWRITE count (%d > %d)\n", count, ret); + ret = count; + } + P9_DEBUG(PROTO, "RWRITE count %d\n", ret); + + if (count == 0) { + error = EIO; + P9_DEBUG(ERROR, "%s: EIO error\n", __func__); + goto out; + } + + p9_free_req(clnt, req); + return (ret); +out: + p9_free_req(clnt, req); + return (-error); +} + + +/* Create file under directory fid, with name, permissions, mode. 
*/ +int +p9_client_file_create(struct p9_fid *fid, char *name, uint32_t perm, int mode, + char *extension) +{ + int error; + struct p9_client *clnt; + struct p9_req_t *req; + struct p9_qid qid; + int mtu; + + P9_DEBUG(PROTO, "TCREATE fid %d name %s perm %d mode %d\n", + fid->fid, name, perm, mode); + + clnt = fid->clnt; + error = 0; + + if (fid->mode != -1) + return (EINVAL); + + req = p9_client_request(clnt, P9PROTO_TCREATE, &error, "dsdb?s", + fid->fid, name, perm, mode, extension); + if (error != 0) + return (error); + + error = p9_buf_readf(req->rc, clnt->proto_version, "Qd", &qid, &mtu); + if (error != 0) + goto out; + + P9_DEBUG(PROTO, "RCREATE qid %x.%jx.%x mtu %x\n", + qid.type, (uintmax_t)qid.path, qid.version, mtu); + fid->mode = mode; + fid->mtu = mtu; + +out: + p9_free_req(clnt, req); + return (error); +} + +/* Request file system information of the file system */ +int +p9_client_statfs(struct p9_fid *fid, struct p9_statfs *stat) +{ + int error; + struct p9_req_t *req; + struct p9_client *clnt; + + error = 0; + clnt = fid->clnt; + + P9_DEBUG(PROTO, "TSTATFS fid %d\n", fid->fid); + + req = p9_client_request(clnt, P9PROTO_TSTATFS, &error, "d", fid->fid); + if (error != 0) { + return (error); + } + + error = p9_buf_readf(req->rc, clnt->proto_version, "ddqqqqqqd", + &stat->type, &stat->bsize, &stat->blocks, &stat->bfree, + &stat->bavail, &stat->files, &stat->ffree, &stat->fsid, + &stat->namelen); + + if (error != 0) + goto out; + + P9_DEBUG(PROTO, "RSTATFS fid %d type 0x%jx bsize %ju " + "blocks %ju bfree %ju bavail %ju files %ju ffree %ju " + "fsid %ju namelen %ju\n", + fid->fid, (uintmax_t)stat->type, + (uintmax_t)stat->bsize, (uintmax_t)stat->blocks, + (uintmax_t)stat->bfree, (uintmax_t)stat->bavail, + (uintmax_t)stat->files, (uintmax_t)stat->ffree, + (uintmax_t)stat->fsid, (uintmax_t)stat->namelen); + +out: + p9_free_req(clnt, req); + return (error); +} + +/* Rename file referenced by the fid */ +int +p9_client_renameat(struct p9_fid *oldfid, char 
*oldname, struct p9_fid *newfid, + char *newname) +{ + int error; + struct p9_client *clnt; + struct p9_req_t *req; + + P9_DEBUG(PROTO, "TRENAMEAT oldfid %d oldname %s newfid %d newfid %s", + oldfid->fid, oldname, newfid->fid, newname); + + error = 0; + clnt = oldfid->clnt; + + /* + * we are calling the request with TRENAMEAT tag and not TRENAME with + * the 9p protocol version 9p2000.u as the QEMU version supports this + * version of renaming + */ + req = p9_client_request(clnt, P9PROTO_TRENAMEAT, &error, "dsds", + oldfid->fid, oldname, newfid->fid, newname); + + if (error != 0) + return (error); + + p9_free_req(clnt, req); + return (error); +} + +/* Request to create symbolic link */ +int +p9_create_symlink(struct p9_fid *fid, char *name, char *symtgt, gid_t gid) +{ + int error; + struct p9_req_t *req; + struct p9_client *clnt; + struct p9_qid qid; + + error = 0; + clnt = fid->clnt; + + P9_DEBUG(PROTO, "TSYMLINK fid %d name %s\n", fid->fid, name); + + req = p9_client_request(clnt, P9PROTO_TSYMLINK, &error, "dssd", + fid->fid, name, symtgt, gid); + + if (error != 0) + return (error); + + error = p9_buf_readf(req->rc, clnt->proto_version, "Q", &qid); + if (error != 0) { + P9_DEBUG(ERROR, "%s: buf_readf failed %d\n", __func__, error); + return (error); + } + + P9_DEBUG(PROTO, "RSYMLINK qid %x.%jx.%x\n", + qid.type, (uintmax_t)qid.path, qid.version); + + p9_free_req(clnt, req); + return (0); +} + +/* Request to create hard link */ +int +p9_create_hardlink(struct p9_fid *dfid, struct p9_fid *oldfid, char *name) +{ + int error; + struct p9_req_t *req; + struct p9_client *clnt; + + error = 0; + clnt = dfid->clnt; + + P9_DEBUG(PROTO, "TLINK dfid %d oldfid %d name %s\n", + dfid->fid, oldfid->fid, name); + + req = p9_client_request(clnt, P9PROTO_TLINK, &error, "dds", dfid->fid, + oldfid->fid, name); + if (error != 0) + return (error); + + p9_free_req(clnt, req); + return (0); +} + +/* Request to read contents of symbolic link */ +int +p9_readlink(struct p9_fid *fid, char 
**target) +{ + int error; + struct p9_client *clnt; + struct p9_req_t *req; + + error = 0; + clnt = fid->clnt; + + P9_DEBUG(PROTO, "TREADLINK fid %d\n", fid->fid); + + req = p9_client_request(clnt, P9PROTO_TREADLINK, &error, "d", fid->fid); + if (error != 0) + return (error); + + error = p9_buf_readf(req->rc, clnt->proto_version, "s", target); + if (error != 0) { + P9_DEBUG(ERROR, "%s: buf_readf failed %d\n", __func__, error); + return (error); + } + + P9_DEBUG(PROTO, "RREADLINK target %s \n", *target); + + p9_free_req(clnt, req); + return (0); +} + +/* Get file attributes of the file referenced by the fid */ +int +p9_client_getattr(struct p9_fid *fid, struct p9_stat_dotl *stat_dotl, + uint64_t request_mask) +{ + int err; + struct p9_client *clnt; + struct p9_req_t *req; + + err = 0; + + P9_DEBUG(PROTO, "TGETATTR fid %d mask %ju\n", + fid->fid, (uintmax_t)request_mask); + + clnt = fid->clnt; + req = p9_client_request(clnt, P9PROTO_TGETATTR, &err, "dq", fid->fid, + request_mask); + if (req == NULL) { + P9_DEBUG(ERROR, "%s: allocation failed %d", __func__, err); + goto error; + } + + err = p9_buf_readf(req->rc, clnt->proto_version, "A", stat_dotl); + if (err != 0) { + P9_DEBUG(ERROR, "%s: buf_readf failed %d\n", __func__, err); + goto error; + } + + p9_free_req(clnt, req); + P9_DEBUG(PROTO, "RGETATTR fid %d qid %x.%jx.%x st_mode %8.8x " + "uid %d gid %d nlink %ju rdev %jx st_size %jx blksize %ju " + "blocks %ju st_atime_sec %ju, st_atime_nsec %ju " + "st_mtime_sec %ju, st_mtime_nsec %ju st_ctime_sec %ju " + "st_ctime_nsec %ju st_btime_sec %ju, st_btime_nsec %ju " + "st_stat %ju, st_data_version %ju \n", fid->fid, + stat_dotl->qid.type, (uintmax_t)stat_dotl->qid.path, + stat_dotl->qid.version, stat_dotl->st_mode, stat_dotl->st_uid, + stat_dotl->st_gid, (uintmax_t)stat_dotl->st_nlink, + (uintmax_t)stat_dotl->st_rdev, (uintmax_t)stat_dotl->st_size, + (uintmax_t)stat_dotl->st_blksize, + (uintmax_t)stat_dotl->st_blocks, (uintmax_t)stat_dotl->st_atime_sec, + 
(uintmax_t)stat_dotl->st_atime_nsec, (uintmax_t)stat_dotl->st_mtime_sec, + (uintmax_t)stat_dotl->st_mtime_nsec, (uintmax_t)stat_dotl->st_ctime_sec, + (uintmax_t)stat_dotl->st_ctime_nsec, (uintmax_t)stat_dotl->st_btime_sec, + (uintmax_t)stat_dotl->st_btime_nsec, (uintmax_t)stat_dotl->st_gen, + (uintmax_t)stat_dotl->st_data_version); + + return (err); + +error: + if (req != NULL) + p9_free_req(clnt, req); + + return (err); +} + +/* Set file attributes of the file referenced by the fid */ +int +p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr) +{ + int err; + struct p9_req_t *req; + struct p9_client *clnt; + + err = 0; + + P9_DEBUG(PROTO, "TSETATTR fid %d" + " valid %x mode %x uid %d gid %d size %ju" + " atime_sec %ju atime_nsec %ju" + " mtime_sec %ju mtime_nsec %ju\n", + fid->fid, + p9attr->valid, p9attr->mode, p9attr->uid, p9attr->gid, + (uintmax_t)p9attr->size, (uintmax_t)p9attr->atime_sec, + (uintmax_t)p9attr->atime_nsec, (uintmax_t)p9attr->mtime_sec, + (uintmax_t)p9attr->mtime_nsec); + + clnt = fid->clnt; + + /* Any client_request error is converted to req == NULL error*/ + req = p9_client_request(clnt, P9PROTO_TSETATTR, &err, "dA", fid->fid, + p9attr); + + if (req == NULL) { + P9_DEBUG(ERROR, "%s: allocation failed %d\n", __func__, err); + goto error; + } + + p9_free_req(clnt, req); +error: + return (err); +} + diff --git a/sys/fs/p9fs/p9_debug.h b/sys/fs/p9fs/p9_debug.h new file mode 100644 --- /dev/null +++ b/sys/fs/p9fs/p9_debug.h @@ -0,0 +1,45 @@ +/*- + * Copyright (c) 2017 Juniper Networks, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef FS_P9FS_P9_DEBUG_H
#define FS_P9FS_P9_DEBUG_H

/*
 * Bit mask of enabled trace categories; tested against the P9_DEBUG_* flags
 * below by the P9_DEBUG() macro.
 */
extern int p9_debug_level; /* All debugs on now */

/* 9P debug flags: one bit per trace category */
#define P9_DEBUG_TRANS		0x0001	/* Trace transport */
#define P9_DEBUG_SUBR		0x0002	/* Trace driver submissions */
#define P9_DEBUG_LPROTO		0x0004	/* Low level protocol tracing */
#define P9_DEBUG_PROTO		0x0008	/* High level protocol tracing */
#define P9_DEBUG_VOPS		0x0010	/* VOPs tracing */
#define P9_DEBUG_ERROR		0x0020	/* verbose error messages */

/*
 * Print a printf-style message if the given category is enabled in
 * p9_debug_level.  category is the flag name without its P9_DEBUG_ prefix
 * (e.g. TRANS, ERROR); it is pasted onto that prefix.  The arguments are
 * only evaluated when the category is enabled.
 */
#define P9_DEBUG(category, fmt, ...) do {			\
	if ((p9_debug_level & P9_DEBUG_##category) != 0)	\
		printf(fmt, ##__VA_ARGS__);			\
} while (0)

#endif /* FS_P9FS_P9_DEBUG_H */
diff --git a/sys/fs/p9fs/p9_protocol.h b/sys/fs/p9fs/p9_protocol.h
new file mode 100644
--- /dev/null
+++ b/sys/fs/p9fs/p9_protocol.h
@@ -0,0 +1,280 @@
/*-
 * Copyright (c) 2017 Juniper Networks, Inc.
 * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */

/* File contains 9P protocol definitions */

#ifndef FS_P9FS_P9_PROTOCOL_H
#define FS_P9FS_P9_PROTOCOL_H

/* NOTE(review): the include target is missing in this copy of the file. */
#include

/*
 * 9P message types.
 *
 * Each request (T-message) has an even value and the matching response
 * (R-message) is the next odd value, which is why only the T-messages
 * carry an explicit initializer.
 */
enum p9_cmds_t {
	P9PROTO_TLERROR = 6,	/* not used */
	P9PROTO_RLERROR,	/* response for any failed request */
	P9PROTO_TSTATFS = 8,	/* file system status request */
	P9PROTO_RSTATFS,	/* file system status response */
	P9PROTO_TLOPEN = 12,	/* open a file (9P2000.L) */
	P9PROTO_RLOPEN,		/* response to open request (9P2000.L) */
	P9PROTO_TLCREATE = 14,	/* prepare a handle for I/O on a new file (9P2000.L) */
	P9PROTO_RLCREATE,	/* response with file access information (9P2000.L) */
	P9PROTO_TSYMLINK = 16,	/* symlink creation request */
	P9PROTO_RSYMLINK,	/* symlink creation response */
	P9PROTO_TMKNOD = 18,	/* create a special file object request */
	P9PROTO_RMKNOD,		/* create a special file object response */
	P9PROTO_TRENAME = 20,	/* rename a file request */
	P9PROTO_RRENAME,	/* rename a file response */
	P9PROTO_TREADLINK = 22,	/* request to read value of symbolic link */
	P9PROTO_RREADLINK,	/* response to read value of symbolic link request */
	P9PROTO_TGETATTR = 24,	/* get file attributes request */
	P9PROTO_RGETATTR,	/* get file attributes response */
	P9PROTO_TSETATTR = 26,	/* set file attributes request */
	P9PROTO_RSETATTR,	/* set file attributes response */
	P9PROTO_TXATTRWALK = 30,/* request to read extended attributes */
	P9PROTO_RXATTRWALK,	/* response from server with attributes */
	P9PROTO_TXATTRCREATE = 32,/* request to set extended attribute */
	P9PROTO_RXATTRCREATE,	/* response from server for setting extended attribute */
	P9PROTO_TREADDIR = 40,	/* request to read a directory */
	P9PROTO_RREADDIR,	/* response from server for read request */
	P9PROTO_TFSYNC = 50,	/* request to flush any cached data to disk */
	P9PROTO_RFSYNC,		/* response when cached data is flushed */
	P9PROTO_TLOCK = 52,	/* acquire or release a POSIX record lock */
	P9PROTO_RLOCK,		/* response with the status of the lock */
	P9PROTO_TGETLOCK = 54,	/* request to check for presence of a POSIX record lock */
	P9PROTO_RGETLOCK,	/* response with the details of the lock if acquired */
	P9PROTO_TLINK = 70,	/* request to create hard link */
	P9PROTO_RLINK,		/* create hard link response */
	P9PROTO_TMKDIR = 72,	/* create a directory request */
	P9PROTO_RMKDIR,		/* create a directory response */
	P9PROTO_TRENAMEAT = 74,	/* request to rename a file or directory */
	P9PROTO_RRENAMEAT,	/* response to rename request */
	P9PROTO_TUNLINKAT = 76,	/* unlink a file or directory */
	P9PROTO_RUNLINKAT,	/* response to unlink request */
	P9PROTO_TVERSION = 100,	/* request for version handshake */
	P9PROTO_RVERSION,	/* response for version handshake */
	P9PROTO_TAUTH = 102,	/* request to establish authentication channel */
	P9PROTO_RAUTH,		/* response with authentication information */
	P9PROTO_TATTACH = 104,	/* establish a user's access to a file system */
	P9PROTO_RATTACH,	/* response with top level handle to file hierarchy */
	P9PROTO_TERROR = 106,	/* not used */
	P9PROTO_RERROR,		/* response for any failed request */
	P9PROTO_TFLUSH = 108,	/* request to abort a previous request */
	P9PROTO_RFLUSH,		/* response when previous request has been cancelled */
	P9PROTO_TWALK = 110,	/* descend a directory hierarchy */
	P9PROTO_RWALK,		/* response with new handle for position within hierarchy */
	P9PROTO_TOPEN = 112,	/* prepare file handle for I/O for an existing file */
	P9PROTO_ROPEN,		/* response with file access information */
	P9PROTO_TCREATE = 114,	/* prepare a handle for I/O on a new file */
	P9PROTO_RCREATE,	/* response with file access information */
	P9PROTO_TREAD = 116,	/* request to transfer data from a file */
	P9PROTO_RREAD,		/* response with data requested */
	P9PROTO_TWRITE = 118,	/* request to transfer data to a file */
	P9PROTO_RWRITE,		/* response with how much data was written to the file */
	P9PROTO_TCLUNK = 120,	/* forget about a handle to a file within the file system */
	P9PROTO_RCLUNK,		/* response from the server for forgetting the file handle */
	P9PROTO_TREMOVE = 122,	/* request to remove a file */
	P9PROTO_RREMOVE,	/* response when server has removed the file */
	P9PROTO_TSTAT = 124,	/* request file entity attributes */
	P9PROTO_RSTAT,		/* response with file entity attributes */
	P9PROTO_TWSTAT = 126,	/* request to update file entity attributes */
	P9PROTO_RWSTAT,		/* response when file entity attributes are updated */
};

/*
 * File open modes.  The low two bits select the access mode
 * (values 0x00-0x03); the remaining constants are single-bit flags.
 */
enum p9_open_mode_t {
	P9PROTO_OREAD = 0x00,	/* open file for reading only */
	P9PROTO_OWRITE = 0x01,	/* open file for writing only */
	P9PROTO_ORDWR = 0x02,	/* open file for both reading and writing */
	P9PROTO_OEXEC = 0x03,	/* open file for execution */
	P9PROTO_OTRUNC = 0x10,	/* truncate file to zero length before opening it */
	P9PROTO_OREXEC = 0x20,	/* close the file when exec system call is made */
	P9PROTO_ORCLOSE = 0x40,	/* remove the file when it is closed */
	P9PROTO_OAPPEND = 0x80,	/* open the file and seek to the end of the file */
	P9PROTO_OEXCL = 0x1000,	/* only create a file and not open it */
};

/*
 * File permissions: single-bit flags occupying the upper half of a
 * 32-bit mode word.
 *
 * NOTE(review): P9PROTO_DMDIR (0x80000000) is outside the range of int;
 * confirm every supported compiler accepts it as an enumerator.
 */
enum p9_perm_t {
	P9PROTO_DMDIR = 0x80000000,	/* permission bit for directories */
	P9PROTO_DMAPPEND = 0x40000000,	/* permission bit for append-only files */
	P9PROTO_DMEXCL = 0x20000000,	/* permission bit for exclusive use (only one open handle allowed) */
	P9PROTO_DMMOUNT = 0x10000000,	/* permission bit for mount points */
	P9PROTO_DMAUTH = 0x08000000,	/* permission bit for authentication file */
	P9PROTO_DMTMP = 0x04000000,	/* permission bit for non-backed-up files */
	P9PROTO_DMSYMLINK = 0x02000000,	/* permission bit for symbolic link (9P2000.u) */
	P9PROTO_DMLINK = 0x01000000,	/* permission bit for hard-link (9P2000.u) */
	P9PROTO_DMDEVICE = 0x00800000,	/* permission bit for device files (9P2000.u) */
	P9PROTO_DMNAMEDPIPE = 0x00200000,/* permission bit for named pipe (9P2000.u) */
	P9PROTO_DMSOCKET = 0x00100000,	/* permission bit for socket (9P2000.u) */
	P9PROTO_DMSETUID = 0x00080000,	/* permission bit for setuid (9P2000.u) */
	P9PROTO_DMSETGID = 0x00040000,	/* permission bit for setgid (9P2000.u) */
	P9PROTO_DMSETVTX = 0x00010000,	/* permission bit for sticky bit (9P2000.u) */
};

/*
 * QID types - they are primarily used to
 * differentiate semantics for a file system.
 * Each value is the corresponding p9_perm_t bit shifted down by 24.
 */
enum p9_qid_t {
	P9PROTO_QTDIR = 0x80,		/* directory */
	P9PROTO_QTAPPEND = 0x40,	/* append-only */
	P9PROTO_QTEXCL = 0x20,		/* exclusive use (only one open handle allowed) */
	P9PROTO_QTMOUNT = 0x10,		/* mount points */
	P9PROTO_QTAUTH = 0x08,		/* authentication file */
	P9PROTO_QTTMP = 0x04,		/* non-backed-up files */
	P9PROTO_QTSYMLINK = 0x02,	/* symbolic links */
	P9PROTO_QTLINK = 0x01,		/* hard link */
	P9PROTO_QTFILE = 0x00,		/* normal files */
};

/* P9 Magic Numbers */
#define P9PROTO_NOFID	(uint32_t)(~0)	/* all-ones 32-bit value */
#define P9_DEFUNAME	"nobody"
#define P9_DEFANAME	""
#define P9_NONUNAME	(uint32_t)(~0)	/* all-ones 32-bit value */
#define P9_MAXWELEM	16	/* presumably the walk-element limit - TODO confirm */

/* Exchange unit between Qemu and Client */
struct p9_qid {
	uint8_t type;		/* the type of the file */
	uint32_t version;	/* version number for given path */
	uint64_t path;		/* the file server's unique id for file */
};

/* FS information stat structure */
struct p9_statfs {
	uint32_t type;		/* type of file system */
	uint32_t bsize;		/* optimal transfer block size */
	uint64_t blocks;	/* total data blocks in file system */
	uint64_t bfree;		/* free blocks in fs */
	uint64_t bavail;	/* free blocks avail to non-superuser */
	uint64_t files;		/* total file nodes in file system */
	uint64_t ffree;		/* free file nodes in fs */
	uint64_t fsid;		/* file system id */
	uint32_t namelen;	/* maximum length of filenames */
};


/*
 * File system metadata information.
 * The four string members and 'extension' are heap pointers; the 'S'
 * decoder in p9_protocol.c allocates them and stat_free() releases them.
 */
struct p9_wstat {
	uint16_t size;		/* total byte count of the following data */
	uint16_t type;		/* type of file */
	uint32_t dev;		/* id of device containing file */
	struct p9_qid qid;	/* identifier used by server for file system entity information */
	uint32_t mode;		/* protection */
	uint32_t atime;		/* time of last access */
	uint32_t mtime;		/* time of last modification */
	uint64_t length;	/* length of file in bytes */
	char *name;		/* file name */
	char *uid;		/* user ID of owner */
	char *gid;		/* group ID of owner */
	char *muid;		/* name of the user who last modified the file */
	char *extension;	/* 9p2000.u extensions */
	uid_t n_uid;		/* 9p2000.u extensions */
	gid_t n_gid;		/* 9p2000.u extensions */
	uid_t n_muid;		/* 9p2000.u extensions */
};

/* The Linux (9P2000.L) version of the FS information stat structure */
struct p9_stat_dotl {
	uint64_t st_result_mask;/* indicates fields that are requested */
	struct p9_qid qid;	/* identifier used by server for file system entity information */
	uint32_t st_mode;	/* protection */
	uid_t st_uid;		/* user ID of owner */
	gid_t st_gid;		/* group ID of owner */
	uint64_t st_nlink;	/* number of hard links */
	uint64_t st_rdev;	/* device ID (if special file) */
	uint64_t st_size;	/* total size, in bytes */
	uint64_t st_blksize;	/* blocksize for file system I/O */
	uint64_t st_blocks;	/* number of 512B blocks allocated */
	uint64_t st_atime_sec;	/* time of last access, seconds */
	uint64_t st_atime_nsec;	/* time of last access, nanoseconds */
	uint64_t st_mtime_sec;	/* time of last modification, seconds */
	uint64_t st_mtime_nsec;	/* time of last modification, nanoseconds */
	uint64_t st_ctime_sec;	/* time of last status change, seconds */
	uint64_t st_ctime_nsec;	/* time of last status change, nanoseconds */
	uint64_t st_btime_sec;	/* following members are reserved for future use */
	uint64_t st_btime_nsec;
	uint64_t st_gen;
	uint64_t st_data_version;
};

/* P9 inode attribute for setattr */
struct p9_iattr_dotl {
	uint32_t valid;		/* bit fields specifying which fields are valid */
	uint32_t mode;		/* protection */
	uid_t uid;		/* user id of owner */
	gid_t gid;		/* group id */
	uint64_t size;		/* file size */
	uint64_t atime_sec;	/* last access time in seconds */
	uint64_t atime_nsec;	/* last access time in nanoseconds */
	uint64_t mtime_sec;	/* last modification time in seconds */
	uint64_t mtime_nsec;	/* last modification time in nanoseconds */
};

/* Bits for p9_stat_dotl.st_result_mask, one per attribute group. */
#define P9PROTO_STATS_MODE		0x00000001ULL
#define P9PROTO_STATS_NLINK		0x00000002ULL
#define P9PROTO_STATS_UID		0x00000004ULL
#define P9PROTO_STATS_GID		0x00000008ULL
#define P9PROTO_STATS_RDEV		0x00000010ULL
#define P9PROTO_STATS_ATIME		0x00000020ULL
#define P9PROTO_STATS_MTIME		0x00000040ULL
#define P9PROTO_STATS_CTIME		0x00000080ULL
#define P9PROTO_STATS_INO		0x00000100ULL
#define P9PROTO_STATS_SIZE		0x00000200ULL
#define P9PROTO_STATS_BLOCKS		0x00000400ULL

#define P9PROTO_STATS_BTIME		0x00000800ULL
#define P9PROTO_STATS_GEN		0x00001000ULL
#define P9PROTO_STATS_DATA_VERSION	0x00002000ULL

#define P9PROTO_STATS_BASIC		0x000007ffULL /* Mask for fields up to BLOCKS */
#define P9PROTO_STATS_ALL		0x00003fffULL /* Mask for all fields above */

/* Bits for p9_iattr_dotl.valid. */
#define P9PROTO_SETATTR_MODE		0x00000001UL
#define P9PROTO_SETATTR_UID		0x00000002UL
#define P9PROTO_SETATTR_GID		0x00000004UL
#define P9PROTO_SETATTR_SIZE		0x00000008UL
#define P9PROTO_SETATTR_ATIME		0x00000010UL
#define P9PROTO_SETATTR_MTIME		0x00000020UL
#define P9PROTO_SETATTR_CTIME		0x00000040UL
#define P9PROTO_SETATTR_ATIME_SET	0x00000080UL
#define P9PROTO_SETATTR_MTIME_SET	0x00000100UL
/* Every SETATTR bit above except P9PROTO_SETATTR_CTIME (0x40). */
#define P9PROTO_SETATTR_MASK		0x000001bfUL

#define P9PROTO_TGETATTR_BLK		512

/*
 * PDU buffer used for SG lists.
 * The encoders in p9_protocol.c append at 'size' and stop at 'capacity';
 * the decoders read from 'offset' and stop at 'size'.
 */
struct p9_buffer {
	uint32_t size;		/* number of valid bytes in sdata */
	uint16_t tag;		/* tag written into the PDU header */
	uint8_t id;		/* message type, set by p9_buf_prepare() */
	size_t offset;		/* read cursor into sdata */
	size_t capacity;	/* total bytes available in sdata */
	uint8_t *sdata;		/* PDU storage */
};

#endif /* FS_P9FS_P9_PROTOCOL_H */
diff --git a/sys/fs/p9fs/p9_protocol.c b/sys/fs/p9fs/p9_protocol.c new file mode 100644 --- /dev/null +++ b/sys/fs/p9fs/p9_protocol.c @@ -0,0 +1,632 @@ +/*- + * Copyright (c) 2017 Juniper Networks, Inc. + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * 9P Protocol Support Code + * This file provides the standard for the FS interactions with the server + * interface as it can understand only this protocol. 
The details of the + * protocol can be found here + * XXX (link to protocol details page on FreeBSD wiki) + */ + +#include +#include +#include +#include + +#define P9FS_MAXLEN 255 + +static int p9_buf_writef(struct p9_buffer *buf, int proto_version, + const char *fmt, ...); +static void stat_free(struct p9_wstat *sbuf); + +static void +stat_free(struct p9_wstat *stbuf) +{ + + free(stbuf->name, M_TEMP); + free(stbuf->uid, M_TEMP); + free(stbuf->gid, M_TEMP); + free(stbuf->muid, M_TEMP); + free(stbuf->extension, M_TEMP); +} + +static size_t +buf_read(struct p9_buffer *buf, void *data, size_t size) +{ + size_t len; + + len = min(buf->size - buf->offset, size); + + memcpy(data, &buf->sdata[buf->offset], len); + buf->offset += len; + + return (size - len); +} + +static size_t +buf_write(struct p9_buffer *buf, const void *data, size_t size) +{ + size_t len; + + len = min(buf->capacity - buf->size, size); + + memcpy(&buf->sdata[buf->size], data, len); + buf->size += len; + + return (size - len); +} + +/* + * Main buf_read routine. This copies the data from the buffer into the + * respective values based on the data type. + * Here + * b - int8_t + * w - int16_t + * d - int32_t + * q - int64_t + * s - string + * u - uid + * g - gid + * Q - qid + * S - stat + * A - getattr (9P2000.L) + * D - data blob (int32_t size followed by void *, results are not freed) + * T - array of strings (int16_t count, followed by strings) + * R - array of qids (int16_t count, followed by qids) + * ? 
- return if version is not .u or .l + */ +static int +p9_buf_vreadf(struct p9_buffer *buf, int proto_version, const char *fmt, + va_list ap) +{ + const char *ptr; + int error; + + error = 0; + + for (ptr = fmt; *ptr; ptr++) { + switch (*ptr) { + case 'b': + { + int8_t *val = va_arg(ap, int8_t *); + + if (buf_read(buf, val, sizeof(*val))) + error = EFAULT; + break; + } + case 'w': + { + int16_t *val = va_arg(ap, int16_t *); + + if (buf_read(buf, val, sizeof(*val))) + error = EFAULT; + break; + } + case 'd': + { + int32_t *val = va_arg(ap, int32_t *); + + if (buf_read(buf, val, sizeof(*val))) + error = EFAULT; + break; + } + case 'q': + { + int64_t *val = va_arg(ap, int64_t *); + + if (buf_read(buf, val, sizeof(*val))) + error = EFAULT; + break; + } + case 's': + { + char **sptr_p = va_arg(ap, char **); + uint16_t len; + char *sptr; + + error = buf_read(buf, &len, sizeof(uint16_t)); + if (error) + break; + + sptr = malloc(len + 1, M_TEMP, M_NOWAIT | M_ZERO); + + if (buf_read(buf, sptr, len)) { + error = EFAULT; + free(sptr, M_TEMP); + sptr = NULL; + } else { + (sptr)[len] = 0; + *sptr_p = sptr; + } + break; + } + case 'u': + { + uid_t *val = va_arg(ap, uid_t *); + + if (buf_read(buf, val, sizeof(*val))) + error = EFAULT; + break; + + } + case 'g': + { + gid_t *val = va_arg(ap, gid_t *); + + if (buf_read(buf, val, sizeof(*val))) + error = EFAULT; + break; + + } + case 'Q': + { + struct p9_qid *qid = va_arg(ap, struct p9_qid *); + + error = p9_buf_readf(buf, proto_version, "bdq", + &qid->type, &qid->version, &qid->path); + + break; + } + case 'S': + { + struct p9_wstat *stbuf = va_arg(ap, struct p9_wstat *); + + error = p9_buf_readf(buf, proto_version, "wwdQdddqssss?sddd", + &stbuf->size, &stbuf->type, &stbuf->dev, &stbuf->qid, + &stbuf->mode, &stbuf->atime, &stbuf->mtime, &stbuf->length, + &stbuf->name, &stbuf->uid, &stbuf->gid, &stbuf->muid, + &stbuf->extension, &stbuf->n_uid, &stbuf->n_gid, &stbuf->n_muid); + + if (error != 0) + stat_free(stbuf); + break; + } + case 
'A': + { + struct p9_stat_dotl *stbuf = va_arg(ap, struct p9_stat_dotl *); + + error = p9_buf_readf(buf, proto_version, "qQdugqqqqqqqqqqqqqqq", + &stbuf->st_result_mask, &stbuf->qid, &stbuf->st_mode, + &stbuf->st_uid,&stbuf->st_gid, &stbuf->st_nlink, + &stbuf->st_rdev, &stbuf->st_size, &stbuf->st_blksize, + &stbuf->st_blocks, &stbuf->st_atime_sec, + &stbuf->st_atime_nsec, &stbuf->st_mtime_sec, + &stbuf->st_mtime_nsec, &stbuf->st_ctime_sec, + &stbuf->st_ctime_nsec, &stbuf->st_btime_sec, + &stbuf->st_btime_nsec, &stbuf->st_gen, + &stbuf->st_data_version); + + break; + } + case 'D': + { + uint32_t *count = va_arg(ap, uint32_t *); + void **data = va_arg(ap, void **); + + error = buf_read(buf, count, sizeof(uint32_t)); + if (error == 0) { + *count = MIN(*count, buf->size - buf->offset); + *data = &buf->sdata[buf->offset]; + } + break; + } + case 'T': + { + uint16_t *nwname_p = va_arg(ap, uint16_t *); + char ***wnames_p = va_arg(ap, char ***); + uint16_t nwname; + char **wnames; + int i; + + error = buf_read(buf, nwname_p, sizeof(uint16_t)); + if (error != 0) + break; + + nwname = *nwname_p; + wnames = malloc(sizeof(char *) * nwname, M_TEMP, M_NOWAIT | M_ZERO); + + for (i = 0; i < nwname && (error == 0); i++) + error = p9_buf_readf(buf, proto_version, "s", &wnames[i]); + + if (error != 0) { + for (i = 0; i < nwname; i++) + free((wnames)[i], M_TEMP); + free(wnames, M_TEMP); + } else + *wnames_p = wnames; + break; + } + case 'R': + { + uint16_t *nwqid_p = va_arg(ap, uint16_t *); + struct p9_qid **wqids_p = va_arg(ap, struct p9_qid **); + uint16_t nwqid; + struct p9_qid *wqids; + int i; + + wqids = NULL; + error = buf_read(buf, nwqid_p, sizeof(uint16_t)); + if (error != 0) + break; + + nwqid = *nwqid_p; + wqids = malloc(nwqid * sizeof(struct p9_qid), M_TEMP, M_NOWAIT | M_ZERO); + if (wqids == NULL) { + error = ENOMEM; + break; + } + for (i = 0; i < nwqid && (error == 0); i++) + error = p9_buf_readf(buf, proto_version, "Q", &(wqids)[i]); + + if (error != 0) { + free(wqids, 
M_TEMP); + } else + *wqids_p = wqids; + + break; + } + case '?': + { + if ((proto_version != p9_proto_2000u) && (proto_version != p9_proto_2000L)) + return (0); + break; + } + default: + break; + } + + if (error != 0) + break; + } + + return (error); +} + +/* + * Main buf_write routine. This copies the data into the buffer from the + * respective values based on the data type. + * Here + * b - int8_t + * w - int16_t + * d - int32_t + * q - int64_t + * s - string + * u - uid + * g - gid + * Q - qid + * S - stat + * D - data blob (int32_t size followed by void *, results are not freed) + * T - array of strings (int16_t count, followed by strings) + * W - string of a specific length + * R - array of qids (int16_t count, followed by qids) + * A - setattr (9P2000.L) + * ? - return if version is not .u or .l + */ + +int +p9_buf_vwritef(struct p9_buffer *buf, int proto_version, const char *fmt, + va_list ap) +{ + const char *ptr; + int error; + + error = 0; + + for (ptr = fmt; *ptr; ptr++) { + switch (*ptr) { + case 'b': + { + int8_t val = va_arg(ap, int); + + if (buf_write(buf, &val, sizeof(val))) + error = EFAULT; + break; + } + case 'w': + { + int16_t val = va_arg(ap, int); + + if (buf_write(buf, &val, sizeof(val))) + error = EFAULT; + break; + } + case 'd': + { + int32_t val = va_arg(ap, int32_t); + + if (buf_write(buf, &val, sizeof(val))) + error = EFAULT; + break; + } + case 'q': + { + int64_t val = va_arg(ap, int64_t); + + if (buf_write(buf, &val, sizeof(val))) + error = EFAULT; + + break; + } + case 's': + { + const char *sptr = va_arg(ap, const char *); + uint16_t len = 0; + + if (sptr) + len = MIN(strlen(sptr), P9FS_MAXLEN); + + error = buf_write(buf, &len, sizeof(uint16_t)); + if (error == 0 && buf_write(buf, sptr, len)) + error = EFAULT; + break; + } + case 'u': + { + uid_t val = va_arg(ap, uid_t); + + if (buf_write(buf, &val, sizeof(val))) + error = EFAULT; + break; + + } + case 'g': + { + gid_t val = va_arg(ap, gid_t); + + if (buf_write(buf, &val, 
sizeof(val))) + error = EFAULT; + break; + + } + case 'Q': + { + const struct p9_qid *qid = va_arg(ap, const struct p9_qid *); + + error = p9_buf_writef(buf, proto_version, "bdq", + qid->type, qid->version, qid->path); + break; + } + case 'S': + { + struct p9_wstat *stbuf = va_arg(ap, struct p9_wstat *); + + error = p9_buf_writef(buf, proto_version, + "wwdQdddqssss?sddd", stbuf->size, stbuf->type, stbuf->dev, &stbuf->qid, + stbuf->mode, stbuf->atime, stbuf->mtime, stbuf->length, stbuf->name, + stbuf->uid, stbuf->gid, stbuf->muid, stbuf->extension, stbuf->n_uid, + stbuf->n_gid, stbuf->n_muid); + + if (error != 0) + stat_free(stbuf); + + break; + } + case 'D': + { + uint32_t count = va_arg(ap, uint32_t); + void *data = va_arg(ap, void *); + + error = buf_write(buf, &count, sizeof(uint32_t)); + if ((error == 0) && buf_write(buf, data, count)) + error = EFAULT; + + break; + } + case 'T': + { + char **wnames = va_arg(ap, char **); + uint16_t nwnames = va_arg(ap, int); + + error = buf_write(buf, &nwnames, sizeof(uint16_t)); + if (error == 0) { + int i = 0; + for (i = 0; i < nwnames; i++) { + error = p9_buf_writef(buf, proto_version, "s", wnames[i]); + if (error != 0) + break; + } + } + break; + } + case 'W': + { + const char *sptr = va_arg(ap, const char*); + uint16_t len = va_arg(ap, int); + + error = buf_write(buf, &len, sizeof(uint16_t)); + if (error == 0 && buf_write(buf, sptr, len)) + error = EFAULT; + break; + + } + case 'R': + { + uint16_t nwqid = va_arg(ap, int); + struct p9_qid *wqids = va_arg(ap, struct p9_qid *); + int i; + + error = buf_write(buf, &nwqid, sizeof(uint16_t)); + if (error == 0) { + + for (i = 0; i < nwqid; i++) { + error = p9_buf_writef(buf, proto_version, "Q", &wqids[i]); + if (error != 0) + break; + } + } + break; + } + case 'A': + { + struct p9_iattr_dotl *p9attr = va_arg(ap, struct p9_iattr_dotl *); + + error = p9_buf_writef(buf, proto_version, "ddugqqqqq", + p9attr->valid, p9attr->mode, p9attr->uid, + p9attr->gid, p9attr->size, 
p9attr->atime_sec, + p9attr->atime_nsec, p9attr->mtime_sec, + p9attr->mtime_nsec); + + break; + } + case '?': + { + if ((proto_version != p9_proto_2000u) && (proto_version != p9_proto_2000L)) + return (0); + break; + } + default: + break; + } + + if (error != 0) + break; + } + + return (error); +} + +/* Variadic form of buf_read */ +int +p9_buf_readf(struct p9_buffer *buf, int proto_version, const char *fmt, ...) +{ + va_list ap; + int ret; + + va_start(ap, fmt); + ret = p9_buf_vreadf(buf, proto_version, fmt, ap); + va_end(ap); + + return (ret); +} + +/* Variadic form of buf_write */ +static int +p9_buf_writef(struct p9_buffer *buf, int proto_version, const char *fmt, ...) +{ + va_list ap; + int ret; + + va_start(ap, fmt); + ret = p9_buf_vwritef(buf, proto_version, fmt, ap); + va_end(ap); + + return (ret); +} + +/* File stats read routine for P9 to get attributes of files */ +int +p9stat_read(struct p9_client *clnt, char *buf, size_t len, struct p9_wstat *st) +{ + struct p9_buffer msg_buf; + int ret; + + msg_buf.size = len; + msg_buf.capacity = len; + msg_buf.sdata = buf; + msg_buf.offset = 0; + + ret = p9_buf_readf(&msg_buf, clnt->proto_version, "S", st); + if (ret) { + P9_DEBUG(ERROR, "%s: failed: %d\n", __func__, ret); + } + + return (ret); +} + +/* + * P9_header preparation routine. All p9 buffers have to have this header(QEMU_HEADER) at the + * front of the buffer. + */ +int +p9_buf_prepare(struct p9_buffer *buf, int8_t type) +{ + buf->id = type; + return (p9_buf_writef(buf, 0, "dbw", 0, type, buf->tag)); +} + +/* + * Final write to the buffer, this is the total size of the buffer. 
Since the buffer length can + * vary with request, this is computed at the end just before sending the request to the driver + */ +int +p9_buf_finalize(struct p9_client *clnt, struct p9_buffer *buf) +{ + int size; + int error; + + size = buf->size; + buf->size = 0; + error = p9_buf_writef(buf, 0, "d", size); + buf->size = size; + + P9_DEBUG(LPROTO, "%s: size=%d type: %d tag: %d\n", + __func__, buf->size, buf->id, buf->tag); + + return (error); +} + +/* Reset values of the buffer */ +void +p9_buf_reset(struct p9_buffer *buf) +{ + + buf->offset = 0; + buf->size = 0; +} + +/* + * Directory entry read with the buf we have. Call this once we have the buf to parse. + * This buf, obtained from the server, is parsed to make dirent in readdir. + */ +int +p9_dirent_read(struct p9_client *clnt, char *buf, int start, int len, + struct p9_dirent *dent) +{ + struct p9_buffer msg_buf; + int ret; + char *nameptr; + uint16_t sle; + + msg_buf.size = len; + msg_buf.capacity = len; + msg_buf.sdata = buf; + msg_buf.offset = start; + + ret = p9_buf_readf(&msg_buf, clnt->proto_version, "Qqbs", &dent->qid, + &dent->d_off, &dent->d_type, &nameptr); + if (ret) { + P9_DEBUG(ERROR, "%s: failed: %d\n", __func__, ret); + goto out; + } + + sle = strlen(nameptr); + strncpy(dent->d_name, nameptr, sle); + dent->len = sle; + free(nameptr, M_TEMP); +out: + return (msg_buf.offset); +} diff --git a/sys/fs/p9fs/p9_transport.h b/sys/fs/p9fs/p9_transport.h new file mode 100644 --- /dev/null +++ b/sys/fs/p9fs/p9_transport.h @@ -0,0 +1,53 @@ +/*- + * Copyright (c) 2017 Juniper Networks, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */

/* Transport definitions */
#ifndef FS_P9FS_P9_TRANSPORT_H
#define FS_P9FS_P9_TRANSPORT_H

/* NOTE(review): the include target is missing in this copy of the file. */
#include

struct p9_req_t;

/*
 * Transport module interface.
 * A transport fills in one of these and hands it to p9_register_trans();
 * p9_get_trans_by_name() later finds it by its 'name'.
 */
struct p9_trans_module {
	TAILQ_ENTRY(p9_trans_module) link;	/* linkage in the transport list */
	char *name;			/* name of transport */
	/* member function to create a new connection on this transport */
	int (*create)(const char *mount_tag, void **handlep);
	/* member function to terminate a connection on this transport */
	void (*close) (void *handle);
	/* member function to issue a request to the transport */
	int (*request) (void *handle, struct p9_req_t *req);
	/* member function to cancel a request if it has been sent */
	int (*cancel) (void *handle, struct p9_req_t *req);
};

/* Add / remove a transport from the list, and look one up by name. */
void p9_register_trans(struct p9_trans_module *m);
void p9_unregister_trans(struct p9_trans_module *m);
struct p9_trans_module *p9_get_trans_by_name(char *s);

#endif /* FS_P9FS_P9_TRANSPORT_H */
diff --git a/sys/fs/p9fs/p9_transport.c b/sys/fs/p9fs/p9_transport.c new file mode 100644 --- /dev/null
+++ b/sys/fs/p9fs/p9_transport.c @@ -0,0 +1,70 @@ +/*- + * Copyright (c) 2022-present Doug Rabson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include + +#include + +TAILQ_HEAD(, p9_trans_module) transports; + +static void +p9_transport_init(void) +{ + + TAILQ_INIT(&transports); +} + +SYSINIT(p9_transport, SI_SUB_DRIVERS, SI_ORDER_FIRST, p9_transport_init, NULL); + +void +p9_register_trans(struct p9_trans_module *m) +{ + + TAILQ_INSERT_TAIL(&transports, m, link); +} + +void +p9_unregister_trans(struct p9_trans_module *m) +{ + + TAILQ_REMOVE(&transports, m, link); +} + +struct p9_trans_module * +p9_get_trans_by_name(char *name) +{ + struct p9_trans_module *m; + + TAILQ_FOREACH(m, &transports, link) { + if (strcmp(m->name, name) == 0) + return (m); + } + return (NULL); +} + diff --git a/sys/fs/p9fs/p9fs.h b/sys/fs/p9fs/p9fs.h new file mode 100644 --- /dev/null +++ b/sys/fs/p9fs/p9fs.h @@ -0,0 +1,202 @@ +/*- + * Copyright (c) 2017-2020 Juniper Networks, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */

/* This file has prototypes specific to the p9fs file system */

#ifndef FS_P9FS_P9FS_H
#define FS_P9FS_P9FS_H

/* Forward declaration; the full definition appears later in this header. */
struct p9fs_session;

/*
 * QID: Unique identification for the file being accessed.
 * The three members have the same widths as those of struct p9_qid.
 */
struct p9fs_qid {
	uint8_t qid_mode;	/* file mode specifying file type */
	uint32_t qid_version;	/* version of the file */
	uint64_t qid_path;	/* unique integer among all files in hierarchy */
};

/* + * The in memory representation of the on disk inode. Save the current + * fields to write it back later.
+ */ +struct p9fs_inode { + /* Make it simple first, Add more fields later */ + uint64_t i_size; /* size of the inode */ + uint16_t i_type; /* type of inode */ + uint32_t i_dev; /* type of device */ + uint32_t i_mode; /* mode of the inode */ + uint32_t i_atime; /* time of last access */ + uint32_t i_mtime; /* time of last modification */ + uint32_t i_ctime; /* time of last status change */ + uint32_t i_atime_nsec; /* times of last access in nanoseconds resolution */ + uint32_t i_mtime_nsec; /* time of last modification in nanoseconds resolution */ + uint32_t i_ctime_nsec; /* time of last status change in nanoseconds resolution */ + uint64_t i_length; + char *i_name; /* inode name */ + char *i_uid; /* inode user id */ + char *i_gid; /* inode group id */ + char *i_muid; + char *i_extension; /* 9p2000.u extensions */ + uid_t n_uid; /* 9p2000.u extensions */ + gid_t n_gid; /* 9p2000.u extensions */ + uid_t n_muid; /* 9p2000.u extensions */ + /* bookkeeping info on the client. */ + uint16_t i_links_count; /*number of references to the inode*/ + uint64_t i_qid_path; /* using inode number for reference. 
*/ + uint64_t i_flags; + uint64_t blksize; /* block size for file system */ + uint64_t blocks; /* number of 512B blocks allocated */ + uint64_t gen; /* reserved for future use */ + uint64_t data_version; /* reserved for future use */ + +}; + +#define P9FS_VFID_MTX(_sc) (&(_sc)->vfid_mtx) +#define P9FS_VFID_LOCK(_sc) mtx_lock(P9FS_VFID_MTX(_sc)) +#define P9FS_VFID_UNLOCK(_sc) mtx_unlock(P9FS_VFID_MTX(_sc)) +#define P9FS_VFID_LOCK_INIT(_sc) mtx_init(P9FS_VFID_MTX(_sc), \ + "VFID List lock", NULL, MTX_DEF) +#define P9FS_VFID_LOCK_DESTROY(_sc) mtx_destroy(P9FS_VFID_MTX(_sc)) + +#define P9FS_VOFID_MTX(_sc) (&(_sc)->vofid_mtx) +#define P9FS_VOFID_LOCK(_sc) mtx_lock(P9FS_VOFID_MTX(_sc)) +#define P9FS_VOFID_UNLOCK(_sc) mtx_unlock(P9FS_VOFID_MTX(_sc)) +#define P9FS_VOFID_LOCK_INIT(_sc) mtx_init(P9FS_VOFID_MTX(_sc), \ + "VOFID List lock", NULL, MTX_DEF) +#define P9FS_VOFID_LOCK_DESTROY(_sc) mtx_destroy(P9FS_VOFID_MTX(_sc)) + +#define VFID 0x01 +#define VOFID 0x02 + +/* A Plan9 node. */ +struct p9fs_node { + STAILQ_HEAD( ,p9_fid) vfid_list; /* vfid related to uid */ + struct mtx vfid_mtx; /* mutex for vfid list */ + STAILQ_HEAD( ,p9_fid) vofid_list; /* vofid related to uid */ + struct mtx vofid_mtx; /* mutex for vofid list */ + struct p9fs_node *parent; /* pointer to parent p9fs node */ + struct p9fs_qid vqid; /* the server qid, will be from the host */ + struct vnode *v_node; /* vnode for this fs_node. 
*/ + struct p9fs_inode inode; /* in memory representation of ondisk information*/ + struct p9fs_session *p9fs_ses; /* Session_ptr for this node */ + STAILQ_ENTRY(p9fs_node) p9fs_node_next; + uint64_t flags; +}; + +#define P9FS_VTON(vp) ((vp)->v_data) +#define P9FS_NTOV(node) ((node)->v_node) +#define VFSTOP9(mp) ((mp)->mnt_data) +#define QEMU_DIRENTRY_SZ 25 +#define P9FS_NODE_MODIFIED 0x1 /* indicating file change */ +#define P9FS_ROOT 0x2 /* indicating root p9fs node */ +#define P9FS_NODE_DELETED 0x4 /* indicating file or directory delete */ +#define P9FS_NODE_IN_SESSION 0x8 /* p9fs_node is in the session - virt_node_list */ +#define IS_ROOT(node) (node->flags & P9FS_ROOT) + +#define P9FS_SET_LINKS(inode) do { \ + (inode)->i_links_count = 1; \ +} while (0) \ + +#define P9FS_INCR_LINKS(inode) do { \ + (inode)->i_links_count++; \ +} while (0) \ + +#define P9FS_DECR_LINKS(inode) do { \ + (inode)->i_links_count--; \ +} while (0) \ + +#define P9FS_CLR_LINKS(inode) do { \ + (inode)->i_links_count = 0; \ +} while (0) \ + +#define P9FS_MTX(_sc) (&(_sc)->p9fs_mtx) +#define P9FS_LOCK(_sc) mtx_lock(P9FS_MTX(_sc)) +#define P9FS_UNLOCK(_sc) mtx_unlock(P9FS_MTX(_sc)) +#define P9FS_LOCK_INIT(_sc) mtx_init(P9FS_MTX(_sc), \ + "P9FS session chain lock", NULL, MTX_DEF) +#define P9FS_LOCK_DESTROY(_sc) mtx_destroy(P9FS_MTX(_sc)) + +/* Session structure for the FS */ +struct p9fs_session { + unsigned char flags; /* these flags for the session */ + struct mount *p9fs_mount; /* mount point */ + struct p9fs_node rnp; /* root p9fs node for this session */ + uid_t uid; /* the uid that has access */ + const char *uname; /* user name to mount as */ + const char *aname; /* name of remote file tree being mounted */ + struct p9_client *clnt; /* 9p client */ + struct mtx p9fs_mtx; /* mutex used for guarding the chain.*/ + STAILQ_HEAD( ,p9fs_node) virt_node_list; /* list of p9fs nodes in this session*/ + struct p9_fid *mnt_fid; /* to save nobody 's fid for unmounting as root user */ +}; + +struct 
p9fs_mount { + struct p9fs_session p9fs_session; /* per instance session information */ + struct mount *p9fs_mountp; /* mount point */ + int mount_tag_len; /* length of the mount tag */ + char *mount_tag; /* mount tag used */ +}; + +/* All session flags based on 9p versions */ +enum virt_session_flags { + P9FS_PROTO_2000U = 0x01, + P9FS_PROTO_2000L = 0x02, +}; + +/* Session access flags */ +#define P9_ACCESS_ANY 0x04 /* single attach for all users */ +#define P9_ACCESS_SINGLE 0x08 /* access to only the user who mounts */ +#define P9_ACCESS_USER 0x10 /* new attach established for every user */ +#define P9_ACCESS_MASK (P9_ACCESS_ANY|P9_ACCESS_SINGLE|P9_ACCESS_USER) + +u_quad_t p9fs_round_filesize_to_bytes(uint64_t filesize, uint64_t bsize); +u_quad_t p9fs_pow2_filesize_to_bytes(uint64_t filesize, uint64_t bsize); + +/* These are all the P9FS specific vops */ +int p9fs_stat_vnode_l(void); +int p9fs_stat_vnode_dotl(struct p9_stat_dotl *st, struct vnode *vp); +int p9fs_reload_stats_dotl(struct vnode *vp, struct ucred *cred); +int p9fs_proto_dotl(struct p9fs_session *vses); +struct p9_fid *p9fs_init_session(struct mount *mp, int *error); +void p9fs_close_session(struct mount *mp); +void p9fs_prepare_to_close(struct mount *mp); +void p9fs_complete_close(struct mount *mp); +int p9fs_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp); +int p9fs_vget_common(struct mount *mp, struct p9fs_node *np, int flags, + struct p9fs_node *parent, struct p9_fid *fid, struct vnode **vpp, + char *name); +int p9fs_node_cmp(struct vnode *vp, void *arg); +void p9fs_dispose_node(struct p9fs_node **npp); +void p9fs_cleanup(struct p9fs_node *vp); +void p9fs_fid_remove_all(struct p9fs_node *np, int leave_ofids); +void p9fs_fid_remove(struct p9fs_node *np, struct p9_fid *vfid, + int fid_type); +void p9fs_fid_add(struct p9fs_node *np, struct p9_fid *fid, + int fid_type); +struct p9_fid *p9fs_get_fid(struct p9_client *clnt, + struct p9fs_node *np, struct ucred *cred, int fid_type, int 
mode, int *error); + +#endif /* FS_P9FS_P9FS_H */ diff --git a/sys/fs/p9fs/p9fs_proto.h b/sys/fs/p9fs/p9fs_proto.h new file mode 100644 --- /dev/null +++ b/sys/fs/p9fs/p9fs_proto.h @@ -0,0 +1,42 @@ +/*- + * Copyright (c) 2017 Juniper Networks, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +/* + * Plan9 filesystem (9P2000.u) protocol definitions. 
#ifndef FS_P9FS_P9FS_PROTO_H
#define FS_P9FS_P9FS_PROTO_H

/*
 * P9FS_O* constants.  The original labels this group "File permissions".
 * NOTE(review): the names (OREAD/OWRITE/ORDWR/OEXEC/OTRUNC) look like 9P
 * open modes rather than permission bits -- confirm against the users.
 */
#define P9FS_OREAD	0x00
#define P9FS_OWRITE	0x01
#define P9FS_ORDWR	0x02
#define P9FS_OEXEC	0x03
#define P9FS_OTRUNC	0x10

#endif /* FS_P9FS_P9FS_PROTO_H */
This will interact with the client to make + * sure we have correct API calls in the header. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "p9fs_proto.h" + +#include +#include +#include +#include + +int +p9fs_proto_dotl(struct p9fs_session *vses) +{ + + return (vses->flags & P9FS_PROTO_2000L); +} + +/* Initialize a p9fs session */ +struct p9_fid * +p9fs_init_session(struct mount *mp, int *error) +{ + struct p9fs_session *vses; + struct p9fs_mount *virtmp; + struct p9_fid *fid; + char *access; + + virtmp = VFSTOP9(mp); + vses = &virtmp->p9fs_session; + vses->uid = P9_NONUNAME; + vses->uname = P9_DEFUNAME; + vses->aname = P9_DEFANAME; + + /* + * Create the client structure. Call into the driver to create + * driver structures for the actual IO transfer. + */ + vses->clnt = p9_client_create(mp, error, virtmp->mount_tag); + + if (vses->clnt == NULL) { + P9_DEBUG(ERROR, "%s: p9_client_create failed\n", __func__); + return (NULL); + } + /* + * Find the client version and cache the copy. We will use this copy + * throughout FS layer. 
+ */ + if (p9_is_proto_dotl(vses->clnt)) + vses->flags |= P9FS_PROTO_2000L; + else if (p9_is_proto_dotu(vses->clnt)) + vses->flags |= P9FS_PROTO_2000U; + + /* Set the access mode */ + access = vfs_getopts(mp->mnt_optnew, "access", error); + if (access == NULL) + vses->flags |= P9_ACCESS_USER; + else if (!strcmp(access, "any")) + vses->flags |= P9_ACCESS_ANY; + else if (!strcmp(access, "single")) + vses->flags |= P9_ACCESS_SINGLE; + else if (!strcmp(access, "user")) + vses->flags |= P9_ACCESS_USER; + else { + P9_DEBUG(ERROR, "%s: unknown access mode\n", __func__); + *error = EINVAL; + goto out; + } + + *error = 0; + /* Attach with the backend host*/ + fid = p9_client_attach(vses->clnt, NULL, vses->uname, P9_NONUNAME, + vses->aname, error); + vses->mnt_fid = fid; + + if (*error != 0) { + P9_DEBUG(ERROR, "%s: attach failed: %d\n", __func__, *error); + goto out; + } + P9_DEBUG(SUBR, "%s: attach successful fid :%p\n", __func__, fid); + fid->uid = vses->uid; + + /* initialize the node list for the session */ + STAILQ_INIT(&vses->virt_node_list); + P9FS_LOCK_INIT(vses); + + P9_DEBUG(SUBR, "%s: INIT session successful\n", __func__); + + return (fid); +out: + p9_client_destroy(vses->clnt); + return (NULL); +} + +/* Begin to terminate a session */ +void +p9fs_prepare_to_close(struct mount *mp) +{ + struct p9fs_session *vses; + struct p9fs_mount *vmp; + struct p9fs_node *np, *pnp, *tmp; + + vmp = VFSTOP9(mp); + vses = &vmp->p9fs_session; + + /* break the node->parent references */ + STAILQ_FOREACH_SAFE(np, &vses->virt_node_list, p9fs_node_next, tmp) { + if (np->parent && np->parent != np) { + pnp = np->parent; + np->parent = NULL; + vrele(P9FS_NTOV(pnp)); + } + } + + /* We are about to teardown, we dont allow anything other than clunk after this.*/ + p9_client_begin_disconnect(vses->clnt); +} + +/* Shutdown a session */ +void +p9fs_complete_close(struct mount *mp) +{ + struct p9fs_session *vses; + struct p9fs_mount *vmp; + + vmp = VFSTOP9(mp); + vses = &vmp->p9fs_session; + + 
/* Finish the close*/ + p9_client_disconnect(vses->clnt); +} + + +/* Call from unmount. Close the session. */ +void +p9fs_close_session(struct mount *mp) +{ + struct p9fs_session *vses; + struct p9fs_mount *vmp; + struct p9fs_node *p, *tmp; + + vmp = VFSTOP9(mp); + vses = &vmp->p9fs_session; + + /* + * Cleanup the leftover p9fs nodes in this session. This could be all + * removed, unlinked p9fs nodes on the host. + */ + P9FS_LOCK(vses); + STAILQ_FOREACH_SAFE(p, &vses->virt_node_list, p9fs_node_next, tmp) { + + p9fs_cleanup(p); + } + P9FS_UNLOCK(vses); + p9fs_complete_close(mp); + /* Clean up the clnt structure. */ + p9_client_destroy(vses->clnt); + P9FS_LOCK_DESTROY(vses); + P9_DEBUG(SUBR, "%s: Clean close session .\n", __func__); +} + +/* + * Remove all the fids of a particular type from a p9fs node + * as well as destroy/clunk them. + */ +void +p9fs_fid_remove_all(struct p9fs_node *np, int leave_ofids) +{ + struct p9_fid *fid, *tfid; + + STAILQ_FOREACH_SAFE(fid, &np->vfid_list, fid_next, tfid) { + STAILQ_REMOVE(&np->vfid_list, fid, p9_fid, fid_next); + p9_client_clunk(fid); + } + + if (!leave_ofids) { + STAILQ_FOREACH_SAFE(fid, &np->vofid_list, fid_next, tfid) { + STAILQ_REMOVE(&np->vofid_list, fid, p9_fid, fid_next); + p9_client_clunk(fid); + } + } +} + + +/* Remove a fid from its corresponding fid list */ +void +p9fs_fid_remove(struct p9fs_node *np, struct p9_fid *fid, int fid_type) +{ + + switch (fid_type) { + case VFID: + P9FS_VFID_LOCK(np); + STAILQ_REMOVE(&np->vfid_list, fid, p9_fid, fid_next); + P9FS_VFID_UNLOCK(np); + break; + case VOFID: + P9FS_VOFID_LOCK(np); + STAILQ_REMOVE(&np->vofid_list, fid, p9_fid, fid_next); + P9FS_VOFID_UNLOCK(np); + break; + } +} + +/* Add a fid to the corresponding fid list */ +void +p9fs_fid_add(struct p9fs_node *np, struct p9_fid *fid, int fid_type) +{ + + switch (fid_type) { + case VFID: + P9FS_VFID_LOCK(np); + STAILQ_INSERT_TAIL(&np->vfid_list, fid, fid_next); + P9FS_VFID_UNLOCK(np); + break; + case VOFID: + 
P9FS_VOFID_LOCK(np); + STAILQ_INSERT_TAIL(&np->vofid_list, fid, fid_next); + P9FS_VOFID_UNLOCK(np); + break; + } +} + +/* Build the path from root to current directory */ +static int +p9fs_get_full_path(struct p9fs_node *np, char ***names) +{ + int i, n; + struct p9fs_node *node; + char **wnames; + + n = 0; + for (node = np ; (node != NULL) && !IS_ROOT(node) ; node = node->parent) + n++; + + if (node == NULL) + return (0); + + wnames = malloc(n * sizeof(char *), M_TEMP, M_ZERO|M_WAITOK); + + for (i = n-1, node = np; i >= 0 ; i--, node = node->parent) + wnames[i] = node->inode.i_name; + + *names = wnames; + return (n); +} + +/* + * Return TRUE if this fid can be used for the requested mode. + */ +static int +p9fs_compatible_mode(struct p9_fid *fid, int mode) +{ + /* + * Return TRUE for an exact match. For OREAD and OWRITE, allow + * existing ORDWR fids to match. Only check the low two bits + * of mode. + * + * TODO: figure out if this is correct for O_APPEND + */ + int fid_mode = fid->mode & 3; + if (fid_mode == mode) + return (TRUE); + if (fid_mode == P9PROTO_ORDWR) + return (mode == P9PROTO_OREAD || mode == P9PROTO_OWRITE); + return (FALSE); +} + +/* + * Retrieve fid structure corresponding to a particular + * uid and fid type for a p9fs node + */ +static struct p9_fid * +p9fs_get_fid_from_uid(struct p9fs_node *np, uid_t uid, int fid_type, int mode) +{ + struct p9_fid *fid; + + switch (fid_type) { + case VFID: + P9FS_VFID_LOCK(np); + STAILQ_FOREACH(fid, &np->vfid_list, fid_next) { + if (fid->uid == uid) { + P9FS_VFID_UNLOCK(np); + return (fid); + } + } + P9FS_VFID_UNLOCK(np); + break; + case VOFID: + P9FS_VOFID_LOCK(np); + STAILQ_FOREACH(fid, &np->vofid_list, fid_next) { + if (fid->uid == uid && p9fs_compatible_mode(fid, mode)) { + P9FS_VOFID_UNLOCK(np); + return (fid); + } + } + P9FS_VOFID_UNLOCK(np); + break; + } + + return (NULL); +} + +/* + * Function returns the fid sturcture for a file corresponding to current user id. 
+ * First it searches in the fid list of the corresponding p9fs node. + * New fid will be created if not already present and added in the corresponding + * fid list in the p9fs node. + * If the user is not already attached then this will attach the user first + * and then create a new fid for this particular file by doing dir walk. + */ +struct p9_fid * +p9fs_get_fid(struct p9_client *clnt, struct p9fs_node *np, struct ucred *cred, + int fid_type, int mode, int *error) +{ + uid_t uid; + struct p9_fid *fid, *oldfid; + struct p9fs_node *root; + struct p9fs_session *vses; + int i, l, clone; + char **wnames = NULL; + uint16_t nwnames; + + oldfid = NULL; + vses = np->p9fs_ses; + + if (vses->flags & P9_ACCESS_ANY) + uid = vses->uid; + else if (cred) + uid = cred->cr_uid; + else + uid = 0; + + /* + * Search for the fid in corresponding fid list. + * We should return NULL for VOFID if it is not present in the list. + * Because VOFID should have been created during the file open. + * If VFID is not present in the list then we should create one. + */ + fid = p9fs_get_fid_from_uid(np, uid, fid_type, mode); + if (fid != NULL || fid_type == VOFID) + return (fid); + + /* Check root if the user is attached */ + root = &np->p9fs_ses->rnp; + fid = p9fs_get_fid_from_uid(root, uid, fid_type, mode); + if(fid == NULL) { + /* Attach the user */ + fid = p9_client_attach(clnt, NULL, NULL, uid, + vses->aname, error); + if (*error != 0) + return (NULL); + p9fs_fid_add(root, fid, fid_type); + } + + /* If we are looking for root then return it */ + if (IS_ROOT(np)) + return (fid); + + /* Get full path from root to p9fs node */ + nwnames = p9fs_get_full_path(np, &wnames); + + /* + * Could not get full path. + * If p9fs node is not deleted, parent should exist. 
+ */ + KASSERT(nwnames != 0, ("%s: Directory of %s doesn't exist", __func__, np->inode.i_name)); + + clone = 1; + i = 0; + while (i < nwnames) { + l = MIN(nwnames - i, P9_MAXWELEM); + + fid = p9_client_walk(fid, l, wnames, clone, error); + if (*error != 0) { + if (oldfid) + p9_client_clunk(oldfid); + fid = NULL; + goto bail_out; + } + oldfid = fid; + clone = 0; + i += l ; + } + p9fs_fid_add(np, fid, fid_type); +bail_out: + free(wnames, M_TEMP); + return (fid); +} diff --git a/sys/fs/p9fs/p9fs_vfsops.c b/sys/fs/p9fs/p9fs_vfsops.c new file mode 100644 --- /dev/null +++ b/sys/fs/p9fs/p9fs_vfsops.c @@ -0,0 +1,598 @@ +/*- + * Copyright (c) 2017-2020 Juniper Networks, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +/* + * This file consists of all the VFS interactions of VFS ops which include + * mount, unmount, initilaize etc. for p9fs. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +SYSCTL_NODE(_vfs, OID_AUTO, p9fs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, + "Plan 9 filesystem"); + +/* This count is static now. Can be made tunable later */ +#define P9FS_FLUSH_RETRIES 10 + +static MALLOC_DEFINE(M_P9MNT, "p9fs_mount", "Mount structures for p9fs"); +static uma_zone_t p9fs_node_zone; +uma_zone_t p9fs_io_buffer_zone; +uma_zone_t p9fs_getattr_zone; +uma_zone_t p9fs_setattr_zone; +extern struct vop_vector p9fs_vnops; + +/* option parsing */ +static const char *p9fs_opts[] = { + "from", "trans", "access", NULL +}; + +/* Dispose p9fs node, freeing it to the UMA zone */ +void +p9fs_dispose_node(struct p9fs_node **npp) +{ + struct p9fs_node *node; + struct vnode *vp; + + node = *npp; + + if (node == NULL) + return; + + if (node->parent && node->parent != node) { + vrele(P9FS_NTOV(node->parent)); + } + + P9_DEBUG(VOPS, "%s: node: %p\n", __func__, *npp); + + vp = P9FS_NTOV(node); + vp->v_data = NULL; + + /* Free our associated memory */ + if (!(vp->v_vflag & VV_ROOT)) { + free(node->inode.i_name, M_TEMP); + uma_zfree(p9fs_node_zone, node); + } + + *npp = NULL; +} + +/* Initialize memory allocation */ +static int +p9fs_init(struct vfsconf *vfsp) +{ + + p9fs_node_zone = uma_zcreate("p9fs node zone", + sizeof(struct p9fs_node), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + + /* Create the getattr_dotl zone */ + p9fs_getattr_zone = uma_zcreate("p9fs getattr zone", + sizeof(struct p9_stat_dotl), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + + /* Create the setattr_dotl zone */ + p9fs_setattr_zone = uma_zcreate("p9fs setattr zone", + sizeof(struct p9_iattr_dotl), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + + 
/* + * Create the io_buffer zone pool to keep things simpler in case of + * multiple threads. Each thread works with its own so there is no + * contention. + */ + p9fs_io_buffer_zone = uma_zcreate("p9fs io_buffer zone", + P9FS_MTU, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + + return (0); +} + +/* Destroy all the allocated memory */ +static int +p9fs_uninit(struct vfsconf *vfsp) +{ + + uma_zdestroy(p9fs_node_zone); + uma_zdestroy(p9fs_io_buffer_zone); + uma_zdestroy(p9fs_getattr_zone); + uma_zdestroy(p9fs_setattr_zone); + + return (0); +} + +/* Function to umount p9fs */ +static int +p9fs_unmount(struct mount *mp, int mntflags) +{ + struct p9fs_mount *vmp; + struct p9fs_session *vses; + int error, flags, i; + + error = 0; + flags = 0; + vmp = VFSTOP9(mp); + if (vmp == NULL) + return (0); + + vses = &vmp->p9fs_session; + if (mntflags & MNT_FORCE) + flags |= FORCECLOSE; + + p9fs_prepare_to_close(mp); + for (i = 0; i < P9FS_FLUSH_RETRIES; i++) { + + /* Flush everything on this mount point.*/ + error = vflush(mp, 1, flags, curthread); + + if (error == 0 || (mntflags & MNT_FORCE) == 0) + break; + /* Sleep until interrupted or 1 tick expires. */ + error = tsleep(&error, PSOCK, "p9unmnt", 1); + if (error == EINTR) + break; + error = EBUSY; + } + + if (error != 0) + goto out; + p9fs_close_session(mp); + /* Cleanup the mount structure. 
*/ + free(vmp, M_P9MNT); + mp->mnt_data = NULL; + return (error); +out: + /* Restore the flag in case of error */ + vses->clnt->trans_status = P9FS_CONNECT; + return (error); +} + +/* + * Compare qid stored in p9fs node + * Return 1 if does not match otherwise return 0 + */ +int +p9fs_node_cmp(struct vnode *vp, void *arg) +{ + struct p9fs_node *np; + struct p9_qid *qid; + + np = vp->v_data; + qid = (struct p9_qid *)arg; + + if (np->vqid.qid_path == qid->path) { + if (vp->v_vflag & VV_ROOT) + return (0); + else if (np->vqid.qid_mode == qid->type && + np->vqid.qid_version == qid->version) + return (0); + } + + return (1); +} + +/* + * Common code used across p9fs to return vnode for the file represented + * by the fid. + * Lookup for the vnode in hash_list. This lookup is based on the qid path + * which is unique to a file. p9fs_node_cmp is called in this lookup process. + * I. If the vnode we are looking for is found in the hash list + * 1. Check if the vnode is a valid vnode by reloading its stats + * a. if the reloading of the vnode stats returns error then remove the + * vnode from hash list and return + * b. If reloading of vnode stats returns without any error then, clunk the + * new fid which was created for the vnode as we know that the vnode + * already has a fid associated with it and return the vnode. + * This is to avoid fid leaks + * II. 
If vnode is not found in the hash list then, create new vnode, p9fs + * node and return the vnode + */ +int +p9fs_vget_common(struct mount *mp, struct p9fs_node *np, int flags, + struct p9fs_node *parent, struct p9_fid *fid, struct vnode **vpp, + char *name) +{ + struct p9fs_mount *vmp; + struct p9fs_session *vses; + struct vnode *vp; + struct p9fs_node *node; + struct thread *td; + uint32_t hash; + int error; + struct p9fs_inode *inode; + + td = curthread; + vmp = VFSTOP9(mp); + vses = &vmp->p9fs_session; + + /* Look for vp in the hash_list */ + hash = fnv_32_buf(&fid->qid.path, sizeof(uint64_t), FNV1_32_INIT); + error = vfs_hash_get(mp, hash, flags, td, &vp, p9fs_node_cmp, + &fid->qid); + if (error != 0) + return (error); + else if (vp != NULL) { + if (vp->v_vflag & VV_ROOT) { + if (np == NULL) + p9_client_clunk(fid); + *vpp = vp; + return (0); + } + error = p9fs_reload_stats_dotl(vp, curthread->td_ucred); + if (error != 0) { + node = vp->v_data; + /* Remove stale vnode from hash list */ + vfs_hash_remove(vp); + node->flags |= P9FS_NODE_DELETED; + + vput(vp); + *vpp = NULLVP; + vp = NULL; + } else { + *vpp = vp; + /* Clunk the new fid if not root */ + p9_client_clunk(fid); + return (0); + } + } + + /* + * We must promote to an exclusive lock for vnode creation. This + * can happen if lookup is passed LOCKSHARED. + */ + if ((flags & LK_TYPE_MASK) == LK_SHARED) { + flags &= ~LK_TYPE_MASK; + flags |= LK_EXCLUSIVE; + } + + /* Allocate a new vnode. */ + if ((error = getnewvnode("p9fs", mp, &p9fs_vnops, &vp)) != 0) { + *vpp = NULLVP; + P9_DEBUG(ERROR, "%s: getnewvnode failed: %d\n", __func__, error); + return (error); + } + + /* If we dont have it, create one. 
*/ + if (np == NULL) { + np = uma_zalloc(p9fs_node_zone, M_WAITOK | M_ZERO); + /* Initialize the VFID list */ + P9FS_VFID_LOCK_INIT(np); + STAILQ_INIT(&np->vfid_list); + p9fs_fid_add(np, fid, VFID); + + /* Initialize the VOFID list */ + P9FS_VOFID_LOCK_INIT(np); + STAILQ_INIT(&np->vofid_list); + + vref(P9FS_NTOV(parent)); + np->parent = parent; + np->p9fs_ses = vses; /* Map the current session */ + inode = &np->inode; + /*Fill the name of the file in inode */ + inode->i_name = malloc(strlen(name)+1, M_TEMP, M_NOWAIT | M_ZERO); + strlcpy(inode->i_name, name, strlen(name)+1); + } else { + vp->v_type = VDIR; /* root vp is a directory */ + vp->v_vflag |= VV_ROOT; + vref(vp); /* Increment a reference on root vnode during mount */ + } + + vp->v_data = np; + np->v_node = vp; + inode = &np->inode; + inode->i_qid_path = fid->qid.path; + P9FS_SET_LINKS(inode); + + /* + * Add the p9fs node to the list for cleanup later. + * Cleanup of this p9fs node from the list of session + * p9fs nodes happen in vput() : + * - In vfs_hash_insert() after inserting this node + * to the VFS hash table. + * - In error handling below. + */ + P9FS_LOCK(vses); + STAILQ_INSERT_TAIL(&vses->virt_node_list, np, p9fs_node_next); + P9FS_UNLOCK(vses); + np->flags |= P9FS_NODE_IN_SESSION; + + lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); + error = insmntque(vp, mp); + if (error != 0) { + /* + * vput(vp) is already called from insmntque_stddtr(). + * Just goto 'out' to dispose the node. 
+ */ + goto out; + } + + /* Init the vnode with the disk info*/ + error = p9fs_reload_stats_dotl(vp, curthread->td_ucred); + if (error != 0) { + vput(vp); + goto out; + } + + error = vfs_hash_insert(vp, hash, flags, td, vpp, + p9fs_node_cmp, &fid->qid); + if (error != 0) { + goto out; + } + if (*vpp == NULL) { + *vpp = vp; + } + + return (0); +out: + if (!IS_ROOT(np)) { + /* Destroy the FID LIST locks */ + P9FS_VFID_LOCK_DESTROY(np); + P9FS_VOFID_LOCK_DESTROY(np); + } + + /* Something went wrong, dispose the node */ + + /* + * Remove the p9fs_node from the list before we cleanup. + * This should ideally have been removed in vput() above. + * We try again here, incase it is missed from vput(), as + * we added this vnode explicitly to virt_node_list above. + */ + if ((np->flags & P9FS_NODE_IN_SESSION) != 0) { + P9FS_LOCK(vses); + STAILQ_REMOVE(&vses->virt_node_list, np, p9fs_node, p9fs_node_next); + P9FS_UNLOCK(vses); + np->flags &= ~P9FS_NODE_IN_SESSION; + } + p9fs_dispose_node(&np); + *vpp = NULLVP; + return (error); +} + +/* Main mount function for 9pfs */ +static int +p9_mount(struct mount *mp) +{ + struct p9_fid *fid; + struct p9fs_mount *vmp; + struct p9fs_session *vses; + struct p9fs_node *p9fs_root; + int error; + char *from; + int len; + + /* Verify the validity of mount options */ + if (vfs_filteropt(mp->mnt_optnew, p9fs_opts)) + return (EINVAL); + + /* Extract NULL terminated mount tag from mount options */ + error = vfs_getopt(mp->mnt_optnew, "from", (void **)&from, &len); + if (error != 0 || from[len - 1] != '\0') + return (EINVAL); + + /* Allocate and initialize the private mount structure. 
*/ + vmp = malloc(sizeof (struct p9fs_mount), M_P9MNT, M_WAITOK | M_ZERO); + mp->mnt_data = vmp; + vmp->p9fs_mountp = mp; + vmp->mount_tag = from; + vmp->mount_tag_len = len; + vses = &vmp->p9fs_session; + vses->p9fs_mount = mp; + p9fs_root = &vses->rnp; + /* Hardware iosize from the Qemu */ + mp->mnt_iosize_max = PAGE_SIZE; + /* + * Init the session for the p9fs root. This creates a new root fid and + * attaches the client and server. + */ + fid = p9fs_init_session(mp, &error); + if (fid == NULL) { + goto out; + } + + P9FS_VFID_LOCK_INIT(p9fs_root); + STAILQ_INIT(&p9fs_root->vfid_list); + p9fs_fid_add(p9fs_root, fid, VFID); + P9FS_VOFID_LOCK_INIT(p9fs_root); + STAILQ_INIT(&p9fs_root->vofid_list); + p9fs_root->parent = p9fs_root; + p9fs_root->flags |= P9FS_ROOT; + p9fs_root->p9fs_ses = vses; + vfs_getnewfsid(mp); + strlcpy(mp->mnt_stat.f_mntfromname, from, + sizeof(mp->mnt_stat.f_mntfromname)); + MNT_ILOCK(mp); + mp->mnt_flag |= MNT_LOCAL; + mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED; + MNT_IUNLOCK(mp); + P9_DEBUG(VOPS, "%s: Mount successful\n", __func__); + /* Mount structures created. */ + + return (0); +out: + P9_DEBUG(ERROR, "%s: Mount Failed \n", __func__); + if (vmp != NULL) { + free(vmp, M_P9MNT); + mp->mnt_data = NULL; + } + return (error); +} + +/* Mount entry point */ +static int +p9fs_mount(struct mount *mp) +{ + int error; + + /* + * Minimal support for MNT_UPDATE - allow changing from + * readonly. + */ + if (mp->mnt_flag & MNT_UPDATE) { + if ((mp->mnt_flag & MNT_RDONLY) && !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) { + mp->mnt_flag &= ~MNT_RDONLY; + } + return (0); + } + + error = p9_mount(mp); + if (error != 0) + (void) p9fs_unmount(mp, MNT_FORCE); + + return (error); +} + +/* + * Retrieve the root vnode of this mount. After filesystem is mounted, the root + * vnode is created for the first time. Subsequent calls to p9fs root will + * return the same vnode created during mount. 
+ */ +static int +p9fs_root(struct mount *mp, int lkflags, struct vnode **vpp) +{ + struct p9fs_mount *vmp; + struct p9fs_node *np; + struct p9_client *clnt; + struct p9_fid *vfid; + int error; + + vmp = VFSTOP9(mp); + np = &vmp->p9fs_session.rnp; + clnt = vmp->p9fs_session.clnt; + error = 0; + + P9_DEBUG(VOPS, "%s: node=%p name=%s\n",__func__, np, np->inode.i_name); + + vfid = p9fs_get_fid(clnt, np, curthread->td_ucred, VFID, -1, &error); + + if (error != 0) { + /* for root use the nobody user's fid as vfid. + * This is used while unmounting as root when non-root + * user has mounted p9fs + */ + if (vfid == NULL && clnt->trans_status == P9FS_BEGIN_DISCONNECT) + vfid = vmp->p9fs_session.mnt_fid; + else { + *vpp = NULLVP; + return (error); + } + } + + error = p9fs_vget_common(mp, np, lkflags, np, vfid, vpp, NULL); + if (error != 0) { + *vpp = NULLVP; + return (error); + } + np->v_node = *vpp; + return (error); +} + +/* Retrieve the file system statistics */ +static int +p9fs_statfs(struct mount *mp __unused, struct statfs *buf) +{ + struct p9fs_mount *vmp; + struct p9fs_node *np; + struct p9_client *clnt; + struct p9_fid *vfid; + struct p9_statfs statfs; + int res, error; + + vmp = VFSTOP9(mp); + np = &vmp->p9fs_session.rnp; + clnt = vmp->p9fs_session.clnt; + error = 0; + + vfid = p9fs_get_fid(clnt, np, curthread->td_ucred, VFID, -1, &error); + if (error != 0) { + return (error); + } + + res = p9_client_statfs(vfid, &statfs); + + if (res == 0) { + buf->f_type = statfs.type; + /* + * We have a limit of 4k irrespective of what the + * Qemu server can do. 
+ */ + if (statfs.bsize > PAGE_SIZE) + buf->f_bsize = PAGE_SIZE; + else + buf->f_bsize = statfs.bsize; + + buf->f_iosize = buf->f_bsize; + buf->f_blocks = statfs.blocks; + buf->f_bfree = statfs.bfree; + buf->f_bavail = statfs.bavail; + buf->f_files = statfs.files; + buf->f_ffree = statfs.ffree; + } + else { + /* Atleast set these if stat fail */ + buf->f_bsize = PAGE_SIZE; + buf->f_iosize = buf->f_bsize; /* XXX */ + } + + return (0); +} + +static int +p9fs_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp) +{ + + return (EINVAL); +} + +struct vfsops p9fs_vfsops = { + .vfs_init = p9fs_init, + .vfs_uninit = p9fs_uninit, + .vfs_mount = p9fs_mount, + .vfs_unmount = p9fs_unmount, + .vfs_root = p9fs_root, + .vfs_statfs = p9fs_statfs, + .vfs_fhtovp = p9fs_fhtovp, +}; + +VFS_SET(p9fs_vfsops, p9fs, VFCF_JAIL); +MODULE_VERSION(p9fs, 1); diff --git a/sys/fs/p9fs/p9fs_vnops.c b/sys/fs/p9fs/p9fs_vnops.c new file mode 100644 --- /dev/null +++ b/sys/fs/p9fs/p9fs_vnops.c @@ -0,0 +1,2140 @@ +/* + * Copyright (c) 2017-2020 Juniper Networks, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/* This file contains VFS file ops for the 9P protocol.
+ * This makes the upper layer of the p9fs driver. These functions interact
+ * with the VFS layer and lower layer of p9fs driver which is 9Pnet. All
+ * the user file operations are handled here.
+ */
+/*
+ * NOTE(review): the header names of the #include directives below are
+ * missing in this copy of the patch; restore them from the original change.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+/* File permissions. */
+#define IEXEC 0000100 /* Executable. */
+#define IWRITE 0000200 /* Writeable. */
+#define IREAD 0000400 /* Readable. */
+#define ISVTX 0001000 /* Sticky bit. */
+#define ISGID 0002000 /* Set-gid. */
+#define ISUID 0004000 /* Set-uid. */
+
+static MALLOC_DEFINE(M_P9UIOV, "uio", "UIOV structures for strategy in p9fs");
+/* UMA zones defined elsewhere in the driver. */
+extern uma_zone_t p9fs_io_buffer_zone;
+extern uma_zone_t p9fs_getattr_zone;
+extern uma_zone_t p9fs_setattr_zone;
+/* For the root vnode's vnops. */
+struct vop_vector p9fs_vnops;
+
+static uint32_t p9fs_unix2p9_mode(uint32_t mode);
+
+/*
+ * Stamp the in-memory inode of vp with the current file system timestamp.
+ * Only i_mtime (whole seconds) is written; the access time and the
+ * nanosecond fields are left untouched.
+ */
+static void
+p9fs_itimes(struct vnode *vp)
+{
+	struct p9fs_node *node;
+	struct timespec ts;
+	struct p9fs_inode *inode;
+
+	node = P9FS_VTON(vp);
+	inode = &node->inode;
+
+	vfs_timestamp(&ts);
+	inode->i_mtime = ts.tv_sec;
+}
+
+/*
+ * Cleanup the p9fs node, the in memory representation of a vnode for p9fs.
+ * The cleanup includes invalidating all cache entries for the vnode,
+ * destroying the vobject, removing vnode from hashlist, removing p9fs node
+ * from the list of session p9fs nodes, and disposing of the p9fs node.
+ * Basically it is doing a reverse of what a create/vget does.
+ *
+ * np is the node to tear down; the steps visible here are, in order:
+ * hash removal (skipped for nodes flagged P9FS_NODE_DELETED), removal of
+ * all fids, destruction of both fid-list locks, unlinking from the
+ * session's node list, and finally p9fs_dispose_node().
+ */
+void
+p9fs_cleanup(struct p9fs_node *np)
+{
+	struct vnode *vp;
+	struct p9fs_session *vses;
+
+	vp = P9FS_NTOV(np);
+	vses = np->p9fs_ses;
+
+	/* Remove the vnode from hash list if vnode is not already deleted */
+	if ((np->flags & P9FS_NODE_DELETED) == 0)
+		vfs_hash_remove(vp);
+
+	/* Remove all the FID */
+	p9fs_fid_remove_all(np, FALSE);
+
+	/* Destroy the FID LIST locks */
+	P9FS_VFID_LOCK_DESTROY(np);
+	P9FS_VOFID_LOCK_DESTROY(np);
+
+	/* Remove the p9fs_node from the list before we cleanup.*/
+	if ((np->flags & P9FS_NODE_IN_SESSION) != 0) {
+		/* The session lock is held only around the list manipulation. */
+		P9FS_LOCK(vses);
+		STAILQ_REMOVE(&vses->virt_node_list, np, p9fs_node, p9fs_node_next);
+		P9FS_UNLOCK(vses);
+		np->flags &= ~P9FS_NODE_IN_SESSION;
+	}
+
+	/* Dispose all node knowledge.*/
+	p9fs_dispose_node(&np);
+}
+
+/*
+ * Reclaim VOP is defined to be called for every vnode. This starts off
+ * the cleanup by clunking(remove the fid on the server) and calls
+ * p9fs_cleanup to free all the resources allocated for p9fs node.
+ */
+static int
+p9fs_reclaim(struct vop_reclaim_args *ap)
+{
+	struct vnode *vp;
+	struct p9fs_node *np;
+
+	vp = ap->a_vp;
+	np = P9FS_VTON(vp);
+
+	P9_DEBUG(VOPS, "%s: vp:%p node:%p\n", __func__, vp, np);
+	/* All of the teardown work is delegated to p9fs_cleanup(). */
+	p9fs_cleanup(np);
+
+	return (0);
+}
+
+/*
+ * recycle vnodes which are no longer referenced i.e, their usecount is zero
+ *
+ * Only nodes flagged P9FS_NODE_DELETED are recycled here; for every other
+ * node this VOP does nothing.  Always returns 0.
+ */
+static int
+p9fs_inactive(struct vop_inactive_args *ap)
+{
+	struct vnode *vp;
+	struct p9fs_node *np;
+
+	vp = ap->a_vp;
+	np = P9FS_VTON(vp);
+
+	P9_DEBUG(VOPS, "%s: vp:%p node:%p file:%s\n", __func__, vp, np, np->inode.i_name);
+	if (np->flags & P9FS_NODE_DELETED)
+		vrecycle(vp);
+
+	return (0);
+}
+
+/* Arguments handed through vn_vget_ino_gen() to p9fs_lookup_alloc(). */
+struct p9fs_lookup_alloc_arg {
+	struct componentname *cnp;	/* component being looked up */
+	struct p9fs_node *dnp;		/* node of the directory searched */
+	struct p9_fid *newfid;		/* fid obtained by walking to the component */
+};
+
+/*
+ * Callback for vn_get_ino
+ *
+ * Builds the vnode for the fid stored in arg.  Note that the lkflags
+ * parameter is not used: the lock flags are taken from the component name.
+ */
+static int
+p9fs_lookup_alloc(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
+{
+	struct p9fs_lookup_alloc_arg *p9aa = arg;
+
+	return (p9fs_vget_common(mp, NULL, p9aa->cnp->cn_lkflags, p9aa->dnp,
+	    p9aa->newfid, vpp, p9aa->cnp->cn_nameptr));
+}
+
+/*
+ * p9fs_lookup is called for every component name that is being searched for.
+ *
+ * I. If component is found on the server, we look for the in-memory
+ *    representation(vnode) of this component in namecache.
+ *    A. If the node is found in the namecache, we check if the vnode is still
+ *       valid.
+ *       1. If it is still valid, return vnode.
+ *       2. If it is not valid, we remove this vnode from the name cache and
+ *          create a new vnode for the component and return that vnode.
+ *    B. If the vnode is not found in the namecache, we look for it in the
+ *       hash list.
+ *       1. If the vnode is in the hash list, we check if the vnode is still
+ *          valid.
+ *          a. If it is still valid, we add that vnode to the namecache for
+ *             future lookups and return the vnode.
+ *          b. If it is not valid, create a new vnode and p9fs node,
+ *             initialize them and return the vnode.
+ * 2.
If the vnode is not found in the hash list, we create a new vnode
+ * and p9fs node, initialize them and return the vnode.
+ * II. If the component is not found on the server, an error code is returned.
+ * A. For the creation case, we return EJUSTRETURN so VFS can handle it.
+ * B. For all other cases, ENOENT is returned.
+ *
+ * Fid handling: the fid produced by the walk (newfid) is either handed to
+ * p9fs_vget_common() on the success path or clunked at the "out" label.
+ */
+static int
+p9fs_lookup(struct vop_lookup_args *ap)
+{
+	struct vnode *dvp;
+	struct vnode **vpp, *vp;
+	struct componentname *cnp;
+	struct p9fs_node *dnp; /*dir p9_node */
+	struct p9fs_node *np;
+	struct p9fs_session *vses;
+	struct mount *mp; /* Get the mount point */
+	struct p9_fid *dvfid, *newfid;
+	int error;
+	struct vattr vattr;
+	int flags;
+	char tmpchr;
+
+	dvp = ap->a_dvp;
+	vpp = ap->a_vpp;
+	cnp = ap->a_cnp;
+	dnp = P9FS_VTON(dvp);
+	error = 0;
+	flags = cnp->cn_flags;
+	*vpp = NULLVP;
+
+	if (dnp == NULL)
+		return (ENOENT);
+
+	vses = dnp->p9fs_ses;
+	mp = vses->p9fs_mount;
+
+	/* Do the cache part ourselves */
+	if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) &&
+	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
+		return (EROFS);
+
+	if (dvp->v_type != VDIR)
+		return (ENOTDIR);
+
+	/* The caller needs search permission on the directory. */
+	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, curthread);
+	if (error)
+		return (error);
+
+	/* Do the directory walk on host to check if file exist */
+	dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error);
+	if (error)
+		return (error);
+
+	/*
+	 * Save the character present at namelen in nameptr string and
+	 * null terminate the character to get the search name for p9_dir_walk
+	 * This is done to handle when lookup is for "a" and component
+	 * name contains a/b/c
+	 */
+	tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
+	cnp->cn_nameptr[cnp->cn_namelen] = '\0';
+
+	/*
+	 * If the client_walk fails, it means the file looking for doesnt exist.
+	 * Create the file is the flags are set or just return the error
+	 */
+	if (cnp->cn_nameptr[0] == '.' && strlen(cnp->cn_nameptr) == 1) {
+		/* "." : walk with zero names, i.e. clone the directory fid. */
+		newfid = p9_client_walk(dvfid, 0, NULL, 1, &error);
+	} else {
+		newfid = p9_client_walk(dvfid, 1, &cnp->cn_nameptr, 1, &error);
+	}
+
+	/* Put back the character that was overwritten by the terminator. */
+	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
+
+	if (error != 0 || newfid == NULL) {
+		/* Clunk the newfid if it is not NULL */
+		if (newfid != NULL)
+			p9_client_clunk(newfid);
+
+		if (error != ENOENT)
+			return (error);
+
+		/* The requested file was not found. */
+		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
+		    (flags & ISLASTCN)) {
+
+			if (mp->mnt_flag & MNT_RDONLY)
+				return (EROFS);
+
+			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
+			    curthread);
+			if (!error) {
+				return (EJUSTRETURN);
+			}
+		}
+		return (error);
+	}
+
+	/* Look for the entry in the component cache*/
+	error = cache_lookup(dvp, vpp, cnp, NULL, NULL);
+	if (error > 0 && error != ENOENT) {
+		P9_DEBUG(VOPS, "%s: Cache lookup error %d \n", __func__, error);
+		goto out;
+	}
+
+	if (error == -1) {
+		/* Name cache hit: *vpp holds the cached vnode. */
+		vp = *vpp;
+		/* Check if the entry in cache is stale or not */
+		if ((p9fs_node_cmp(vp, &newfid->qid) == 0) &&
+		    ((error = VOP_GETATTR(vp, &vattr, cnp->cn_cred)) == 0)) {
+			/* Still valid: return it; "out" clunks the unused newfid. */
+			goto out;
+		}
+		/*
+		 * This case, we have an error coming from getattr,
+		 * act accordingly.
+		 */
+		cache_purge(vp);
+		if (dvp != vp)
+			vput(vp);
+		else
+			vrele(vp);
+
+		*vpp = NULLVP;
+	} else if (error == ENOENT) {
+		/* Negative cache entry. */
+		if (VN_IS_DOOMED(dvp))
+			goto out;
+		if (VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0) {
+			error = ENOENT;
+			goto out;
+		}
+		cache_purge_negative(dvp);
+	}
+	/* Reset values */
+	error = 0;
+	vp = NULLVP;
+
+	/* Terminate the component name again for the vget calls below. */
+	tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
+	cnp->cn_nameptr[cnp->cn_namelen] = '\0';
+
+	/*
+	 * Looks like we have found an entry. Now take care of all other cases.
+	 */
+	if (flags & ISDOTDOT) {
+		struct p9fs_lookup_alloc_arg p9aa;
+		p9aa.cnp = cnp;
+		p9aa.dnp = dnp;
+		p9aa.newfid = newfid;
+		error = vn_vget_ino_gen(dvp, p9fs_lookup_alloc, &p9aa, 0, &vp);
+		if (error)
+			goto out;
+		*vpp = vp;
+	} else {
+		/*
+		 * client_walk is equivalent to searching a component name in a
+		 * directory(fid) here. If new fid is returned, we have found an
+		 * entry for this component name so, go and create the rest of
+		 * the vnode infra(vget_common) for the returned newfid.
+		 */
+		if ((cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)
+		    && (flags & ISLASTCN)) {
+			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
+			    curthread);
+			if (error)
+				goto out;
+
+			error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
+			    dnp, newfid, &vp, cnp->cn_nameptr);
+			if (error)
+				goto out;
+
+			*vpp = vp;
+			np = P9FS_VTON(vp);
+			/*
+			 * Sticky directory: only root, the directory owner or
+			 * the file owner may delete/rename the entry.
+			 */
+			if ((dnp->inode.i_mode & ISVTX) &&
+			    cnp->cn_cred->cr_uid != 0 &&
+			    cnp->cn_cred->cr_uid != dnp->inode.n_uid &&
+			    cnp->cn_cred->cr_uid != np->inode.n_uid) {
+				vput(*vpp);
+				*vpp = NULL;
+				cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
+				return (EPERM);
+			}
+		} else {
+			error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
+			    dnp, newfid, &vp, cnp->cn_nameptr);
+			if (error)
+				goto out;
+			*vpp = vp;
+		}
+	}
+
+	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
+
+	/* Store the result the cache if MAKEENTRY is specified in flags */
+	if ((cnp->cn_flags & MAKEENTRY) != 0)
+		cache_enter(dvp, *vpp, cnp);
+	return (error);
+out:
+	/* Common exit: restore the name buffer and drop the walked fid. */
+	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
+	p9_client_clunk(newfid);
+	return (error);
+}
+
+/*
+ * Common creation function for file/directory with respective flags. We first
+ * open the parent directory in order to create the file under it. For this,
+ * as 9P protocol suggests, we need to call client_walk to create the open fid.
+ * Once we have the open fid, the file_create function creates the direntry with
+ * the name and perm specified under the parent dir.
If this succeeds (an entry
+ * is created for the new file on the server), we create our metadata for this
+ * file (vnode, p9fs node calling vget). Once we are done, we clunk the open
+ * fid of the parent directory.
+ *
+ * dnp is the parent directory node, cnp the name of the new entry, perm the
+ * 9P permission/type bits and mode the 9P open mode passed to the create
+ * request; extension is forwarded to p9_client_file_create() unchanged.
+ * When perm has P9PROTO_DMLINK set no vnode is built and *vpp is not written.
+ * Returns 0 on success or the error reported by the failing step.
+ */
+static int
+create_common(struct p9fs_node *dnp, struct componentname *cnp,
+    char *extension, uint32_t perm, uint8_t mode, struct vnode **vpp)
+{
+	char tmpchr;
+	struct p9_fid *dvfid, *ofid, *newfid;
+	struct p9fs_session *vses;
+	struct mount *mp;
+	int error;
+
+	P9_DEBUG(VOPS, "%s: name %s\n", __func__, cnp->cn_nameptr);
+
+	vses = dnp->p9fs_ses;
+	mp = vses->p9fs_mount;
+	newfid = NULL;
+	error = 0;
+
+	dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error);
+	if (error != 0)
+		return (error);
+
+	/* Clone the directory fid to create the new file */
+	ofid = p9_client_walk(dvfid, 0, NULL, 1, &error);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Save the character present at namelen in nameptr string and
+	 * null terminate the character to get the search name for p9_dir_walk
+	 */
+	tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
+	cnp->cn_nameptr[cnp->cn_namelen] = '\0';
+
+	error = p9_client_file_create(ofid, cnp->cn_nameptr, perm, mode,
+	    extension);
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: p9_client_fcreate failed %d\n", __func__, error);
+		goto out;
+	}
+
+	/* If its not hardlink only then do the walk, else we are done. */
+	if (!(perm & P9PROTO_DMLINK)) {
+		/*
+		 * Do the lookup part and add the vnode, p9fs node. Note that vpp
+		 * is filled in here.
+		 */
+		newfid = p9_client_walk(dvfid, 1, &cnp->cn_nameptr, 1, &error);
+		if (newfid != NULL) {
+			error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
+			    dnp, newfid, vpp, cnp->cn_nameptr);
+			if (error != 0)
+				goto out;
+		} else {
+			/* Not found return NOENTRY.*/
+			goto out;
+		}
+
+		if ((cnp->cn_flags & MAKEENTRY) != 0)
+			cache_enter(P9FS_NTOV(dnp), *vpp, cnp);
+	}
+	P9_DEBUG(VOPS, "%s: created file under vp %p node %p fid %ju\n",
+	    __func__, *vpp, dnp, (uintmax_t)dvfid->fid);
+	/* Clunk the open ofid. */
+	if (ofid != NULL)
+		(void)p9_client_clunk(ofid);
+
+	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
+	return (0);
+out:
+	/* Error exit: drop both temporary fids and restore the name buffer. */
+	if (ofid != NULL)
+		(void)p9_client_clunk(ofid);
+
+	if (newfid != NULL)
+		(void)p9_client_clunk(newfid);
+
+	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
+	return (error);
+}
+
+/*
+ * This is the main file creation VOP. Make the permissions of the new
+ * file and call the create_common common code to complete the create.
+ *
+ * The file is created with 9P open mode P9PROTO_ORDWR.  On success
+ * P9FS_INCR_LINKS() is applied to the parent directory's in-memory inode.
+ */
+static int
+p9fs_create(struct vop_create_args *ap)
+{
+	struct vnode *dvp;
+	struct vnode **vpp;
+	struct componentname *cnp;
+	uint32_t mode;
+	struct p9fs_node *dnp;
+	struct p9fs_inode *dinode;
+	uint32_t perm;
+	int ret;
+
+	dvp = ap->a_dvp;
+	vpp = ap->a_vpp;
+	cnp = ap->a_cnp;
+	dnp = P9FS_VTON(dvp);
+	dinode = &dnp->inode;
+	mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
+	perm = p9fs_unix2p9_mode(mode);
+
+	P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp);
+
+	ret = create_common(dnp, cnp, NULL, perm, P9PROTO_ORDWR, vpp);
+	if (ret == 0) {
+		P9FS_INCR_LINKS(dinode);
+	}
+
+	return (ret);
+}
+
+/*
+ * p9fs_mkdir is the main directory creation vop. Make the permissions of the new dir
+ * and call the create_common common code to complete the create.
+ */ +static int +p9fs_mkdir(struct vop_mkdir_args *ap) +{ + struct vnode *dvp; + struct vnode **vpp; + struct componentname *cnp; + uint32_t mode; + struct p9fs_node *dnp; + struct p9fs_inode *dinode; + uint32_t perm; + int ret; + + dvp = ap->a_dvp; + vpp = ap->a_vpp; + cnp = ap->a_cnp; + dnp = P9FS_VTON(dvp); + dinode = &dnp->inode; + mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode); + perm = p9fs_unix2p9_mode(mode | S_IFDIR); + + P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp); + + ret = create_common(dnp, cnp, NULL, perm, P9PROTO_ORDWR, vpp); + if (ret == 0) + P9FS_INCR_LINKS(dinode); + + return (ret); +} + +/* + * p9fs_mknod is the main node creation vop. Make the permissions of the new node + * and call the create_common common code to complete the create. + */ +static int +p9fs_mknod(struct vop_mknod_args *ap) +{ + struct vnode *dvp; + struct vnode **vpp; + struct componentname *cnp; + uint32_t mode; + struct p9fs_node *dnp; + struct p9fs_inode *dinode; + uint32_t perm; + int ret; + + dvp = ap->a_dvp; + vpp = ap->a_vpp; + cnp = ap->a_cnp; + dnp = P9FS_VTON(dvp); + dinode = &dnp->inode; + mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode); + perm = p9fs_unix2p9_mode(mode); + + P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp); + + ret = create_common(dnp, cnp, NULL, perm, P9PROTO_OREAD, vpp); + if (ret == 0) { + P9FS_INCR_LINKS(dinode); + } + + return (ret); +} + +/* Convert open mode permissions to P9 */ +static int +p9fs_uflags_mode(int uflags, int extended) +{ + uint32_t ret; + + /* Convert first to O flags.*/ + uflags = OFLAGS(uflags); + + switch (uflags & 3) { + + case O_RDONLY: + ret = P9PROTO_OREAD; + break; + + case O_WRONLY: + ret = P9PROTO_OWRITE; + break; + + case O_RDWR: + ret = P9PROTO_ORDWR; + break; + } + + if (extended) { + if (uflags & O_EXCL) + ret |= P9PROTO_OEXCL; + + if (uflags & O_APPEND) + ret |= P9PROTO_OAPPEND; + } + + return (ret); +} + +/* + * This is the main open VOP for every file open. 
If the file is already
+ * open, then increment and return. If there is no open fid for this file,
+ * there needs to be a client_walk which creates a new open fid for this file.
+ * Once we have a open fid, call the open on this file with the mode creating
+ * the vobject.
+ *
+ * Only regular files, directories and symlinks can be opened; any other
+ * vnode type yields EOPNOTSUPP.
+ */
+static int
+p9fs_open(struct vop_open_args *ap)
+{
+	int error;
+	struct vnode *vp;
+	struct p9fs_node *np;
+	struct p9fs_session *vses;
+	struct p9_fid *vofid, *vfid;
+	size_t filesize;
+	uint32_t mode;
+
+	error = 0;
+	vp = ap->a_vp;
+	np = P9FS_VTON(vp);
+	vses = np->p9fs_ses;
+
+	P9_DEBUG(VOPS, "%s: vp %p\n", __func__, vp);
+
+	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
+		return (EOPNOTSUPP);
+
+	/* Refresh the cached attributes; this may set P9FS_NODE_MODIFIED. */
+	error = p9fs_reload_stats_dotl(vp, ap->a_cred);
+	if (error != 0)
+		return (error);
+
+	ASSERT_VOP_LOCKED(vp, __func__);
+	/*
+	 * Invalidate the pages of the vm_object cache if the file is modified
+	 * based on the flag set in reload stats
+	 */
+	if (vp->v_type == VREG && (np->flags & P9FS_NODE_MODIFIED) != 0) {
+		error = vinvalbuf(vp, 0, 0, 0);
+		if (error != 0)
+			return (error);
+		np->flags &= ~P9FS_NODE_MODIFIED;
+	}
+
+	vfid = p9fs_get_fid(vses->clnt, np, ap->a_cred, VFID, -1, &error);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Translate kernel fflags to 9p mode
+	 */
+	mode = p9fs_uflags_mode(ap->a_mode, 1);
+
+	/*
+	 * Search the fid in vofid_list for current user. If found increase the open
+	 * count and return. If not found clone a new fid and open the file using
+	 * that cloned fid.
+	 */
+	vofid = p9fs_get_fid(vses->clnt, np, ap->a_cred, VOFID, mode, &error);
+	if (vofid != NULL) {
+		vofid->v_opens++;
+		return (0);
+	} else {
+		/*vofid is the open fid for this file.*/
+		vofid = p9_client_walk(vfid, 0, NULL, 1, &error);
+		if (error != 0)
+			return (error);
+	}
+
+	error = p9_client_open(vofid, mode);
+	if (error != 0)
+		p9_client_clunk(vofid);
+	else {
+		/* First open through this fid: record it on the node. */
+		vofid->v_opens = 1;
+		filesize = np->inode.i_size;
+		vnode_create_vobject(vp, filesize, ap->a_td);
+		p9fs_fid_add(np, vofid, VOFID);
+	}
+
+	return (error);
+}
+
+/*
+ * Close the open references. Just reduce the open count on vofid and return.
+ * Let clunking of VOFID happen in p9fs_reclaim.
+ *
+ * Always returns 0, including when the vnode has no p9fs node or no
+ * matching open fid is found.
+ */
+static int
+p9fs_close(struct vop_close_args *ap)
+{
+	struct vnode *vp;
+	struct p9fs_node *np;
+	struct p9fs_session *vses;
+	struct p9_fid *vofid;
+	int error;
+
+	vp = ap->a_vp;
+	np = P9FS_VTON(vp);
+
+	if (np == NULL)
+		return (0);
+
+	vses = np->p9fs_ses;
+	error = 0;
+
+	P9_DEBUG(VOPS, "%s: file_name %s\n", __func__, np->inode.i_name);
+
+	/*
+	 * Translate kernel fflags to 9p mode
+	 */
+	vofid = p9fs_get_fid(vses->clnt, np, ap->a_cred, VOFID,
+	    p9fs_uflags_mode(ap->a_fflag, 1), &error);
+	if (vofid == NULL)
+		return (0);
+
+	vofid->v_opens--;
+
+	return (0);
+}
+
+/*
+ * Helper routine for checking if fileops are possible on this file
+ *
+ * Returns EROFS when a modifying access is requested on a directory,
+ * symlink or regular file of a read-only mount, EINVAL for an unknown
+ * vnode type, and 0 otherwise.
+ */
+static int
+p9fs_check_possible(struct vnode *vp, struct vattr *vap, mode_t mode)
+{
+
+	/* Check if we are allowed to write */
+	switch (vap->va_type) {
+	case VDIR:
+	case VLNK:
+	case VREG:
+		/*
+		 * Normal nodes: check if we're on a read-only mounted
+		 * file system and bail out if we're trying to write.
+		 */
+		if ((mode & VMODIFY_PERMS) && (vp->v_mount->mnt_flag & MNT_RDONLY))
+			return (EROFS);
+		break;
+	case VBLK:
+	case VCHR:
+	case VSOCK:
+	case VFIFO:
+		/*
+		 * Special nodes: even on read-only mounted file systems
+		 * these are allowed to be written to if permissions allow.
+		 */
+		break;
+	default:
+		/* No idea what this is */
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
+/*
+ * Check the access permissions of the file.
+ *
+ * The attributes are fetched with VOP_GETATTR(), the read-only mount check
+ * is applied, and the final decision is left to the generic vaccess().
+ */
+static int
+p9fs_access(struct vop_access_args *ap)
+{
+	struct vnode *vp;
+	accmode_t accmode;
+	struct ucred *cred;
+	struct vattr vap;
+	int error;
+
+	vp = ap->a_vp;
+	accmode = ap->a_accmode;
+	cred = ap->a_cred;
+
+	P9_DEBUG(VOPS, "%s: vp %p\n", __func__, vp);
+
+	/* make sure getattr is working correctly and is defined.*/
+	error = VOP_GETATTR(vp, &vap, cred);
+	if (error != 0)
+		return (error);
+
+	error = p9fs_check_possible(vp, &vap, accmode);
+	if (error != 0)
+		return (error);
+
+	/* Call the Generic Access check in VOPS*/
+	error = vaccess(vp->v_type, vap.va_mode, vap.va_uid, vap.va_gid, accmode,
+	    cred);
+
+
+	return (error);
+}
+
+/*
+ * Reload the file stats from the server and update the inode structure present
+ * in p9fs node.
+ *
+ * A fid opened for reading is preferred; if none exists the plain (VFID)
+ * fid of the node is used.  Returns 0 or the error from the fid lookup or
+ * from the getattr request.
+ */
+int
+p9fs_reload_stats_dotl(struct vnode *vp, struct ucred *cred)
+{
+	struct p9_stat_dotl *stat;
+	int error;
+	struct p9fs_node *node;
+	struct p9fs_session *vses;
+	struct p9_fid *vfid;
+
+	error = 0;
+	node = P9FS_VTON(vp);
+	vses = node->p9fs_ses;
+
+	vfid = p9fs_get_fid(vses->clnt, node, cred, VOFID, P9PROTO_OREAD, &error);
+	if (vfid == NULL) {
+		vfid = p9fs_get_fid(vses->clnt, node, cred, VFID, -1, &error);
+		if (error)
+			return (error);
+	}
+
+	/* Scratch buffer for the reply; released at "out". */
+	stat = uma_zalloc(p9fs_getattr_zone, M_WAITOK | M_ZERO);
+
+	error = p9_client_getattr(vfid, stat, P9PROTO_STATS_ALL);
+	if (error != 0) {
+		P9_DEBUG(ERROR, "%s: p9_client_getattr failed: %d\n", __func__, error);
+		goto out;
+	}
+
+	/* Init the vnode with the disk info */
+	p9fs_stat_vnode_dotl(stat, vp);
+out:
+	if (stat != NULL) {
+		uma_zfree(p9fs_getattr_zone, stat);
+	}
+
+	return (error);
+}
+
+/*
+ * Read the current inode values into the vap attr. We reload the stats from
+ * the server.
+ */
+static int
+p9fs_getattr_dotl(struct vop_getattr_args *ap)
+{
+	struct vnode *const vnp = ap->a_vp;
+	struct vattr *const attrs = ap->a_vap;
+	struct p9fs_node *const p9node = P9FS_VTON(vnp);
+	struct p9fs_inode *ino;
+	int rc;
+
+	/* A vnode without a p9fs node has nothing to report. */
+	if (p9node == NULL)
+		return (ENOENT);
+
+	ino = &p9node->inode;
+
+	P9_DEBUG(VOPS, "%s: %u %u\n", __func__, ino->i_mode, IFTOVT(ino->i_mode));
+
+	/* Refresh the cached inode from the server before copying it out. */
+	rc = p9fs_reload_stats_dotl(vnp, ap->a_cred);
+	if (rc != 0) {
+		P9_DEBUG(ERROR, "%s: failed: %d\n", __func__, rc);
+		return (rc);
+	}
+
+	/* Start from an all-"unset" vattr, then fill in what we know. */
+	VATTR_NULL(attrs);
+
+	/* Timestamps. */
+	attrs->va_atime.tv_sec = ino->i_atime;
+	attrs->va_atime.tv_nsec = ino->i_atime_nsec;
+	attrs->va_mtime.tv_sec = ino->i_mtime;
+	attrs->va_mtime.tv_nsec = ino->i_mtime_nsec;
+	attrs->va_ctime.tv_sec = ino->i_ctime;
+	attrs->va_ctime.tv_nsec = ino->i_ctime_nsec;
+
+	/* Type, permissions and ownership. */
+	attrs->va_type = IFTOVT(ino->i_mode);
+	attrs->va_mode = ino->i_mode;
+	attrs->va_uid = ino->n_uid;
+	attrs->va_gid = ino->n_gid;
+	attrs->va_flags = ino->i_flags;
+
+	/* Identity. */
+	attrs->va_fsid = vnp->v_mount->mnt_stat.f_fsid.val[0];
+	attrs->va_fileid = ino->i_qid_path;
+	attrs->va_gen = ino->gen;
+	attrs->va_filerev = ino->data_version;
+
+	/* Size and link information. */
+	attrs->va_size = ino->i_size;
+	attrs->va_nlink = ino->i_links_count;
+	attrs->va_blocksize = ino->blksize;
+	attrs->va_bytes = ino->blocks * P9PROTO_TGETATTR_BLK;
+	attrs->va_vaflags = 0;
+
+	return (0);
+}
+
+/* Convert a standard FreeBSD permission to P9.
*/ +static uint32_t +p9fs_unix2p9_mode(uint32_t mode) +{ + uint32_t res; + + res = mode & 0777; + if (S_ISDIR(mode)) + res |= P9PROTO_DMDIR; + if (S_ISSOCK(mode)) + res |= P9PROTO_DMSOCKET; + if (S_ISLNK(mode)) + res |= P9PROTO_DMSYMLINK; + if (S_ISFIFO(mode)) + res |= P9PROTO_DMNAMEDPIPE; + if ((mode & S_ISUID) == S_ISUID) + res |= P9PROTO_DMSETUID; + if ((mode & S_ISGID) == S_ISGID) + res |= P9PROTO_DMSETGID; + if ((mode & S_ISVTX) == S_ISVTX) + res |= P9PROTO_DMSETVTX; + + return (res); +} + +/* Update inode with the stats read from server.(9P2000.L version) */ +int +p9fs_stat_vnode_dotl(struct p9_stat_dotl *stat, struct vnode *vp) +{ + struct p9fs_node *np; + struct p9fs_inode *inode; + + np = P9FS_VTON(vp); + inode = &np->inode; + + ASSERT_VOP_LOCKED(vp, __func__); + /* Update the pager size if file size changes on host */ + if (inode->i_size != stat->st_size) { + inode->i_size = stat->st_size; + if (vp->v_type == VREG) + vnode_pager_setsize(vp, inode->i_size); + } + + inode->i_mtime = stat->st_mtime_sec; + inode->i_atime = stat->st_atime_sec; + inode->i_ctime = stat->st_ctime_sec; + inode->i_mtime_nsec = stat->st_mtime_nsec; + inode->i_atime_nsec = stat->st_atime_nsec; + inode->i_ctime_nsec = stat->st_ctime_nsec; + inode->n_uid = stat->st_uid; + inode->n_gid = stat->st_gid; + inode->i_mode = stat->st_mode; + vp->v_type = IFTOVT(inode->i_mode); + inode->i_links_count = stat->st_nlink; + inode->blksize = stat->st_blksize; + inode->blocks = stat->st_blocks; + inode->gen = stat->st_gen; + inode->data_version = stat->st_data_version; + + ASSERT_VOP_LOCKED(vp, __func__); + /* Setting a flag if file changes based on qid version */ + if (np->vqid.qid_version != stat->qid.version) + np->flags |= P9FS_NODE_MODIFIED; + memcpy(&np->vqid, &stat->qid, sizeof(stat->qid)); + + return (0); +} + +/* + * Write the current in memory inode stats into persistent stats structure + * to write to the server(for linux version). 
+ *
+ * Copies size, mode, uid, gid and the access/modification times from inode
+ * into p9attr.  The "valid" mask of p9attr is not touched here.  Always
+ * returns 0.
+ */
+static int
+p9fs_inode_to_iattr(struct p9fs_inode *inode, struct p9_iattr_dotl *p9attr)
+{
+	p9attr->size = inode->i_size;
+	p9attr->mode = inode->i_mode;
+	p9attr->uid = inode->n_uid;
+	p9attr->gid = inode->n_gid;
+	p9attr->atime_sec = inode->i_atime;
+	p9attr->atime_nsec = inode->i_atime_nsec;
+	p9attr->mtime_sec = inode->i_mtime;
+	p9attr->mtime_nsec = inode->i_mtime_nsec;
+
+	return (0);
+}
+
+/*
+ * Modify the ownership of a file whenever the chown is called on the
+ * file.
+ *
+ * Only the in-memory inode is updated here.  A uid or gid of VNOVAL means
+ * "keep the current value".  Returns 0 or the error from the access /
+ * privilege checks.
+ */
+static int
+p9fs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
+    struct thread *td)
+{
+	struct p9fs_node *np;
+	struct p9fs_inode *inode;
+	uid_t ouid;
+	gid_t ogid;
+	int error;
+
+	np = P9FS_VTON(vp);
+	inode = &np->inode;
+
+	if (uid == (uid_t)VNOVAL)
+		uid = inode->n_uid;
+	if (gid == (gid_t)VNOVAL)
+		gid = inode->n_gid;
+	/*
+	 * To modify the ownership of a file, must possess VWRITE_OWNER for
+	 * that file.
+	 */
+	if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td)))
+		return (error);
+	/*
+	 * To change the owner of a file, or change the group of a file to a
+	 * group of which we are not a member, the caller must have
+	 * privilege.
+	 */
+	if (((uid != inode->n_uid && uid != cred->cr_uid) ||
+	    (gid != inode->n_gid && !groupmember(gid, cred))) &&
+	    (error = priv_check_cred(cred, PRIV_VFS_CHOWN)))
+		return (error);
+
+	ogid = inode->n_gid;
+	ouid = inode->n_uid;
+
+	inode->n_gid = gid;
+	inode->n_uid = uid;
+
+	/*
+	 * If ownership actually changed, drop set-uid/set-gid unless the
+	 * caller has the privilege to retain them.
+	 */
+	if ((inode->i_mode & (ISUID | ISGID)) &&
+	    (ouid != uid || ogid != gid)) {
+
+		if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID))
+			inode->i_mode &= ~(ISUID | ISGID);
+	}
+	P9_DEBUG(VOPS, "%s: vp %p, cred %p, td %p - ret OK\n", __func__, vp, cred, td);
+
+	return (0);
+}
+
+/*
+ * Update the in memory inode with all chmod new permissions/mode. Typically a
+ * setattr is called to update it to server.
+ *
+ * Only the bits in ALLPERMS are replaced; the remaining bits of i_mode
+ * are preserved.  Returns 0, the error from the access/privilege
+ * checks, or EFTYPE when an unprivileged caller tries to set the sticky bit
+ * on a non-directory.
+ */
+static int
+p9fs_chmod(struct vnode *vp, uint32_t mode, struct ucred *cred, struct thread *td)
+{
+	struct p9fs_node *np;
+	struct p9fs_inode *inode;
+	uint32_t nmode;
+	int error;
+
+	np = P9FS_VTON(vp);
+	inode = &np->inode;
+
+	P9_DEBUG(VOPS, "%s: vp %p, mode %x, cred %p, td %p\n", __func__, vp, mode, cred, td);
+	/*
+	 * To modify the permissions on a file, must possess VADMIN
+	 * for that file.
+	 */
+	if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
+		return (error);
+
+	/*
+	 * Privileged processes may set the sticky bit on non-directories,
+	 * as well as set the setgid bit on a file with a group that the
+	 * process is not a member of. Both of these are allowed in
+	 * jail(8).
+	 */
+	if (vp->v_type != VDIR && (mode & S_ISTXT)) {
+		if (priv_check_cred(cred, PRIV_VFS_STICKYFILE))
+			return (EFTYPE);
+	}
+	if (!groupmember(inode->n_gid, cred) && (mode & ISGID)) {
+		error = priv_check_cred(cred, PRIV_VFS_SETGID);
+		if (error != 0)
+			return (error);
+	}
+
+	/*
+	 * Deny setting setuid if we are not the file owner.
+	 */
+	if ((mode & ISUID) && inode->n_uid != cred->cr_uid) {
+		error = priv_check_cred(cred, PRIV_VFS_ADMIN);
+		if (error != 0)
+			return (error);
+	}
+	/* Replace the permission bits, keep everything else. */
+	nmode = inode->i_mode;
+	nmode &= ~ALLPERMS;
+	nmode |= (mode & ALLPERMS);
+	inode->i_mode = nmode;
+
+	P9_DEBUG(VOPS, "%s: to mode %x %d \n ", __func__, nmode, error);
+
+	return (error);
+}
+
+/*
+ * Set the attributes of a file referenced by fid.
A valid bitmask is sent
+ * in request selecting which fields to set
+ *
+ * The requested changes are first applied to the in-memory inode (chown,
+ * chmod, size, times) while the matching P9PROTO_SETATTR_* bits are
+ * collected in p9attr->valid; the inode is then copied into p9attr and sent
+ * to the server.  If the request fails after a size change on a regular
+ * file, the previous in-memory size is restored.
+ */
+static int
+p9fs_setattr_dotl(struct vop_setattr_args *ap)
+{
+	struct vnode *vp;
+	struct vattr *vap;
+	struct p9fs_node *node;
+	struct p9fs_inode *inode;
+	struct ucred *cred;
+	struct thread *td;
+	struct p9_iattr_dotl *p9attr;
+	struct p9fs_session *vses;
+	struct p9_fid *vfid;
+	uint64_t oldfilesize;
+	int error;
+
+	vp = ap->a_vp;
+	vap = ap->a_vap;
+	node = P9FS_VTON(vp);
+	inode = &node->inode;
+	cred = ap->a_cred;
+	td = curthread;
+	vses = node->p9fs_ses;
+	error = 0;
+
+	/* Reject attempts to change attributes that cannot be set. */
+	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
+	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
+	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
+	    (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
+		P9_DEBUG(ERROR, "%s: unsettable attribute\n", __func__);
+		return (EINVAL);
+	}
+	/* Disallow write attempts on read only filesystem */
+	if (vp->v_mount->mnt_flag & MNT_RDONLY)
+		return (EROFS);
+
+	/* Setting of flags is not supported */
+	if (vap->va_flags != VNOVAL)
+		return (EOPNOTSUPP);
+
+	/* Allocate p9attr struct */
+	p9attr = uma_zalloc(p9fs_setattr_zone, M_WAITOK | M_ZERO);
+	if (p9attr == NULL)
+		return (ENOMEM);
+
+	/* Check if we need to change the ownership of the file*/
+	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
+		P9_DEBUG(VOPS, "%s: vp:%p td:%p uid/gid %x/%x\n", __func__,
+		    vp, td, vap->va_uid, vap->va_gid);
+
+		error = p9fs_chown(vp, vap->va_uid, vap->va_gid, cred, td);
+		/* MODE is included because p9fs_chown may clear set-uid/gid. */
+		p9attr->valid |= P9PROTO_SETATTR_UID | P9PROTO_SETATTR_GID |
+		    P9PROTO_SETATTR_MODE;
+		if (error)
+			goto out;
+	}
+
+	/* Check for mode changes */
+	if (vap->va_mode != (mode_t)VNOVAL) {
+		P9_DEBUG(VOPS, "%s: vp:%p td:%p mode %x\n", __func__, vp, td,
+		    vap->va_mode);
+
+		error = p9fs_chmod(vp, (int)vap->va_mode, cred, td);
+		p9attr->valid |= P9PROTO_SETATTR_MODE;
+		if (error)
+			goto out;
+	}
+
+	/* Update the size of the file and update mtime */
+	if (vap->va_size != (uint64_t)VNOVAL) {
+		P9_DEBUG(VOPS, "%s: vp:%p td:%p size:%jx\n", __func__,
+		    vp, td, (uintmax_t)vap->va_size);
+		switch (vp->v_type) {
+		case VDIR:
+			error = EISDIR;
+			goto out;
+		case VLNK:
+		case VREG:
+			/* Invalidate cached pages of vp */
+			error = vinvalbuf(vp, 0, 0, 0);
+			if (error)
+				goto out;
+			/* Remembered so the size can be rolled back on failure. */
+			oldfilesize = inode->i_size;
+			inode->i_size = vap->va_size;
+			/* Update the p9fs_inode time */
+			p9fs_itimes(vp);
+			p9attr->valid |= P9PROTO_SETATTR_SIZE |
+			    P9PROTO_SETATTR_ATIME |
+			    P9PROTO_SETATTR_MTIME |
+			    P9PROTO_SETATTR_ATIME_SET |
+			    P9PROTO_SETATTR_MTIME_SET ;
+			break;
+		default:
+			/* Other vnode types: nothing is sent, error stays 0. */
+			goto out;
+		}
+	} else if (vap->va_atime.tv_sec != VNOVAL ||
+	    vap->va_mtime.tv_sec != VNOVAL) {
+		P9_DEBUG(VOPS, "%s: vp:%p td:%p time a/m %jx/%jx/\n",
+		    __func__, vp, td, (uintmax_t)vap->va_atime.tv_sec,
+		    (uintmax_t)vap->va_mtime.tv_sec);
+		/* Update the p9fs_inode times */
+		p9fs_itimes(vp);
+		p9attr->valid |= P9PROTO_SETATTR_ATIME |
+		    P9PROTO_SETATTR_MTIME | P9PROTO_SETATTR_ATIME_SET |
+		    P9PROTO_SETATTR_MTIME_SET;
+	}
+
+	/* Prefer a fid opened for writing, fall back to the plain fid. */
+	vfid = p9fs_get_fid(vses->clnt, node, cred, VOFID, P9PROTO_OWRITE, &error);
+	if (vfid == NULL) {
+		vfid = p9fs_get_fid(vses->clnt, node, cred, VFID, -1, &error);
+		if (error)
+			goto out;
+	}
+
+	/* Write the inode structure values into p9attr */
+	p9fs_inode_to_iattr(inode, p9attr);
+	error = p9_client_setattr(vfid, p9attr);
+	if (vap->va_size != (uint64_t)VNOVAL && vp->v_type == VREG) {
+		if (error)
+			inode->i_size = oldfilesize;
+		else
+			vnode_pager_setsize(vp, inode->i_size);
+	}
+out:
+	if (p9attr) {
+		uma_zfree(p9fs_setattr_zone, p9attr);
+	}
+	P9_DEBUG(VOPS, "%s: error: %d\n", __func__, error);
+	return (error);
+}
+
+/* State shared between p9fs_get_open_fid() and p9fs_release_open_fid(). */
+struct open_fid_state {
+	struct p9_fid *vofid;	/* open fid to use for the I/O */
+	int fflags;		/* flags used for the implicit VOP_OPEN */
+	int opened;		/* TRUE if p9fs_get_open_fid() opened the vnode */
+};
+
+/*
+ * TODO: change this to take P9PROTO_* mode and avoid routing through
+ * VOP_OPEN, factoring out implementation of p9fs_open.
+ */
+static int
+p9fs_get_open_fid(struct vnode *vp, int fflags, struct ucred *cr,
+    struct open_fid_state *statep)
+{
+	const int p9mode = p9fs_uflags_mode(fflags, TRUE);
+	struct p9fs_node *node;
+	struct p9fs_session *ses;
+	struct p9_fid *fid;
+	int rc = 0;
+
+	statep->opened = FALSE;
+
+	node = P9FS_VTON(vp);
+	ses = node->p9fs_ses;
+
+	/* Fast path: an open fid matching this mode already exists. */
+	fid = p9fs_get_fid(ses->clnt, node, cr, VOFID, p9mode, &rc);
+	if (fid != NULL) {
+		statep->vofid = fid;
+		return (0);
+	}
+
+	/* None yet: open the vnode ourselves and look the fid up again. */
+	rc = VOP_OPEN(vp, fflags, cr, curthread, NULL);
+	if (rc != 0)
+		return (rc);
+
+	fid = p9fs_get_fid(ses->clnt, node, cr, VOFID, p9mode, &rc);
+	if (fid == NULL)
+		return (EBADF);
+
+	/* Remember that the matching close is our responsibility. */
+	statep->fflags = fflags;
+	statep->opened = TRUE;
+	statep->vofid = fid;
+	return (0);
+}
+
+/*
+ * Undo p9fs_get_open_fid(): close the vnode again, but only if that call
+ * was the one that opened it.
+ */
+static void
+p9fs_release_open_fid(struct vnode *vp, struct ucred *cr,
+    struct open_fid_state *statep)
+{
+	if (!statep->opened)
+		return;
+
+	(void) VOP_CLOSE(vp, statep->fflags, cr, curthread);
+}
+
+/*
+ * An I/O buffer is used to do any transfer. The uio is the vfs structure we
+ * need to copy data into. As long as resid is greater than zero, we call
+ * client_read to read data from offset(offset into the file) in the open fid
+ * for the file into the I/O buffer. The data is read into the user data buffer.
+ */ +static int +p9fs_read(struct vop_read_args *ap) +{ + struct vnode *vp; + struct uio *uio; + struct p9fs_node *np; + uint64_t offset; + int64_t ret; + uint64_t resid; + uint32_t count; + int error; + char *io_buffer = NULL; + uint64_t filesize; + struct open_fid_state ostate; + + vp = ap->a_vp; + uio = ap->a_uio; + np = P9FS_VTON(vp); + error = 0; + + if (vp->v_type == VCHR || vp->v_type == VBLK) + return (EOPNOTSUPP); + if (vp->v_type != VREG) + return (EISDIR); + if (uio->uio_resid == 0) + return (0); + if (uio->uio_offset < 0) + return (EINVAL); + + error = p9fs_get_open_fid(vp, FREAD, ap->a_cred, &ostate); + if (error) + return (error); + + /* where in the file are we to start reading */ + offset = uio->uio_offset; + filesize = np->inode.i_size; + if (uio->uio_offset >= filesize) + goto out; + + P9_DEBUG(VOPS, "%s: called %jd at %ju\n", + __func__, (intmax_t)uio->uio_resid, (uintmax_t)uio->uio_offset); + + /* Work with a local buffer from the pool for this vop */ + + io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK | M_ZERO); + while ((resid = uio->uio_resid) > 0) { + if (offset >= filesize) + break; + count = MIN(filesize - uio->uio_offset , resid); + if (count == 0) + break; + + /* Copy count bytes into the uio */ + ret = p9_client_read(ostate.vofid, offset, count, io_buffer); + /* + * This is the only place in the entire p9fs where we check the + * error for < 0 as p9_client_read/write return the number of + * bytes instead of an error code. In this case if ret is < 0, + * it means there is an IO error. + */ + if (ret < 0) { + error = -ret; + goto out; + } + error = uiomove(io_buffer, ret, uio); + if (error != 0) + goto out; + + offset += ret; + } + uio->uio_offset = offset; +out: + uma_zfree(p9fs_io_buffer_zone, io_buffer); + p9fs_release_open_fid(vp, ap->a_cred, &ostate); + + return (error); +} + +/* + * The user buffer contains the data to be written. This data is copied first + * from uio into I/O buffer. 
This I/O buffer is used to do the client_write to + * the fid of the file starting from the offset given upto count bytes. The + * number of bytes written is returned to the caller. + */ +static int +p9fs_write(struct vop_write_args *ap) +{ + struct vnode *vp; + struct uio *uio; + struct p9fs_node *np; + uint64_t off, offset; + int64_t ret; + uint64_t resid, bytes_written; + uint32_t count; + int error, ioflag; + uint64_t file_size; + char *io_buffer = NULL; + struct open_fid_state ostate; + + vp = ap->a_vp; + uio = ap->a_uio; + np = P9FS_VTON(vp); + error = 0; + ioflag = ap->a_ioflag; + + error = p9fs_get_open_fid(vp, FWRITE, ap->a_cred, &ostate); + if (error) + return (error); + + P9_DEBUG(VOPS, "%s: %#zx at %#jx\n", + __func__, uio->uio_resid, (uintmax_t)uio->uio_offset); + + if (uio->uio_offset < 0) { + error = EINVAL; + goto out; + } + if (uio->uio_resid == 0) + goto out; + + file_size = np->inode.i_size; + + switch (vp->v_type) { + case VREG: + if (ioflag & IO_APPEND) + uio->uio_offset = file_size; + break; + case VDIR: + return (EISDIR); + case VLNK: + break; + default: + panic("%s: bad file type vp: %p", __func__, vp); + } + + resid = uio->uio_resid; + offset = uio->uio_offset; + bytes_written = 0; + error = 0; + + io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK | M_ZERO); + while ((resid = uio->uio_resid) > 0) { + off = 0; + count = MIN(resid, P9FS_IOUNIT); + error = uiomove(io_buffer, count, uio); + + if (error != 0) { + P9_DEBUG(ERROR, "%s: uiomove failed: %d\n", __func__, error); + goto out; + } + + /* While count still exists, keep writing.*/ + while (count > 0) { + /* Copy count bytes from the uio */ + ret = p9_client_write(ostate.vofid, offset, count, + io_buffer + off); + if (ret < 0) { + if (bytes_written == 0) { + error = -ret; + goto out; + } else { + break; + } + } + P9_DEBUG(VOPS, "%s: write %#zx at %#jx\n", + __func__, uio->uio_resid, (uintmax_t)uio->uio_offset); + + off += ret; + offset += ret; + bytes_written += ret; + count -= ret; + } 
+ } + /* Update the fields in the node to reflect the change*/ + if (file_size < uio->uio_offset + uio->uio_resid) { + np->inode.i_size = uio->uio_offset + uio->uio_resid; + vnode_pager_setsize(vp, uio->uio_offset + uio->uio_resid); + } +out: + if (io_buffer) + uma_zfree(p9fs_io_buffer_zone, io_buffer); + p9fs_release_open_fid(vp, ap->a_cred, &ostate); + + return (error); +} + +/* + * Common handler of all removal-related VOPs (e.g. rmdir, rm). Perform the + * client_remove op to send messages to remove the node's fid on the server. + * After that, does a node metadata cleanup on client side. + */ +static int +remove_common(struct p9fs_node *np, struct ucred *cred) +{ + int error; + struct p9fs_session *vses; + struct vnode *vp; + struct p9_fid *vfid; + + error = 0; + vses = np->p9fs_ses; + vp = P9FS_NTOV(np); + + vfid = p9fs_get_fid(vses->clnt, np, cred, VFID, -1, &error); + if (error != 0) + return (error); + + error = p9_client_remove(vfid); + if (error != 0) + return (error); + + /* Remove all non-open fids associated with the vp */ + p9fs_fid_remove_all(np, TRUE); + + /* Invalidate all entries of vnode from name cache and hash list. */ + cache_purge(vp); + + vfs_hash_remove(vp); + np->flags |= P9FS_NODE_DELETED; + + return (error); +} + +/* Remove vop for all files. Call common code for remove and adjust links */ +static int +p9fs_remove(struct vop_remove_args *ap) +{ + struct vnode *vp; + struct p9fs_node *np; + struct vnode *dvp; + struct p9fs_node *dnp; + struct p9fs_inode *dinode; + int error; + + vp = ap->a_vp; + np = P9FS_VTON(vp); + dvp = ap->a_dvp; + dnp = P9FS_VTON(dvp); + dinode = &dnp->inode; + + P9_DEBUG(VOPS, "%s: vp %p node %p \n", __func__, vp, np); + + if (vp->v_type == VDIR) + return (EISDIR); + + error = remove_common(np, ap->a_cnp->cn_cred); + if (error == 0) + P9FS_DECR_LINKS(dinode); + + return (error); +} + +/* Remove vop for all directories. 
Call common code for remove and adjust links */ +static int +p9fs_rmdir(struct vop_rmdir_args *ap) +{ + struct vnode *vp; + struct p9fs_node *np; + struct vnode *dvp; + struct p9fs_node *dnp; + struct p9fs_inode *dinode; + int error; + + vp = ap->a_vp; + np = P9FS_VTON(vp); + dvp = ap->a_dvp; + dnp = P9FS_VTON(dvp); + dinode = &dnp->inode; + + P9_DEBUG(VOPS, "%s: vp %p node %p \n", __func__, vp, np); + + error = remove_common(np, ap->a_cnp->cn_cred); + if (error == 0) + P9FS_DECR_LINKS(dinode); + + return (error); +} + +/* + * Create symlinks. Make the permissions and call create_common code + * for Soft links. + */ +static int +p9fs_symlink(struct vop_symlink_args *ap) +{ + struct vnode *dvp; + struct vnode **vpp; + struct vattr *vap; + struct componentname *cnp; + char *symtgt; + struct p9fs_node *dnp; + struct p9fs_session *vses; + struct mount *mp; + struct p9_fid *dvfid, *newfid; + int error; + char tmpchr; + gid_t gid; + + dvp = ap->a_dvp; + vpp = ap->a_vpp; + vap = ap->a_vap; + cnp = ap->a_cnp; + symtgt = (char*)(uintptr_t) ap->a_target; + dnp = P9FS_VTON(dvp); + vses = dnp->p9fs_ses; + mp = vses->p9fs_mount; + newfid = NULL; + error = 0; + gid = vap->va_gid; + + P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp); + + /* + * Save the character present at namelen in nameptr string and + * null terminate the character to get the search name for p9_dir_walk + */ + tmpchr = cnp->cn_nameptr[cnp->cn_namelen]; + cnp->cn_nameptr[cnp->cn_namelen] = '\0'; + + dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error); + if (error != 0) + goto out; + + error = p9_create_symlink(dvfid, cnp->cn_nameptr, symtgt, gid); + if (error != 0) + goto out; + + /*create vnode for symtgt */ + newfid = p9_client_walk(dvfid, 1, &cnp->cn_nameptr, 1, &error); + if (newfid != NULL) { + error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags, + dnp, newfid, vpp, cnp->cn_nameptr); + if (error != 0) + goto out; + } else + goto out; + + if ((cnp->cn_flags & MAKEENTRY) != 0) { + 
cache_enter(P9FS_NTOV(dnp), *vpp, cnp); + } + P9_DEBUG(VOPS, "%s: created file under vp %p node %p fid %ju\n", + __func__, *vpp, dnp, (uintmax_t)dvfid->fid); + + cnp->cn_nameptr[cnp->cn_namelen] = tmpchr; + return (error); + +out: + if (newfid != NULL) + p9_client_clunk(newfid); + cnp->cn_nameptr[cnp->cn_namelen] = tmpchr; + return (error); +} + +/* Create hard link */ +static int +p9fs_link(struct vop_link_args *ap) +{ + struct vnode *vp; + struct vnode *tdvp; + struct componentname *cnp; + struct p9fs_node *dnp; + struct p9fs_node *np; + struct p9fs_inode *inode; + struct p9fs_session *vses; + struct p9_fid *dvfid, *oldvfid; + int error; + + vp = ap->a_vp; + tdvp = ap->a_tdvp; + cnp = ap->a_cnp; + dnp = P9FS_VTON(tdvp); + np = P9FS_VTON(vp); + inode = &np->inode; + vses = np->p9fs_ses; + error = 0; + + P9_DEBUG(VOPS, "%s: tdvp %p vp %p\n", __func__, tdvp, vp); + + dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error); + if (error != 0) + return (error); + oldvfid = p9fs_get_fid(vses->clnt, np, cnp->cn_cred, VFID, -1, &error); + if (error != 0) + return (error); + + error = p9_create_hardlink(dvfid, oldvfid, cnp->cn_nameptr); + if (error != 0) + return (error); + /* Increment ref count on the inode */ + P9FS_INCR_LINKS(inode); + + return (0); +} + +/* Read contents of the symbolic link */ +static int +p9fs_readlink(struct vop_readlink_args *ap) +{ + struct vnode *vp; + struct uio *uio; + struct p9fs_node *dnp; + struct p9fs_session *vses; + struct p9_fid *dvfid; + int error, len; + char *target; + + vp = ap->a_vp; + uio = ap->a_uio; + dnp = P9FS_VTON(vp); + vses = dnp->p9fs_ses; + error = 0; + + P9_DEBUG(VOPS, "%s: vp %p\n", __func__, vp); + + dvfid = p9fs_get_fid(vses->clnt, dnp, ap->a_cred, VFID, -1, &error); + if (error != 0) + return (error); + + error = p9_readlink(dvfid, &target); + if (error != 0) + return (error); + + len = strlen(target); + error = uiomove(target, len, uio); + + return (0); +} + +/* + * Iterate through a directory. 
An entire 8k data is read into the I/O buffer. + * This buffer is parsed to make dir entries and fed to the user buffer to + * complete it to the VFS. + */ +static int +p9fs_readdir(struct vop_readdir_args *ap) +{ + struct uio *uio; + struct vnode *vp; + struct dirent cde; + int64_t offset; + uint64_t diroffset; + struct p9fs_node *np; + int error; + int32_t count; + struct p9_client *clnt; + struct p9_dirent dent; + char *io_buffer; + struct p9_fid *vofid; + + uio = ap->a_uio; + vp = ap->a_vp; + np = P9FS_VTON(ap->a_vp); + offset = 0; + diroffset = 0; + error = 0; + count = 0; + clnt = np->p9fs_ses->clnt; + + P9_DEBUG(VOPS, "%s: vp %p, offset %ld, resid %zd\n", __func__, vp, uio->uio_offset, uio->uio_resid); + + if (ap->a_uio->uio_iov->iov_len <= 0) + return (EINVAL); + + if (vp->v_type != VDIR) + return (ENOTDIR); + + vofid = p9fs_get_fid(clnt, np, ap->a_cred, VOFID, P9PROTO_OREAD, &error); + if (vofid == NULL) { + P9_DEBUG(ERROR, "%s: NULL FID\n", __func__); + return (EBADF); + } + + io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK); + + /* We haven't reached the end yet. read more. */ + diroffset = uio->uio_offset; + while (uio->uio_resid >= sizeof(struct dirent)) { + /* + * We need to read more data as what is indicated by filesize because + * filesize is based on data stored in struct dirent structure but + * we read data in struct p9_dirent format which has different size. + * Hence we read max data(P9FS_IOUNIT) everytime from host, convert + * it into struct dirent structure and send it back. + */ + count = P9FS_IOUNIT; + bzero(io_buffer, P9FS_MTU); + count = p9_client_readdir(vofid, (char *)io_buffer, + diroffset, count); + + if (count == 0) + break; + + if (count < 0) { + error = EIO; + goto out; + } + + offset = 0; + while (offset + QEMU_DIRENTRY_SZ <= count) { + + /* + * Read and make sense out of the buffer in one dirent + * This is part of 9p protocol read. 
This reads one p9_dirent, + * appends it to dirent(FREEBSD specifc) and continues to parse the buffer. + */ + bzero(&dent, sizeof(dent)); + offset = p9_dirent_read(clnt, io_buffer, offset, count, + &dent); + if (offset < 0 || offset > count) { + error = EIO; + goto out; + } + + bzero(&cde, sizeof(cde)); + strncpy(cde.d_name, dent.d_name, dent.len); + cde.d_fileno = dent.qid.path; + cde.d_type = dent.d_type; + cde.d_namlen = dent.len; + cde.d_reclen = GENERIC_DIRSIZ(&cde); + + /* + * If there isn't enough space in the uio to return a + * whole dirent, break off read + */ + if (uio->uio_resid < GENERIC_DIRSIZ(&cde)) + break; + + /* Transfer */ + error = uiomove(&cde, GENERIC_DIRSIZ(&cde), uio); + if (error != 0) { + error = EIO; + goto out; + } + diroffset = dent.d_off; + } + } + /* Pass on last transferred offset */ + uio->uio_offset = diroffset; + +out: + uma_zfree(p9fs_io_buffer_zone, io_buffer); + + return (error); +} + +static void +p9fs_doio(struct vnode *vp, struct buf *bp, struct p9_fid *vofid, struct ucred *cr) +{ + struct uio *uiov; + struct iovec io; + int error; + uint64_t off, offset; + uint64_t filesize; + uint64_t resid; + uint32_t count; + int64_t ret; + struct p9fs_node *np; + char *io_buffer; + + error = 0; + np = P9FS_VTON(vp); + + filesize = np->inode.i_size; + uiov = malloc(sizeof(struct uio), M_P9UIOV, M_WAITOK); + uiov->uio_iov = &io; + uiov->uio_iovcnt = 1; + uiov->uio_segflg = UIO_SYSSPACE; + io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK | M_ZERO); + + if (bp->b_iocmd == BIO_READ) { + io.iov_len = uiov->uio_resid = bp->b_bcount; + io.iov_base = bp->b_data; + uiov->uio_rw = UIO_READ; + + switch (vp->v_type) { + + case VREG: + { + uiov->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE; + + if (uiov->uio_resid) { + int left = uiov->uio_resid; + int nread = bp->b_bcount - left; + + if (left > 0) + bzero((char *)bp->b_data + nread, left); + } + /* where in the file are we to start reading */ + offset = uiov->uio_offset; + if (uiov->uio_offset 
>= filesize) + goto out; + + while ((resid = uiov->uio_resid) > 0) { + if (offset >= filesize) + break; + count = min(filesize - uiov->uio_offset, resid); + if (count == 0) + break; + + P9_DEBUG(VOPS, "%s: read called %#zx at %#jx\n", + __func__, uiov->uio_resid, (uintmax_t)uiov->uio_offset); + + /* Copy count bytes into the uio */ + ret = p9_client_read(vofid, offset, count, io_buffer); + error = uiomove(io_buffer, ret, uiov); + + if (error != 0) + goto out; + offset += ret; + } + break; + } + default: + printf("vfs: type %x unexpected\n", vp->v_type); + break; + } + } else { + if (bp->b_dirtyend > bp->b_dirtyoff) { + io.iov_len = uiov->uio_resid = bp->b_dirtyend - bp->b_dirtyoff; + uiov->uio_offset = ((off_t)bp->b_blkno) * PAGE_SIZE + bp->b_dirtyoff; + io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; + uiov->uio_rw = UIO_WRITE; + + if (uiov->uio_offset < 0) { + error = EINVAL; + goto out; + } + + if (uiov->uio_resid == 0) + goto out; + + resid = uiov->uio_resid; + offset = uiov->uio_offset; + error = 0; + + while ((resid = uiov->uio_resid) > 0) { + off = 0; + count = MIN(resid, P9FS_IOUNIT); + error = uiomove(io_buffer, count, uiov); + if (error != 0) { + goto out; + } + + while (count > 0) { + /* Copy count bytes from the uio */ + ret = p9_client_write(vofid, offset, count, + io_buffer + off); + if (ret < 0) + goto out; + + P9_DEBUG(VOPS, "%s: write called %#zx at %#jx\n", + __func__, uiov->uio_resid, (uintmax_t)uiov->uio_offset); + off += ret; + offset += ret; + count -= ret; + } + } + + /* Update the fields in the node to reflect the change */ + if (filesize < uiov->uio_offset + uiov->uio_resid) { + np->inode.i_size = uiov->uio_offset + uiov->uio_resid; + vnode_pager_setsize(vp, uiov->uio_offset + uiov->uio_resid); + /* update the modified timers. 
*/ + p9fs_itimes(vp); + } + } else { + bp->b_resid = 0; + goto out1; + } + } +out: + /* Set the error */ + if (error != 0) { + bp->b_error = error; + bp->b_ioflags |= BIO_ERROR; + } + bp->b_resid = uiov->uio_resid; +out1: + bufdone(bp); + uma_zfree(p9fs_io_buffer_zone, io_buffer); + free(uiov, M_P9UIOV); +} + +/* + * The I/O buffer is mapped to a uio and a client_write/client_read is performed + * the same way as p9fs_read and p9fs_write. + */ +static int +p9fs_strategy(struct vop_strategy_args *ap) +{ + struct vnode *vp; + struct buf *bp; + struct ucred *cr; + int error; + struct open_fid_state ostate; + + vp = ap->a_vp; + bp = ap->a_bp; + error = 0; + + P9_DEBUG(VOPS, "%s: vp %p, iocmd %d\n ", __func__, vp, bp->b_iocmd); + + if (bp->b_iocmd == BIO_READ) + cr = bp->b_rcred; + else + cr = bp->b_wcred; + + error = p9fs_get_open_fid(vp, bp->b_iocmd == BIO_READ ? FREAD : FWRITE, cr, &ostate); + if (error) { + P9_DEBUG(ERROR, "%s: p9fs_get_open_fid failed: %d\n", __func__, error); + bp->b_error = error; + bp->b_ioflags |= BIO_ERROR; + bufdone(bp); + return (0); + } + + p9fs_doio(vp, bp, ostate.vofid, cr); + p9fs_release_open_fid(vp, cr, &ostate); + + return (0); +} + +/* Rename a file */ +static int +p9fs_rename(struct vop_rename_args *ap) +{ + struct vnode *tvp; + struct vnode *tdvp; + struct vnode *fvp; + struct vnode *fdvp; + struct componentname *tcnp; + struct componentname *fcnp; + struct p9fs_node *tdnode; + struct p9fs_node *fdnode; + struct p9fs_inode *fdinode; + struct p9fs_node *fnode; + struct p9fs_inode *finode; + struct p9fs_session *vses; + struct p9fs_node *tnode; + struct p9fs_inode *tinode; + struct p9_fid *olddirvfid, *newdirvfid ; + int error; + + tvp = ap->a_tvp; + tdvp = ap->a_tdvp; + fvp = ap->a_fvp; + fdvp = ap->a_fdvp; + tcnp = ap->a_tcnp; + fcnp = ap->a_fcnp; + tdnode = P9FS_VTON(tdvp); + fdnode = P9FS_VTON(fdvp); + fdinode = &fdnode->inode; + fnode = P9FS_VTON(fvp); + finode = &fnode->inode; + vses = fnode->p9fs_ses; + error = 0; + + 
P9_DEBUG(VOPS, "%s: tvp %p, tdvp %p, fvp %p, fdvp %p\n ", __func__, tvp, tdvp, fvp, fdvp); + + /* Check for cross mount operation */ + if (fvp->v_mount != tdvp->v_mount || + (tvp && (fvp->v_mount != tvp->v_mount))) { + error = EXDEV; + goto out; + } + + /* warning if you are renaming to the same name */ + if (fvp == tvp) + error = 0; + + olddirvfid = p9fs_get_fid(vses->clnt, fdnode, fcnp->cn_cred, VFID, -1, &error); + if (error != 0) + goto out; + newdirvfid = p9fs_get_fid(vses->clnt, tdnode, tcnp->cn_cred, VFID, -1, &error); + if (error != 0) + goto out; + + error = p9_client_renameat(olddirvfid, fcnp->cn_nameptr, newdirvfid, tcnp->cn_nameptr); + if (error != 0) + goto out; + + /* + * decrement the link count on the "from" file whose name is going + * to be changed if its a directory + */ + if (fvp->v_type == VDIR) { + if (tvp && tvp->v_type == VDIR) + cache_purge(tdvp); + P9FS_DECR_LINKS(fdinode); + cache_purge(fdvp); + } + + /* Taking exclusive lock on the from node before decrementing the link count */ + if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0) + goto out; + P9FS_DECR_LINKS(finode); + VOP_UNLOCK(fvp); + + if (tvp) { + tnode = P9FS_VTON(tvp); + tinode = &tnode->inode; + P9FS_DECR_LINKS(tinode); + } + +out: + if (tdvp == tvp) + vrele(tdvp); + else + vput(tdvp); + if (tvp) + vput(tvp); + vrele(fdvp); + vrele(fvp); + return (error); +} + + +struct vop_vector p9fs_vnops = { + .vop_default = &default_vnodeops, + .vop_lookup = p9fs_lookup, + .vop_open = p9fs_open, + .vop_close = p9fs_close, + .vop_access = p9fs_access, + .vop_getattr = p9fs_getattr_dotl, + .vop_setattr = p9fs_setattr_dotl, + .vop_reclaim = p9fs_reclaim, + .vop_inactive = p9fs_inactive, + .vop_readdir = p9fs_readdir, + .vop_create = p9fs_create, + .vop_mknod = p9fs_mknod, + .vop_read = p9fs_read, + .vop_write = p9fs_write, + .vop_remove = p9fs_remove, + .vop_mkdir = p9fs_mkdir, + .vop_rmdir = p9fs_rmdir, + .vop_strategy = p9fs_strategy, + .vop_symlink = p9fs_symlink, + .vop_rename = p9fs_rename, 
+ .vop_link = p9fs_link, + .vop_readlink = p9fs_readlink, +}; +VFS_VOP_VECTOR_REGISTER(p9fs_vnops); diff --git a/sys/kern/vfs_mountroot.c b/sys/kern/vfs_mountroot.c --- a/sys/kern/vfs_mountroot.c +++ b/sys/kern/vfs_mountroot.c @@ -1020,6 +1020,7 @@ * behaviour by setting vfs.root_mount_always_wait=1. */ if (strcmp(fs, "zfs") == 0 || strstr(fs, "nfs") != NULL || + strcmp(fs, "p9fs") == 0 || dev[0] == '\0' || root_mount_always_wait != 0) { vfs_mountroot_wait(); return (0); diff --git a/sys/modules/Makefile b/sys/modules/Makefile --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -299,6 +299,7 @@ otus \ ${_otusfw} \ ow \ + p9fs \ ${_padlock} \ ${_padlock_rng} \ ${_pchtherm} \ diff --git a/sys/modules/p9fs/Makefile b/sys/modules/p9fs/Makefile new file mode 100644 --- /dev/null +++ b/sys/modules/p9fs/Makefile @@ -0,0 +1,8 @@ +.PATH: ${SRCTOP}/sys/fs/p9fs + +KMOD= p9fs +SRCS= vnode_if.h \ + p9_client.c p9_protocol.c p9_transport.c \ + p9fs_subr.c p9fs_vfsops.c p9fs_vnops.c + +.include diff --git a/sys/modules/virtio/Makefile b/sys/modules/virtio/Makefile --- a/sys/modules/virtio/Makefile +++ b/sys/modules/virtio/Makefile @@ -22,6 +22,6 @@ # SUCH DAMAGE. # -SUBDIR= virtio pci network block balloon scsi random console +SUBDIR= virtio pci network block balloon scsi random console p9fs .include diff --git a/sys/modules/virtio/Makefile b/sys/modules/virtio/p9fs/Makefile copy from sys/modules/virtio/Makefile copy to sys/modules/virtio/p9fs/Makefile --- a/sys/modules/virtio/Makefile +++ b/sys/modules/virtio/p9fs/Makefile @@ -22,6 +22,11 @@ # SUCH DAMAGE. # -SUBDIR= virtio pci network block balloon scsi random console +.PATH: ${SRCTOP}/sys/dev/virtio/p9fs -.include +KMOD= virtio_p9fs +SRCS= virtio_p9fs.c +SRCS+= virtio_bus_if.h virtio_if.h +SRCS+= bus_if.h device_if.h + +.include