diff --git a/sys/fs/p9fs/p9_client.c b/sys/fs/p9fs/p9_client.c index 45d24870306c..547de98c4c03 100644 --- a/sys/fs/p9fs/p9_client.c +++ b/sys/fs/p9fs/p9_client.c @@ -1,1311 +1,1332 @@ /*- * Copyright (c) 2017 Juniper Networks, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ /* * This file contains 9P client functions which prepares message to be sent to * the server. Every fileop typically has a function defined here to interact * with the host. 
*/ #include #include #include #include #include #include #include #include #include #include #include #define QEMU_HEADER 7 #define P9FS_MAX_FID_CNT (1024 * 1024 * 1024) #define P9FS_ROOT_FID_NO 2 #define P9FS_MIN_TAG 1 #define P9FS_MAX_TAG 65535 #define WSTAT_SIZE 47 #define WSTAT_EXTENSION_SIZE 14 static MALLOC_DEFINE(M_P9CLNT, "p9_client", "p9fs client structure"); static uma_zone_t p9fs_fid_zone; static uma_zone_t p9fs_req_zone; static uma_zone_t p9fs_buf_zone; SYSCTL_DECL(_vfs_p9fs); int p9_debug_level = 0; SYSCTL_INT(_vfs_p9fs, OID_AUTO, debug_level, CTLFLAG_RW, &p9_debug_level, 0, "p9fs debug logging"); static struct p9_req_t *p9_get_request(struct p9_client *c, int *error); static struct p9_req_t *p9_client_request( struct p9_client *c, int8_t type, int *error, const char *fmt, ...); inline int p9_is_proto_dotl(struct p9_client *clnt) { return (clnt->proto_version == p9_proto_2000L); } inline int p9_is_proto_dotu(struct p9_client *clnt) { return (clnt->proto_version == p9_proto_2000u); } /* Parse mount options into client structure */ static int p9_parse_opts(struct mount *mp, struct p9_client *clnt) { int error, len; char *trans; /* * Default to virtio since thats the only transport we have for now. 
*/ error = vfs_getopt(mp->mnt_optnew, "trans", (void **)&trans, &len); if (error == ENOENT) trans = "virtio"; /* These are defaults for now */ clnt->proto_version = p9_proto_2000L; clnt->msize = 8192; /* Get the default trans callback */ clnt->ops = p9_get_trans_by_name(trans); return (0); } /* Allocate buffer for sending request and getting responses */ static struct p9_buffer * p9_buffer_alloc(int alloc_msize) { struct p9_buffer *fc; fc = uma_zalloc(p9fs_buf_zone, M_WAITOK | M_ZERO); fc->capacity = alloc_msize; fc->offset = 0; fc->size = 0; fc->sdata = (char *)fc + sizeof(struct p9_buffer); return (fc); } /* Free memory used by request and response buffers */ static void p9_buffer_free(struct p9_buffer **buf) { /* Free the sdata buffers first, then the whole structure*/ uma_zfree(p9fs_buf_zone, *buf); *buf = NULL; } /* Free the request */ static void p9_free_req(struct p9_client *clnt, struct p9_req_t *req) { if (req->tc != NULL) { if (req->tc->tag != P9_NOTAG) p9_tag_destroy(clnt, req->tc->tag); p9_buffer_free(&req->tc); } if (req->rc != NULL) p9_buffer_free(&req->rc); uma_zfree(p9fs_req_zone, req); } /* Allocate a request by tag */ static struct p9_req_t * p9_get_request(struct p9_client *clnt, int *error) { struct p9_req_t *req; int alloc_msize; uint16_t tag; alloc_msize = P9FS_MTU; req = uma_zalloc(p9fs_req_zone, M_WAITOK | M_ZERO); req->tc = p9_buffer_alloc(alloc_msize); req->rc = p9_buffer_alloc(alloc_msize); tag = p9_tag_create(clnt); if (tag == P9_NOTAG) { *error = EAGAIN; req->tc->tag = P9_NOTAG; p9_free_req(clnt, req); return (NULL); } req->tc->tag = tag; return (req); } /* Parse header arguments of the response buffer */ static int p9_parse_receive(struct p9_buffer *buf, struct p9_client *clnt) { int8_t type; int16_t tag; int32_t size; int error; buf->offset = 0; /* This value is set by QEMU for the header.*/ if (buf->size == 0) buf->size = QEMU_HEADER; /* This is the initial header. 
Parse size, type, and tag .*/ error = p9_buf_readf(buf, 0, "dbw", &size, &type, &tag); if (error != 0) goto out; buf->size = size; buf->id = type; buf->tag = tag; P9_DEBUG(TRANS, "%s: size=%d type: %d tag: %d\n", __func__, buf->size, buf->id, buf->tag); out: return (error); } /* Check 9P response for any errors returned and process it */ static int p9_client_check_return(struct p9_client *c, struct p9_req_t *req) { int error; int ecode; char *ename; /* Check what we have in the receive bufer .*/ error = p9_parse_receive(req->rc, c); if (error != 0) goto out; /* * No error, We are done with the preprocessing. Return to the caller * and process the actual data. */ if (req->rc->id != P9PROTO_RERROR && req->rc->id != P9PROTO_RLERROR) return (0); /* * Interpreting the error is done in different ways for Linux and * Unix version. Make sure you interpret it right. */ if (req->rc->id == P9PROTO_RERROR) { error = p9_buf_readf(req->rc, c->proto_version, "s?d", &ename, &ecode); } else if (req->rc->id == P9PROTO_RLERROR) { error = p9_buf_readf(req->rc, c->proto_version, "d", &ecode); } else { goto out; } if (error != 0) goto out; /* if there was an ecode error make this the err now */ error = ecode; /* * Note this is still not completely an error, as lookups for files * not present can hit this and return. Hence it is made a debug print. 
*/ if (error != 0) { if (req->rc->id == P9PROTO_RERROR) { P9_DEBUG(PROTO, "RERROR error %d ename %s\n", error, ename); } else if (req->rc->id == P9PROTO_RLERROR) { P9_DEBUG(PROTO, "RLERROR error %d\n", error); } } if (req->rc->id == P9PROTO_RERROR) { free(ename, M_TEMP); } return (error); out: P9_DEBUG(ERROR, "couldn't parse receive buffer error%d\n", error); return (error); } /* State machine changing helpers */ void p9_client_disconnect(struct p9_client *clnt) { P9_DEBUG(TRANS, "%s: clnt %p\n", __func__, clnt); clnt->trans_status = P9FS_DISCONNECT; } void p9_client_begin_disconnect(struct p9_client *clnt) { P9_DEBUG(TRANS, "%s: clnt %p\n", __func__, clnt); clnt->trans_status = P9FS_BEGIN_DISCONNECT; } static struct p9_req_t * p9_client_prepare_req(struct p9_client *c, int8_t type, int req_size, int *error, const char *fmt, __va_list ap) { struct p9_req_t *req; P9_DEBUG(TRANS, "%s: client %p op %d\n", __func__, c, type); /* * Before we start with the request, check if its possible to finish * this request. We are allowed to submit the request only if there * are no close sessions happening or else there can be race. If the * status is Disconnected, we stop any requests coming in after that. */ if (c->trans_status == P9FS_DISCONNECT) { *error = EIO; return (NULL); } /* Allow only cleanup clunk messages once teardown has started. 
*/ if ((c->trans_status == P9FS_BEGIN_DISCONNECT) && (type != P9PROTO_TCLUNK)) { *error = EIO; return (NULL); } /* Allocate buffer for transferring and receiving data from host */ req = p9_get_request(c, error); if (*error != 0) { P9_DEBUG(ERROR, "%s: request allocation failed.\n", __func__); return (NULL); } /* Marshall the data according to QEMU standards */ *error = p9_buf_prepare(req->tc, type); if (*error != 0) { P9_DEBUG(ERROR, "%s: p9_buf_prepare failed: %d\n", __func__, *error); goto out; } *error = p9_buf_vwritef(req->tc, c->proto_version, fmt, ap); if (*error != 0) { P9_DEBUG(ERROR, "%s: p9_buf_vwrite failed: %d\n", __func__, *error); goto out; } *error = p9_buf_finalize(c, req->tc); if (*error != 0) { P9_DEBUG(ERROR, "%s: p9_buf_finalize failed: %d \n", __func__, *error); goto out; } return (req); out: p9_free_req(c, req); return (NULL); } /* * Issue a request and wait for response. The routine takes care of preparing * the 9P request header to be sent, parsing and checking for error conditions * in the received buffer. It returns the request structure. */ static struct p9_req_t * p9_client_request(struct p9_client *c, int8_t type, int *error, const char *fmt, ...) { va_list ap; struct p9_req_t *req; va_start(ap, fmt); req = p9_client_prepare_req(c, type, c->msize, error, fmt, ap); va_end(ap); /* Issue with allocation of request buffer */ if (*error != 0) return (NULL); /* Call into the transport for submission. */ *error = c->ops->request(c->handle, req); if (*error != 0) { P9_DEBUG(ERROR, "%s: failed: %d\n", __func__, *error); goto out; } /* * Before we return, pre process the header and the rc buffer before * calling into the protocol infra to analyze the data in rc. 
*/ *error = p9_client_check_return(c, req); if (*error != 0) goto out; return (req); out: p9_free_req(c, req); return (NULL); } /* Setup tag contents and structure */ uint16_t p9_tag_create(struct p9_client *clnt) { int tag; tag = alloc_unr(&clnt->tagpool); P9_DEBUG(LPROTO, "%s: clnt %p: tag %d\n", __func__, clnt, tag); /* Alloc_unr returning -1 is an error for no units left */ if (tag == -1) { return (P9_NOTAG); } return (tag); } /* Clean up tag structures */ void p9_tag_destroy(struct p9_client *clnt, uint16_t tag) { P9_DEBUG(LPROTO, "%s: clnt %p: tag %d\n", __func__, clnt, tag); /* Release to the pool */ free_unr(&clnt->tagpool, tag); } /* Allocate a new fid from the fidpool */ struct p9_fid * p9_fid_create(struct p9_client *clnt) { struct p9_fid *fid; fid = uma_zalloc(p9fs_fid_zone, M_WAITOK | M_ZERO); fid->fid = alloc_unr(&clnt->fidpool); P9_DEBUG(LPROTO, "%s: fid %d\n", __func__, fid->fid); /* Alloc_unr returning -1 is an error for no units left */ if (fid->fid == -1) { uma_zfree(p9fs_fid_zone, fid); return (NULL); } fid->mode = -1; fid->uid = -1; fid->clnt = clnt; return (fid); } /* Free the fid by releasing it to fidpool */ void p9_fid_destroy(struct p9_fid *fid) { struct p9_client *clnt; P9_DEBUG(LPROTO, "%s: fid %d\n", __func__, fid->fid); clnt = fid->clnt; /* Release to the pool */ free_unr(&clnt->fidpool, fid->fid); uma_zfree(p9fs_fid_zone, fid); } /* Request the version of 9P protocol */ int p9_client_version(struct p9_client *c) { int error; struct p9_req_t *req; char *version; int msize; error = 0; P9_DEBUG(PROTO, "TVERSION msize %d protocol %d\n", c->msize, c->proto_version); switch (c->proto_version) { case p9_proto_2000L: req = p9_client_request(c, P9PROTO_TVERSION, &error, "ds", c->msize, "9P2000.L"); break; case p9_proto_2000u: req = p9_client_request(c, P9PROTO_TVERSION, &error, "ds", c->msize, "9P2000.u"); break; case p9_proto_legacy: req = p9_client_request(c, P9PROTO_TVERSION, &error, "ds", c->msize, "9P2000"); break; default: return 
(EINVAL); } /* Always return the relevant error code */ if (error != 0) return (error); error = p9_buf_readf(req->rc, c->proto_version, "ds", &msize, &version); if (error != 0) { P9_DEBUG(ERROR, "%s: version error: %d\n", __func__, error); goto out; } P9_DEBUG(PROTO, "RVERSION msize %d %s\n", msize, version); if (!strncmp(version, "9P2000.L", 8)) c->proto_version = p9_proto_2000L; else if (!strncmp(version, "9P2000.u", 8)) c->proto_version = p9_proto_2000u; else if (!strncmp(version, "9P2000", 6)) c->proto_version = p9_proto_legacy; else { error = ENOMEM; goto out; } /* limit the msize .*/ if (msize < c->msize) c->msize = msize; out: p9_free_req(c, req); return (error); } /* * Initialize zones for different things. This is called from Init module * so that we just have them initalized once. */ void p9_init_zones(void) { /* Create the request and the fid zones */ p9fs_fid_zone = uma_zcreate("p9fs fid zone", sizeof(struct p9_fid), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); /* Create the request and the fid zones */ p9fs_req_zone = uma_zcreate("p9fs req zone", sizeof(struct p9_req_t), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); /* Create the buffer zone */ p9fs_buf_zone = uma_zcreate("p9fs buf zone", sizeof(struct p9_buffer) + P9FS_MTU, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); } void p9_destroy_zones(void) { uma_zdestroy(p9fs_fid_zone); uma_zdestroy(p9fs_req_zone); uma_zdestroy(p9fs_buf_zone); } /* Return the client to the session in the FS to hold it */ struct p9_client * p9_client_create(struct mount *mp, int *error, const char *mount_tag) { struct p9_client *clnt; clnt = malloc(sizeof(struct p9_client), M_P9CLNT, M_WAITOK | M_ZERO); mtx_init(&clnt->clnt_mtx, "p9clnt", NULL, MTX_DEF); /* Parse should have set trans_mod */ *error = p9_parse_opts(mp, clnt); if (*error != 0) goto out; if (clnt->ops == NULL) { *error = EINVAL; P9_DEBUG(ERROR, "%s: no transport\n", __func__); goto out; } /* All the structures from here are protected by the lock clnt_mtx */ 
init_unrhdr(&clnt->fidpool, P9FS_ROOT_FID_NO, P9FS_MAX_FID_CNT, &clnt->clnt_mtx); init_unrhdr(&clnt->tagpool, P9FS_MIN_TAG, P9FS_MAX_TAG, &clnt->clnt_mtx); P9_DEBUG(TRANS, "%s: clnt %p trans %p msize %d protocol %d\n", __func__, clnt, clnt->ops, clnt->msize, clnt->proto_version); *error = clnt->ops->create(mount_tag, &clnt->handle); if (*error != 0) { P9_DEBUG(ERROR, "%s: transport create failed .%d \n", __func__, *error); goto out; } clnt->trans_status = P9FS_CONNECT; *error = p9_client_version(clnt); if (*error != 0) goto out; P9_DEBUG(TRANS, "%s: client creation succeeded.\n", __func__); return (clnt); out: free(clnt, M_P9CLNT); return (NULL); } /* Destroy the client by destroying associated fidpool and tagpool */ void p9_client_destroy(struct p9_client *clnt) { P9_DEBUG(TRANS, "%s: client %p\n", __func__, clnt); clnt->ops->close(clnt->handle); P9_DEBUG(TRANS, "%s : Destroying fidpool\n", __func__); clear_unrhdr(&clnt->fidpool); P9_DEBUG(TRANS, "%s : Destroying tagpool\n", __func__); clear_unrhdr(&clnt->tagpool); free(clnt, M_P9CLNT); } /* * Attach a user to the filesystem. Create a fid for that user to access * the root of the filesystem. 
*/ struct p9_fid * p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, const char *uname, uid_t n_uname, const char *aname, int *error) { struct p9_req_t *req; struct p9_fid *fid; struct p9_qid qid; P9_DEBUG(PROTO, "TATTACH uname=%s aname=%s, n_uname=%d\n", uname, aname, n_uname); fid = p9_fid_create(clnt); if (fid == NULL) { *error = ENOMEM; return (NULL); } fid->uid = n_uname; req = p9_client_request(clnt, P9PROTO_TATTACH, error, "ddssd", fid->fid, P9PROTO_NOFID, uname, aname, n_uname); if (*error != 0) goto out; *error = p9_buf_readf(req->rc, clnt->proto_version, "Q", &qid); if (*error != 0) { P9_DEBUG(ERROR, "%s: p9_buf_readf failed: %d \n", __func__, *error); goto out; } P9_DEBUG(PROTO, "RATTACH qid %x.%llx.%x\n", qid.type, (unsigned long long)qid.path, qid.version); memmove(&fid->qid, &qid, sizeof(struct p9_qid)); p9_free_req(clnt, req); return (fid); out: if (req != NULL) p9_free_req(clnt, req); if (fid != NULL) p9_fid_destroy(fid); return (NULL); } /* Delete a file/directory. Corresponding fid will be cluncked too */ int p9_client_remove(struct p9_fid *fid) { int error; struct p9_client *clnt; struct p9_req_t *req; P9_DEBUG(PROTO, "TREMOVE fid %d\n", fid->fid); error = 0; clnt = fid->clnt; req = p9_client_request(clnt, P9PROTO_TREMOVE, &error, "d", fid->fid); if (error != 0) { P9_DEBUG(PROTO, "RREMOVE fid %d\n", fid->fid); return (error); } p9_free_req(clnt, req); return (error); } +int +p9_client_unlink(struct p9_fid *dfid, const char *name, int32_t flags) +{ + int error; + struct p9_client *clnt; + struct p9_req_t *req; + + error = 0; + clnt = dfid->clnt; + + req = p9_client_request(clnt, P9PROTO_TUNLINKAT, &error, "dsd", + dfid->fid, name, flags); + if (error != 0) { + P9_DEBUG(PROTO, "RUNLINKAT fid %d\n", dfid->fid); + return (error); + } + + p9_free_req(clnt, req); + return (error); +} + /* Inform the file server that the current file represented by fid is no longer * needed by the client. 
Any allocated fid on the server needs a clunk to be * destroyed. */ int p9_client_clunk(struct p9_fid *fid) { int error; struct p9_client *clnt; struct p9_req_t *req; error = 0; if (fid == NULL) { P9_DEBUG(ERROR, "%s: clunk with NULL fid is bad\n", __func__); return (0); } P9_DEBUG(PROTO, "TCLUNK fid %d \n", fid->fid); clnt = fid->clnt; req = p9_client_request(clnt, P9PROTO_TCLUNK, &error, "d", fid->fid); if (req != NULL) { P9_DEBUG(PROTO, "RCLUNK fid %d\n", fid->fid); p9_free_req(clnt, req); } p9_fid_destroy(fid); return (error); } /* * Client_walk is for searching any component name in a directory. * This is usually called on lookups. Also when we need a new open fid * as 9p needs to have an open fid for every file to fileops, we call this * validate the component of the file and return the newfid(openfid) created. */ struct p9_fid * p9_client_walk(struct p9_fid *oldfid, uint16_t nwnames, char **wnames, int clone, int *error) { struct p9_client *clnt; struct p9_fid *fid; struct p9_qid *wqids; struct p9_req_t *req; uint16_t nwqids, count; clnt = oldfid->clnt; wqids = NULL; nwqids = 0; /* * Before, we go and create fid, make sure we are not tearing * down. Only then we create. * Allow only cleanup clunk messages once we are starting to teardown. */ if (clnt->trans_status != P9FS_CONNECT) { *error = EIO; return (NULL); } if (clone) { fid = p9_fid_create(clnt); if (fid == NULL) { *error = ENOMEM; return (NULL); } fid->uid = oldfid->uid; } else fid = oldfid; P9_DEBUG(PROTO, "TWALK fids %d,%d nwnames %u wname %s\n", oldfid->fid, fid->fid, nwnames, wnames != NULL ? wnames[nwnames-1] : NULL); /* * The newfid is for the component in search. 
We are preallocating as * qemu on other side allocates or returns a fid if it sees a match */ req = p9_client_request(clnt, P9PROTO_TWALK, error, "ddT", oldfid->fid, fid->fid, wnames, nwnames); if (*error != 0) { if (fid != oldfid) p9_fid_destroy(fid); return (NULL); } *error = p9_buf_readf(req->rc, clnt->proto_version, "R", &nwqids, &wqids); if (*error != 0) goto out; P9_DEBUG(PROTO, "RWALK nwqid %d:\n", nwqids); if (nwqids != nwnames) { *error = ENOENT; goto out; } for (count = 0; count < nwqids; count++) P9_DEBUG(TRANS, "%s: [%d] %x.%llx.%x\n", __func__, count, wqids[count].type, (unsigned long long)wqids[count].path, wqids[count].version); if (nwnames) memmove(&fid->qid, &wqids[nwqids - 1], sizeof(struct p9_qid)); else fid->qid = oldfid->qid; p9_free_req(clnt, req); free(wqids, M_TEMP); return (fid); out: p9_free_req(clnt, req); if (wqids) free(wqids, M_TEMP); if (fid && fid != oldfid) p9_client_clunk(fid); return (NULL); } /* Open a file with given fid and mode */ int p9_client_open(struct p9_fid *fid, int mode) { int error, mtu; struct p9_client *clnt; struct p9_req_t *req; error = 0; clnt = fid->clnt; mtu = 0; P9_DEBUG(PROTO, "%s fid %d mode %d\n", p9_is_proto_dotl(clnt) ? "TLOPEN" : "TOPEN", fid->fid, mode); if (fid->mode != -1) return (EINVAL); if (p9_is_proto_dotl(clnt)) req = p9_client_request(clnt, P9PROTO_TLOPEN, &error, "dd", fid->fid, mode); else req = p9_client_request(clnt, P9PROTO_TOPEN, &error, "db", fid->fid, mode); if (error != 0) return (error); error = p9_buf_readf(req->rc, clnt->proto_version, "Qd", &fid->qid, &mtu); if (error != 0) goto out; P9_DEBUG(PROTO, "%s qid %x.%llx.%x mtu %x\n", p9_is_proto_dotl(clnt) ? 
"RLOPEN" : "ROPEN", (fid->qid).type, (unsigned long long)(fid->qid).path, (fid->qid).version, mtu); fid->mode = mode; fid->mtu = mtu; out: p9_free_req(clnt, req); return (error); } /* Request to get directory entries */ int p9_client_readdir(struct p9_fid *fid, char *data, uint64_t offset, uint32_t count) { int error; uint32_t rsize; struct p9_client *clnt; struct p9_req_t *req; char *dataptr; P9_DEBUG(PROTO, "TREADDIR fid %d offset %llu count %d\n", fid->fid, (unsigned long long) offset, count); error = 0; rsize = fid->mtu; clnt = fid->clnt; if (rsize == 0 || rsize > clnt->msize) rsize = clnt->msize; if (count < rsize) rsize = count; req = p9_client_request(clnt, P9PROTO_TREADDIR, &error, "dqd", fid->fid, offset, rsize); if (error != 0) { P9_DEBUG(ERROR, "%s: couldn't allocate req in client_readdir\n", __func__); return (-error); } error = p9_buf_readf(req->rc, clnt->proto_version, "D", &count, &dataptr); if (error != 0) { P9_DEBUG(ERROR, "%s: p0_buf_readf failed: %d\n", __func__, error); p9_free_req(clnt, req); return (-error); } P9_DEBUG(PROTO, "RREADDIR count %u\n", count); /* Copy back the data into the input buffer. */ memmove(data, dataptr, count); p9_free_req(clnt, req); return (count); } /* * Read count bytes from offset for the file fid into the character * buffer data. This buffer is handed over to p9fs to process into user * buffers. Note that this function typically returns the number of bytes read * so in case of an error we return -error so that we can distinguish between * error codes and bytes. 
*/ int p9_client_read(struct p9_fid *fid, uint64_t offset, uint32_t count, char *data) { struct p9_client *clnt; struct p9_req_t *req; char *dataptr; int error, rsize; clnt = fid->clnt; rsize = fid->mtu; error = 0; P9_DEBUG(PROTO, "TREAD fid %d offset %llu %u\n", fid->fid, (unsigned long long) offset, count); if (!rsize || rsize > clnt->msize) rsize = clnt->msize; if (count < rsize) rsize = count; /* At this stage, we only have 8K buffers so only transfer */ req = p9_client_request(clnt, P9PROTO_TREAD, &error, "dqd", fid->fid, offset, rsize); if (error != 0) { P9_DEBUG(ERROR, "%s: failed allocate request\n", __func__); return (-error); } error = p9_buf_readf(req->rc, clnt->proto_version, "D", &count, &dataptr); if (error != 0) { P9_DEBUG(ERROR, "%s: p9_buf_readf failed: %d\n", __func__, error); goto out; } if (rsize < count) { P9_DEBUG(PROTO, "RREAD count (%d > %d)\n", count, rsize); count = rsize; } P9_DEBUG(PROTO, "RREAD count %d\n", count); if (count == 0) { error = -EIO; P9_DEBUG(ERROR, "%s: EIO error in client_read \n", __func__); goto out; } /* Copy back the data into the input buffer. */ memmove(data, dataptr, count); p9_free_req(clnt, req); return (count); out: p9_free_req(clnt, req); return (-error); } /* * Write count bytes from buffer to the offset for the file fid * Note that this function typically returns the number of bytes written * so in case of an error we return -error so that we can distinguish between * error codes and bytes. */ int p9_client_write(struct p9_fid *fid, uint64_t offset, uint32_t count, char *data) { struct p9_client *clnt; struct p9_req_t *req; int ret, error, rsize; clnt = fid->clnt; rsize = fid->mtu; ret = 0; error = 0; P9_DEBUG(PROTO, "TWRITE fid %d offset %llu %u\n", fid->fid, (unsigned long long) offset, count); if (!rsize || rsize > clnt->msize) rsize = clnt->msize; /* Limit set by Qemu ,8168 */ if (count > rsize) { count = rsize; } /* * Doing the Data blob instead. 
If at all we add the zerocopy, we can * change it to uio direct copy */ req = p9_client_request(clnt, P9PROTO_TWRITE, &error, "dqD", fid->fid, offset, count, data); if (error != 0) { P9_DEBUG(ERROR, "%s: failed allocate request: %d\n", __func__, error); return (-error); } error = p9_buf_readf(req->rc, clnt->proto_version, "d", &ret); if (error != 0) { P9_DEBUG(ERROR, "%s: p9_buf_readf error: %d\n", __func__, error); goto out; } if (count < ret) { P9_DEBUG(PROTO, "RWRITE count (%d > %d)\n", count, ret); ret = count; } P9_DEBUG(PROTO, "RWRITE count %d\n", ret); if (count == 0) { error = EIO; P9_DEBUG(ERROR, "%s: EIO error\n", __func__); goto out; } p9_free_req(clnt, req); return (ret); out: p9_free_req(clnt, req); return (-error); } /* Create file under directory fid, with name, permissions, mode. */ int p9_client_file_create(struct p9_fid *fid, char *name, uint32_t perm, int mode, char *extension) { int error; struct p9_client *clnt; struct p9_req_t *req; struct p9_qid qid; int mtu; P9_DEBUG(PROTO, "TCREATE fid %d name %s perm %d mode %d\n", fid->fid, name, perm, mode); clnt = fid->clnt; error = 0; if (fid->mode != -1) return (EINVAL); req = p9_client_request(clnt, P9PROTO_TCREATE, &error, "dsdb?s", fid->fid, name, perm, mode, extension); if (error != 0) return (error); error = p9_buf_readf(req->rc, clnt->proto_version, "Qd", &qid, &mtu); if (error != 0) goto out; P9_DEBUG(PROTO, "RCREATE qid %x.%jx.%x mtu %x\n", qid.type, (uintmax_t)qid.path, qid.version, mtu); fid->mode = mode; fid->mtu = mtu; out: p9_free_req(clnt, req); return (error); } /* Request file system information of the file system */ int p9_client_statfs(struct p9_fid *fid, struct p9_statfs *stat) { int error; struct p9_req_t *req; struct p9_client *clnt; error = 0; clnt = fid->clnt; P9_DEBUG(PROTO, "TSTATFS fid %d\n", fid->fid); req = p9_client_request(clnt, P9PROTO_TSTATFS, &error, "d", fid->fid); if (error != 0) { return (error); } error = p9_buf_readf(req->rc, clnt->proto_version, "ddqqqqqqd", 
&stat->type, &stat->bsize, &stat->blocks, &stat->bfree, &stat->bavail, &stat->files, &stat->ffree, &stat->fsid, &stat->namelen); if (error != 0) goto out; P9_DEBUG(PROTO, "RSTATFS fid %d type 0x%jx bsize %ju " "blocks %ju bfree %ju bavail %ju files %ju ffree %ju " "fsid %ju namelen %ju\n", fid->fid, (uintmax_t)stat->type, (uintmax_t)stat->bsize, (uintmax_t)stat->blocks, (uintmax_t)stat->bfree, (uintmax_t)stat->bavail, (uintmax_t)stat->files, (uintmax_t)stat->ffree, (uintmax_t)stat->fsid, (uintmax_t)stat->namelen); out: p9_free_req(clnt, req); return (error); } /* Rename file referenced by the fid */ int p9_client_renameat(struct p9_fid *oldfid, char *oldname, struct p9_fid *newfid, char *newname) { int error; struct p9_client *clnt; struct p9_req_t *req; P9_DEBUG(PROTO, "TRENAMEAT oldfid %d oldname %s newfid %d newfid %s", oldfid->fid, oldname, newfid->fid, newname); error = 0; clnt = oldfid->clnt; /* * we are calling the request with TRENAMEAT tag and not TRENAME with * the 9p protocol version 9p2000.u as the QEMU version supports this * version of renaming */ req = p9_client_request(clnt, P9PROTO_TRENAMEAT, &error, "dsds", oldfid->fid, oldname, newfid->fid, newname); if (error != 0) return (error); p9_free_req(clnt, req); return (error); } /* Request to create symbolic link */ int p9_create_symlink(struct p9_fid *fid, char *name, char *symtgt, gid_t gid) { int error; struct p9_req_t *req; struct p9_client *clnt; struct p9_qid qid; error = 0; clnt = fid->clnt; P9_DEBUG(PROTO, "TSYMLINK fid %d name %s\n", fid->fid, name); req = p9_client_request(clnt, P9PROTO_TSYMLINK, &error, "dssd", fid->fid, name, symtgt, gid); if (error != 0) return (error); error = p9_buf_readf(req->rc, clnt->proto_version, "Q", &qid); if (error != 0) { P9_DEBUG(ERROR, "%s: buf_readf failed %d\n", __func__, error); return (error); } P9_DEBUG(PROTO, "RSYMLINK qid %x.%jx.%x\n", qid.type, (uintmax_t)qid.path, qid.version); p9_free_req(clnt, req); return (0); } /* Request to create hard link */ 
int p9_create_hardlink(struct p9_fid *dfid, struct p9_fid *oldfid, char *name) { int error; struct p9_req_t *req; struct p9_client *clnt; error = 0; clnt = dfid->clnt; P9_DEBUG(PROTO, "TLINK dfid %d oldfid %d name %s\n", dfid->fid, oldfid->fid, name); req = p9_client_request(clnt, P9PROTO_TLINK, &error, "dds", dfid->fid, oldfid->fid, name); if (error != 0) return (error); p9_free_req(clnt, req); return (0); } /* Request to read contents of symbolic link */ int p9_readlink(struct p9_fid *fid, char **target) { int error; struct p9_client *clnt; struct p9_req_t *req; error = 0; clnt = fid->clnt; P9_DEBUG(PROTO, "TREADLINK fid %d\n", fid->fid); req = p9_client_request(clnt, P9PROTO_TREADLINK, &error, "d", fid->fid); if (error != 0) return (error); error = p9_buf_readf(req->rc, clnt->proto_version, "s", target); if (error != 0) { P9_DEBUG(ERROR, "%s: buf_readf failed %d\n", __func__, error); return (error); } P9_DEBUG(PROTO, "RREADLINK target %s \n", *target); p9_free_req(clnt, req); return (0); } /* Get file attributes of the file referenced by the fid */ int p9_client_getattr(struct p9_fid *fid, struct p9_stat_dotl *stat_dotl, uint64_t request_mask) { int err; struct p9_client *clnt; struct p9_req_t *req; err = 0; P9_DEBUG(PROTO, "TGETATTR fid %d mask %ju\n", fid->fid, (uintmax_t)request_mask); clnt = fid->clnt; req = p9_client_request(clnt, P9PROTO_TGETATTR, &err, "dq", fid->fid, request_mask); if (req == NULL) { P9_DEBUG(ERROR, "%s: allocation failed %d", __func__, err); goto error; } err = p9_buf_readf(req->rc, clnt->proto_version, "A", stat_dotl); if (err != 0) { P9_DEBUG(ERROR, "%s: buf_readf failed %d\n", __func__, err); goto error; } p9_free_req(clnt, req); P9_DEBUG(PROTO, "RGETATTR fid %d qid %x.%jx.%x st_mode %8.8x " "uid %d gid %d nlink %ju rdev %jx st_size %jx blksize %ju " "blocks %ju st_atime_sec %ju, st_atime_nsec %ju " "st_mtime_sec %ju, st_mtime_nsec %ju st_ctime_sec %ju " "st_ctime_nsec %ju st_btime_sec %ju, st_btime_nsec %ju " "st_stat %ju, 
st_data_version %ju \n", fid->fid, stat_dotl->qid.type, (uintmax_t)stat_dotl->qid.path, stat_dotl->qid.version, stat_dotl->st_mode, stat_dotl->st_uid, stat_dotl->st_gid, (uintmax_t)stat_dotl->st_nlink, (uintmax_t)stat_dotl->st_rdev, (uintmax_t)stat_dotl->st_size, (uintmax_t)stat_dotl->st_blksize, (uintmax_t)stat_dotl->st_blocks, (uintmax_t)stat_dotl->st_atime_sec, (uintmax_t)stat_dotl->st_atime_nsec, (uintmax_t)stat_dotl->st_mtime_sec, (uintmax_t)stat_dotl->st_mtime_nsec, (uintmax_t)stat_dotl->st_ctime_sec, (uintmax_t)stat_dotl->st_ctime_nsec, (uintmax_t)stat_dotl->st_btime_sec, (uintmax_t)stat_dotl->st_btime_nsec, (uintmax_t)stat_dotl->st_gen, (uintmax_t)stat_dotl->st_data_version); return (err); error: if (req != NULL) p9_free_req(clnt, req); return (err); } /* Set file attributes of the file referenced by the fid */ int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr) { int err; struct p9_req_t *req; struct p9_client *clnt; err = 0; P9_DEBUG(PROTO, "TSETATTR fid %d" " valid %x mode %x uid %d gid %d size %ju" " atime_sec %ju atime_nsec %ju" " mtime_sec %ju mtime_nsec %ju\n", fid->fid, p9attr->valid, p9attr->mode, p9attr->uid, p9attr->gid, (uintmax_t)p9attr->size, (uintmax_t)p9attr->atime_sec, (uintmax_t)p9attr->atime_nsec, (uintmax_t)p9attr->mtime_sec, (uintmax_t)p9attr->mtime_nsec); clnt = fid->clnt; /* Any client_request error is converted to req == NULL error*/ req = p9_client_request(clnt, P9PROTO_TSETATTR, &err, "dA", fid->fid, p9attr); if (req == NULL) { P9_DEBUG(ERROR, "%s: allocation failed %d\n", __func__, err); goto error; } p9_free_req(clnt, req); error: return (err); } diff --git a/sys/fs/p9fs/p9_client.h b/sys/fs/p9fs/p9_client.h index 8597c0732ba3..79d402c174e8 100644 --- a/sys/fs/p9fs/p9_client.h +++ b/sys/fs/p9fs/p9_client.h @@ -1,168 +1,169 @@ /*- * Copyright (c) 2017 Juniper Networks, Inc. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/

/* 9P client definitions */

#ifndef FS_P9FS_P9_CLIENT_H
#define FS_P9FS_P9_CLIENT_H

#include #include #include #include #include #include #include #include #include #include #include #include #include #include

/* 9P protocol versions */
enum p9_proto_versions {
	p9_proto_legacy,	/* legacy version */
	p9_proto_2000u,		/* Unix version */
	p9_proto_2000L,		/* Linux version */
};

/* P9 Request exchanged between Host and Guest */
struct p9_req_t {
	struct p9_buffer *tc;	/* request buffer */
	struct p9_buffer *rc;	/* response buffer */
};

/* 9P transport status */
enum transport_status {
	P9FS_CONNECT,		/* transport is connected */
	P9FS_BEGIN_DISCONNECT,/* transport has begun to disconnect */
	P9FS_DISCONNECT,	/* transport has been disconnected */
};

/* This is set by QEMU so we will oblige */
#define P9FS_MTU 8192

/*
 * Even though we have a 8k buffer, Qemu is typically doing 8168
 * because of a HDR of 24. Use that amount for transfers so that we don't
 * drop anything.
 */
#define P9FS_IOUNIT (P9FS_MTU - 24)
#define P9FS_DIRENT_LEN 256
#define P9_NOTAG 0

/* Client state information */
struct p9_client {
	struct p9_trans_module *ops;	/* module API instantiated with this client */
	void *handle;			/* module-specific client handle */
	struct mtx clnt_mtx;		/* mutex to lock the client */
	struct mtx req_mtx;		/* mutex to lock the request buffer */
	struct cv req_cv;		/* condition variable on which to wake up thread */
	unsigned int msize;		/* maximum data size */
	unsigned char proto_version;	/* 9P version to use */
	struct unrhdr fidpool;		/* fid handle accounting for session */
	struct unrhdr tagpool;		/* transaction id accounting for session */
	enum transport_status trans_status; /* transport instance state */
};

/* The main fid structure which keeps track of the file.*/
struct p9_fid {
	struct p9_client *clnt;	/* the instantiating 9P client */
	uint32_t fid;		/* numeric identifier */
	int mode;		/* current mode of this fid */
	struct p9_qid qid;	/* server identifier */
	uint32_t mtu;		/* max transferable unit at a time */
	uid_t uid;		/* numeric uid of the local user who owns this handle */
	int v_opens;		/* keep count on the number of opens called with this file handle */
	STAILQ_ENTRY(p9_fid) fid_next;	/* points to next fid in the list */
};

/* Directory entry structure */
struct p9_dirent {
	struct p9_qid qid;		/* 9P server qid for this dirent */
	uint64_t d_off;			/* offset to the next dirent */
	unsigned char d_type;		/* file type */
	char d_name[P9FS_DIRENT_LEN];	/* file name */
	int len;
};

void p9_init_zones(void);
void p9_destroy_zones(void);

/* Session and client Init Ops */
struct p9_client *p9_client_create(struct mount *mp, int *error,
    const char *mount_tag);
void p9_client_destroy(struct p9_client *clnt);
struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *fid,
    const char *uname, uid_t n_uname, const char *aname, int *error);

/* FILE OPS - These are individually called from the specific vop function */

int p9_client_open(struct p9_fid *fid, int mode);
int p9_client_close(struct p9_fid *fid);
struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwnames,
    char **wnames, int clone, int *error);
struct p9_fid *p9_fid_create(struct p9_client *clnt);
void p9_fid_destroy(struct p9_fid *fid);
uint16_t p9_tag_create(struct p9_client *clnt);
void p9_tag_destroy(struct p9_client *clnt, uint16_t tag);
int p9_client_clunk(struct p9_fid *fid);
int p9_client_version(struct p9_client *clnt);
int p9_client_readdir(struct p9_fid *fid, char *data, uint64_t offset, uint32_t count);
int p9_client_read(struct p9_fid *fid, uint64_t offset, uint32_t count, char *data);
int p9_client_write(struct p9_fid *fid, uint64_t offset, uint32_t count, char *data);
int p9_client_file_create(struct p9_fid *fid, char *name, uint32_t perm, int mode,
    char *extension);
int p9_client_remove(struct p9_fid *fid);
+int p9_client_unlink(struct p9_fid *dfid, const char *name, int32_t flags);
int p9_dirent_read(struct p9_client *clnt, char *buf, int start, int len,
    struct p9_dirent *dirent);
int p9_client_statfs(struct p9_fid *fid, struct p9_statfs *stat);
int p9_client_statread(struct p9_client *clnt, char *data, size_t len, struct p9_wstat *st);
int p9_is_proto_dotu(struct p9_client *clnt);
int p9_is_proto_dotl(struct p9_client *clnt);
void p9_client_cb(struct p9_client *c, struct p9_req_t *req);
int p9stat_read(struct p9_client *clnt, char *data, size_t len, struct p9_wstat *st);
void p9_client_disconnect(struct p9_client *clnt);
void p9_client_begin_disconnect(struct p9_client *clnt);
int p9_create_symlink(struct p9_fid *fid, char *name, char *symtgt, gid_t gid);
int p9_create_hardlink(struct p9_fid *dfid, struct p9_fid *oldfid, char *name);
int p9_readlink(struct p9_fid *fid, char **target);
int p9_client_renameat(struct p9_fid *oldfid, char *oldname, struct p9_fid *newfid, char *newname);
int p9_client_getattr(struct p9_fid *fid, struct p9_stat_dotl *stat_dotl,
    uint64_t request_mask);
int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr);

int p9_buf_vwritef(struct p9_buffer *buf, int proto_version, const char *fmt,
    va_list ap);
int p9_buf_readf(struct p9_buffer *buf, int proto_version, const char *fmt, ...);
int p9_buf_prepare(struct p9_buffer *buf, int8_t type);
int p9_buf_finalize(struct p9_client *clnt, struct p9_buffer *buf);
void p9_buf_reset(struct p9_buffer *buf);

#endif /* FS_P9FS_P9_CLIENT_H */
diff --git a/sys/fs/p9fs/p9_protocol.h b/sys/fs/p9fs/p9_protocol.h
index ddd8571adc8d..7ffd7dd67bcf 100644
--- a/sys/fs/p9fs/p9_protocol.h
+++ b/sys/fs/p9fs/p9_protocol.h
@@ -1,280 +1,282 @@
/*-
 * Copyright (c) 2017 Juniper Networks, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2.
Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/* File contains 9P protocol definitions */

#ifndef FS_P9FS_P9_PROTOCOL_H
#define FS_P9FS_P9_PROTOCOL_H

#include

/* 9P message types */
enum p9_cmds_t {
	P9PROTO_TLERROR = 6,	/* not used */
	P9PROTO_RLERROR,	/* response for any failed request */
	P9PROTO_TSTATFS = 8,	/* file system status request */
	P9PROTO_RSTATFS,	/* file system status response */
	P9PROTO_TLOPEN = 12,	/* open a file (9P2000.L) */
	P9PROTO_RLOPEN,		/* response to open request (9P2000.L) */
	P9PROTO_TLCREATE = 14,	/* prepare for handle for I/O on a new file (9P2000.L) */
	P9PROTO_RLCREATE,	/* response with file access information (9P2000.L) */
	P9PROTO_TSYMLINK = 16,	/* symlink creation request */
	P9PROTO_RSYMLINK,	/* symlink creation response */
	P9PROTO_TMKNOD = 18,	/* create a special file object request */
	P9PROTO_RMKNOD,		/* create a special file object response */
	P9PROTO_TRENAME = 20,	/* rename a file request */
	P9PROTO_RRENAME,	/* rename a file response */
	P9PROTO_TREADLINK = 22,	/* request to read value of symbolic link */
	P9PROTO_RREADLINK,	/* response to read value of symbolic link request */
	P9PROTO_TGETATTR = 24,	/* get file attributes request */
	P9PROTO_RGETATTR,	/* get file attributes response */
	P9PROTO_TSETATTR = 26,	/* set file attributes request */
	P9PROTO_RSETATTR,	/* set file attributes response */
	P9PROTO_TXATTRWALK = 30,/* request to read extended attributes */
	P9PROTO_RXATTRWALK,	/* response from server with attributes */
	P9PROTO_TXATTRCREATE = 32,/* request to set extended attribute */
	P9PROTO_RXATTRCREATE,	/* response from server for setting extended attribute */
	P9PROTO_TREADDIR = 40,	/* request to read a directory */
	P9PROTO_RREADDIR,	/* response from server for read request */
	P9PROTO_TFSYNC = 50,	/* request to flush any cached data to disk */
	P9PROTO_RFSYNC,		/* response when cached data is flushed */
	P9PROTO_TLOCK = 52,	/* acquire or release a POSIX record lock */
	P9PROTO_RLOCK,		/* response with the status of the lock */
	P9PROTO_TGETLOCK = 54,	/* request to check for presence of a POSIX record lock */
	P9PROTO_RGETLOCK,	/* response with the details of the lock if acquired */
	P9PROTO_TLINK = 70,	/* request to create hard link */
	P9PROTO_RLINK,		/* create hard link response */
	P9PROTO_TMKDIR = 72,	/* create a directory request */
	P9PROTO_RMKDIR,		/* create a directory response */
	P9PROTO_TRENAMEAT = 74,	/* request to rename a file or directory */
	P9PROTO_RRENAMEAT,	/* response to rename request */
	P9PROTO_TUNLINKAT = 76,	/* unlink a file or directory */
	P9PROTO_RUNLINKAT,	/* response to unlink request */
	P9PROTO_TVERSION = 100,	/* request for version handshake */
	P9PROTO_RVERSION,	/* response for version handshake */
	P9PROTO_TAUTH = 102,	/* request to establish authentication channel */
	P9PROTO_RAUTH,		/* response with authentication information */
	P9PROTO_TATTACH = 104,	/* establish a user access to a file system*/
	P9PROTO_RATTACH,	/* response with top level handle to file hierarchy */
	P9PROTO_TERROR = 106,	/* not used */
	P9PROTO_RERROR,		/* response for any failed request */
	P9PROTO_TFLUSH = 108,	/* request to abort a previous request */
	P9PROTO_RFLUSH,		/* response when previous request has been cancelled */
	P9PROTO_TWALK = 110,	/* descend a directory hierarchy */
	P9PROTO_RWALK,		/* response with new handle for position within hierarchy */
	P9PROTO_TOPEN = 112,	/* prepare file handle for I/O for an existing file */
	P9PROTO_ROPEN,		/* response with file access information */
	P9PROTO_TCREATE = 114,	/* prepare for handle for I/O on a new file */
	P9PROTO_RCREATE,	/* response with file access information */
	P9PROTO_TREAD = 116,	/* request to transfer data from a file */
	P9PROTO_RREAD,		/* response with data requested */
	P9PROTO_TWRITE = 118,	/* request to transfer data to a file */
	P9PROTO_RWRITE,		/* response with how much data was written to the file */
	P9PROTO_TCLUNK = 120,	/* forget about a handle to a file within the File System */
	P9PROTO_RCLUNK,		/* response from the server for forgetting the file handle */
	P9PROTO_TREMOVE = 122,	/* request to remove a file */
	P9PROTO_RREMOVE,	/* response when server has removed the file */
	P9PROTO_TSTAT = 124,	/* request file entity attributes */
	P9PROTO_RSTAT,		/* response with file entity attributes */
	P9PROTO_TWSTAT = 126,	/* request to update file entity attributes */
	P9PROTO_RWSTAT,		/* response when file entity attributes are updated */
};

/* File Open Modes */
enum p9_open_mode_t {
	P9PROTO_OREAD = 0x00,	/* open file for reading only */
	P9PROTO_OWRITE = 0x01,	/* open file for writing only */
	P9PROTO_ORDWR = 0x02,	/* open file for both reading and writing */
	P9PROTO_OEXEC = 0x03,	/* open file for execution */
	P9PROTO_OTRUNC = 0x10,	/* truncate file to zero length before opening it */
	P9PROTO_OREXEC = 0x20,	/* close the file when exec system call is made */
	P9PROTO_ORCLOSE = 0x40,	/* remove the file when it is closed */
	P9PROTO_OAPPEND = 0x80,	/* open the file and seek to the end of the file */
	P9PROTO_OEXCL = 0x1000,	/* only create a file and not open it */
};

/* File Permissions */
enum p9_perm_t {
	P9PROTO_DMDIR = 0x80000000,	/* permission bit for directories */
	P9PROTO_DMAPPEND = 0x40000000,	/* permission bit for is append-only */
	P9PROTO_DMEXCL = 0x20000000,	/* permission bit for exclusive use (only one open handle allowed) */
	P9PROTO_DMMOUNT = 0x10000000,	/* permission bit for mount points */
	P9PROTO_DMAUTH = 0x08000000,	/* permission bit for authentication file */
	P9PROTO_DMTMP = 0x04000000,	/* permission bit for non-backed-up files */
	P9PROTO_DMSYMLINK = 0x02000000,	/* permission bit for symbolic link (9P2000.u) */
	P9PROTO_DMLINK = 0x01000000,	/* permission bit for hard-link (9P2000.u) */
	P9PROTO_DMDEVICE = 0x00800000,	/* permission bit for device files (9P2000.u) */
	P9PROTO_DMNAMEDPIPE = 0x00200000,/* permission bit for named pipe (9P2000.u) */
	P9PROTO_DMSOCKET = 0x00100000,	/* permission bit for socket (9P2000.u) */
	P9PROTO_DMSETUID = 0x00080000,	/* permission bit for setuid (9P2000.u) */
	P9PROTO_DMSETGID = 0x00040000,	/* permission bit for setgid (9P2000.u) */
	P9PROTO_DMSETVTX = 0x00010000,	/* permission bit for sticky bit (9P2000.u) */
};

/*
 * QID types - they are primarily used to
 * differentiate semantics for a file system
 */
enum p9_qid_t {
	P9PROTO_QTDIR = 0x80,		/* directory */
	P9PROTO_QTAPPEND = 0x40,	/* append-only */
	P9PROTO_QTEXCL = 0x20,		/* exclusive use (only one open handle allowed)*/
	P9PROTO_QTMOUNT = 0x10,		/* mount points */
	P9PROTO_QTAUTH = 0x08,		/* authentication file */
	P9PROTO_QTTMP = 0x04,		/* non-backed-up files */
	P9PROTO_QTSYMLINK = 0x02,	/* symbolic links */
	P9PROTO_QTLINK = 0x01,		/* hard link */
	P9PROTO_QTFILE = 0x00,		/* normal files */
};

/* P9 Magic Numbers */
#define P9PROTO_NOFID (uint32_t)(~0)
#define P9_DEFUNAME "nobody"
#define P9_DEFANAME ""
#define P9_NONUNAME (uint32_t)(~0)
#define P9_MAXWELEM 16

/* Exchange unit between Qemu and Client */
struct p9_qid {
	uint8_t type;		/* the type of the file */
	uint32_t version;	/* version number for given path */
	uint64_t path;		/* the file servers unique id for file */
};

/* FS information stat structure */
struct p9_statfs {
	uint32_t type;		/* type of file system */
	uint32_t bsize;		/* optimal transfer block size */
	uint64_t blocks;	/* total data blocks in file system */
	uint64_t bfree;		/* free blocks in fs */
	uint64_t bavail;	/* free blocks avail to non-superuser */
	uint64_t files;		/* total file nodes in file system */
	uint64_t ffree;		/* free file nodes in fs */
	uint64_t fsid;		/* file system id */
	uint32_t namelen;	/* maximum length of filenames */
};

/* File system metadata information */
struct p9_wstat {
	uint16_t size;		/* total byte count of the following data */
	uint16_t type;		/* type of file */
	uint32_t dev;		/* id of device containing file */
	struct p9_qid qid;	/* identifier used by server for file system entity information */
	uint32_t mode;		/* protection */
	uint32_t atime;		/* time of last access */
	uint32_t mtime;		/* time of last modification */
	uint64_t length;	/* length of file in bytes */
	char *name;		/* file name */
	char *uid;		/* user ID of owner */
	char *gid;		/* group ID of owner */
	char *muid;		/* name of the user who last modified the file */
	char *extension;	/* 9p2000.u extensions */
	uid_t n_uid;		/* 9p2000.u extensions */
	gid_t n_gid;		/* 9p2000.u extensions */
	uid_t n_muid;		/* 9p2000.u extensions */
};

/* The linux version of FS information stat structure*/
struct p9_stat_dotl {
	uint64_t st_result_mask;/* indicates fields that are requested */
	struct p9_qid qid;	/* identifier used by server for file system entity information */
	uint32_t st_mode;	/* protection */
	uid_t st_uid;		/* user ID of owner */
	gid_t st_gid;		/* group ID of owner */
	uint64_t st_nlink;	/* number of hard links */
	uint64_t st_rdev;	/* device ID (if special file) */
	uint64_t st_size;	/* total size, in bytes */
	uint64_t st_blksize;	/* blocksize for file system I/O */
	uint64_t st_blocks;	/* number of 512B blocks allocated */
	uint64_t st_atime_sec;	/* time of last access, seconds */
	uint64_t st_atime_nsec;	/* time of last access, nanoseconds */
	uint64_t st_mtime_sec;	/* time of last modification, seconds */
	uint64_t st_mtime_nsec;	/* time of last modification, nanoseconds */
	uint64_t st_ctime_sec;	/* time of last status change, seconds*/
	uint64_t st_ctime_nsec;	/* time of last status change, nanoseconds*/
	uint64_t st_btime_sec;	/* following members are reserved for future use */
	uint64_t st_btime_nsec;
	uint64_t st_gen;
	uint64_t st_data_version;
};

/* P9 inode attribute for setattr */
struct p9_iattr_dotl {
	uint32_t valid;		/* bit fields specifying which fields are valid */
	uint32_t mode;		/* protection */
	uid_t uid;		/* user id of owner */
	gid_t gid;		/* group id */
	uint64_t size;		/* file size */
	uint64_t atime_sec;	/* last access time in seconds */
	uint64_t atime_nsec;	/* last access time in nanoseconds */
	uint64_t mtime_sec;	/* last modification time in seconds */
	uint64_t mtime_nsec;	/* last modification time in nanoseconds */
};

#define P9PROTO_STATS_MODE 0x00000001ULL
#define P9PROTO_STATS_NLINK 0x00000002ULL
#define P9PROTO_STATS_UID 0x00000004ULL
#define P9PROTO_STATS_GID 0x00000008ULL
#define P9PROTO_STATS_RDEV 0x00000010ULL
#define P9PROTO_STATS_ATIME 0x00000020ULL
#define P9PROTO_STATS_MTIME 0x00000040ULL
#define P9PROTO_STATS_CTIME 0x00000080ULL
#define P9PROTO_STATS_INO 0x00000100ULL
#define P9PROTO_STATS_SIZE 0x00000200ULL
#define P9PROTO_STATS_BLOCKS 0x00000400ULL

#define P9PROTO_STATS_BTIME 0x00000800ULL
#define P9PROTO_STATS_GEN 0x00001000ULL
#define P9PROTO_STATS_DATA_VERSION 0x00002000ULL

#define P9PROTO_STATS_BASIC 0x000007ffULL /* Mask for fields up to BLOCKS */
#define P9PROTO_STATS_ALL 0x00003fffULL /* Mask for All fields above */

#define P9PROTO_SETATTR_MODE 0x00000001UL
#define P9PROTO_SETATTR_UID 0x00000002UL
#define P9PROTO_SETATTR_GID 0x00000004UL
#define P9PROTO_SETATTR_SIZE 0x00000008UL
#define P9PROTO_SETATTR_ATIME 0x00000010UL
#define P9PROTO_SETATTR_MTIME 0x00000020UL
#define P9PROTO_SETATTR_CTIME 0x00000040UL
#define P9PROTO_SETATTR_ATIME_SET 0x00000080UL
#define P9PROTO_SETATTR_MTIME_SET 0x00000100UL
#define P9PROTO_SETATTR_MASK 0x000001bfUL

#define P9PROTO_TGETATTR_BLK 512

+#define P9PROTO_UNLINKAT_REMOVEDIR 0x200
+
/* PDU buffer used for SG lists. */
struct p9_buffer {
	uint32_t size;
	uint16_t tag;
	uint8_t id;
	size_t offset;
	size_t capacity;
	uint8_t *sdata;
};

#endif /* FS_P9FS_P9_PROTOCOL_H */
diff --git a/sys/fs/p9fs/p9fs_vnops.c b/sys/fs/p9fs/p9fs_vnops.c
index 14951e96b59d..cfb50c704019 100644
--- a/sys/fs/p9fs/p9fs_vnops.c
+++ b/sys/fs/p9fs/p9fs_vnops.c
@@ -1,2223 +1,2230 @@
/*
 * Copyright (c) 2017-2020 Juniper Networks, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/* This file contains VFS file ops for the 9P protocol.
 * This makes the upper layer of the p9fs driver. These functions interact
 * with the VFS layer and lower layer of p9fs driver which is 9Pnet.
All
 * the user file operations are handled here.
 */
#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include

/* File permissions. */
#define IEXEC 0000100 /* Executable. */
#define IWRITE 0000200 /* Writeable. */
#define IREAD 0000400 /* Readable. */
#define ISVTX 0001000 /* Sticky bit. */
#define ISGID 0002000 /* Set-gid. */
#define ISUID 0004000 /* Set-uid. */

static MALLOC_DEFINE(M_P9UIOV, "uio", "UIOV structures for strategy in p9fs");
extern uma_zone_t p9fs_io_buffer_zone;
extern uma_zone_t p9fs_getattr_zone;
extern uma_zone_t p9fs_setattr_zone;
extern uma_zone_t p9fs_pbuf_zone;
/* For the root vnode's vnops. */
struct vop_vector p9fs_vnops;

static uint32_t p9fs_unix2p9_mode(uint32_t mode);

/*
 * Stamp the in-memory inode of vp with the current time as its
 * modification time.  Only the seconds part of the timestamp is stored.
 */
static void
p9fs_itimes(struct vnode *vp)
{
	struct p9fs_node *node;
	struct timespec ts;
	struct p9fs_inode *inode;

	node = P9FS_VTON(vp);
	inode = &node->inode;

	vfs_timestamp(&ts);
	inode->i_mtime = ts.tv_sec;
}

/*
 * Cleanup the p9fs node, the in memory representation of a vnode for p9fs.
 * The cleanup includes invalidating all cache entries for the vnode,
 * destroying the vobject, removing vnode from hashlist, removing p9fs node
 * from the list of session p9fs nodes, and disposing of the p9fs node.
 * Basically it is doing a reverse of what a create/vget does.
 *
 * A NULL np is ignored.  If the node is not flagged as being on the session
 * list, the function returns after the hash removal step and performs none
 * of the remaining teardown.
 */
void
p9fs_cleanup(struct p9fs_node *np)
{
	struct vnode *vp;
	struct p9fs_session *vses;

	if (np == NULL)
		return;

	vp = P9FS_NTOV(np);
	vses = np->p9fs_ses;

	/* Remove the vnode from hash list if vnode is not already deleted */
	if ((np->flags & P9FS_NODE_DELETED) == 0)
		vfs_hash_remove(vp);

	/* The session lock covers the flag test and the list removal. */
	P9FS_LOCK(vses);
	if ((np->flags & P9FS_NODE_IN_SESSION) != 0) {
		np->flags &= ~P9FS_NODE_IN_SESSION;
		STAILQ_REMOVE(&vses->virt_node_list, np, p9fs_node, p9fs_node_next);
	} else {
		/* Not on the session list: nothing further is torn down. */
		P9FS_UNLOCK(vses);
		return;
	}
	P9FS_UNLOCK(vses);

	/* Invalidate all entries to a particular vnode. */
	cache_purge(vp);

	/* Destroy the vm object and flush associated pages. */
	vnode_destroy_vobject(vp);

	/* Remove all the FID */
	p9fs_fid_remove_all(np, FALSE);

	/* Dispose all node knowledge.*/
	p9fs_destroy_node(&np);
}

/*
 * Reclaim VOP is defined to be called for every vnode. This starts off
 * the cleanup by clunking(remove the fid on the server) and calls
 * p9fs_cleanup to free all the resources allocated for p9fs node.
 * Always returns 0.
 */
static int
p9fs_reclaim(struct vop_reclaim_args *ap)
{
	struct vnode *vp;
	struct p9fs_node *np;

	vp = ap->a_vp;
	np = P9FS_VTON(vp);

	P9_DEBUG(VOPS, "%s: vp:%p node:%p\n", __func__, vp, np);
	p9fs_cleanup(np);

	return (0);
}

/*
 * recycle vnodes which are no longer referenced i.e, their usecount is zero
 *
 * Only nodes flagged P9FS_NODE_DELETED are handed to vrecycle(); all other
 * vnodes are left alone.  Always returns 0.
 */
static int
p9fs_inactive(struct vop_inactive_args *ap)
{
	struct vnode *vp;
	struct p9fs_node *np;

	vp = ap->a_vp;
	np = P9FS_VTON(vp);

	P9_DEBUG(VOPS, "%s: vp:%p node:%p file:%s\n", __func__, vp, np, np->inode.i_name);
	if (np->flags & P9FS_NODE_DELETED)
		vrecycle(vp);

	return (0);
}

/* Arguments carried through vn_vget_ino_gen() to p9fs_lookup_alloc(). */
struct p9fs_lookup_alloc_arg {
	struct componentname *cnp;
	struct p9fs_node *dnp;
	struct p9_fid *newfid;
};

/*
 * Callback for vn_get_ino
 *
 * Note that the lkflags argument is not used; the lock flags passed to
 * p9fs_vget_common() come from the saved componentname instead.
 */
static int
p9fs_lookup_alloc(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
{
	struct p9fs_lookup_alloc_arg *p9aa = arg;

	return (p9fs_vget_common(mp, NULL, p9aa->cnp->cn_lkflags, p9aa->dnp,
	    p9aa->newfid, vpp, p9aa->cnp->cn_nameptr));
}

/*
 * p9fs_lookup is called for every component name that is being searched for.
 *
 * I. If component is found on the server, we look for the in-memory
 *    representation(vnode) of this component in namecache.
 *    A. If the node is found in the namecache, we check is the vnode is still
 *       valid.
 *       1. If it is still valid, return vnode.
 *       2. If it is not valid, we remove this vnode from the name cache and
 *          create a new vnode for the component and return that vnode.
 *    B. If the vnode is not found in the namecache, we look for it in the
 *       hash list.
 *       1.
If the vnode is in the hash list, we check if the vnode is still
 *          valid.
 *          a. If it is still valid, we add that vnode to the namecache for
 *             future lookups and return the vnode.
 *          b. If it is not valid, create a new vnode and p9fs node,
 *             initialize them and return the vnode.
 *       2. If the vnode is not found in the hash list, we create a new vnode
 *          and p9fs node, initialize them and return the vnode.
 * II. If the component is not found on the server, an error code is returned.
 *    A. For the creation case, we return EJUSTRETURN so VFS can handle it.
 *    B. For all other cases, ENOENT is returned.
 */
static int
p9fs_lookup(struct vop_lookup_args *ap)
{
	struct vnode *dvp;
	struct vnode **vpp, *vp;
	struct componentname *cnp;
	struct p9fs_node *dnp; /*dir p9_node */
	struct p9fs_node *np;
	struct p9fs_session *vses;
	struct mount *mp; /* Get the mount point */
	struct p9_fid *dvfid, *newfid;
	int error;
	struct vattr vattr;
	int flags;
	char tmpchr;

	dvp = ap->a_dvp;
	vpp = ap->a_vpp;
	cnp = ap->a_cnp;
	dnp = P9FS_VTON(dvp);
	error = 0;
	flags = cnp->cn_flags;
	*vpp = NULLVP;

	if (dnp == NULL)
		return (ENOENT);

	/* "." resolves to the directory itself; just take another reference. */
	if (cnp->cn_nameptr[0] == '.' && cnp->cn_namelen == 1) {
		vref(dvp);
		*vpp = dvp;
		return (0);
	}

	vses = dnp->p9fs_ses;
	mp = vses->p9fs_mount;

	/* Do the cache part ourselves */
	if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);

	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, curthread);
	if (error)
		return (error);

	/* Do the directory walk on host to check if file exist */
	dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error);
	if (error)
		return (error);

	/*
	 * Save the character present at namelen in nameptr string and
	 * null terminate the character to get the search name for p9_dir_walk
	 * This is done to handle when lookup is for "a" and component
	 * name contains a/b/c
	 */
	tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
	cnp->cn_nameptr[cnp->cn_namelen] = '\0';

	/*
	 * If the client_walk fails, it means the file being looked for doesn't
	 * exist. Create the file if the flags are set or just return the error
	 */
	newfid = p9_client_walk(dvfid, 1, &cnp->cn_nameptr, 1, &error);

	/* Put the saved character back before any early return below. */
	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;

	if (error != 0 || newfid == NULL) {
		/* Clunk the newfid if it is not NULL */
		if (newfid != NULL)
			p9_client_clunk(newfid);

		if (error != ENOENT)
			return (error);

		/* The requested file was not found. */
		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
		    (flags & ISLASTCN)) {

			if (mp->mnt_flag & MNT_RDONLY)
				return (EROFS);

			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
			    curthread);
			if (!error) {
				return (EJUSTRETURN);
			}
		}
		return (error);
	}

	/* Look for the entry in the component cache*/
	error = cache_lookup(dvp, vpp, cnp, NULL, NULL);
	if (error > 0 && error != ENOENT) {
		P9_DEBUG(VOPS, "%s: Cache lookup error %d \n", __func__, error);
		goto out;
	}

	/* -1 is handled below as "cache_lookup() filled in *vpp". */
	if (error == -1) {
		vp = *vpp;
		/* Check if the entry in cache is stale or not */
		if ((p9fs_node_cmp(vp, &newfid->qid) == 0) &&
		    ((error = VOP_GETATTR(vp, &vattr, cnp->cn_cred)) == 0)) {
			/*
			 * Cached vnode is still good: return it and clunk the
			 * freshly walked fid at "out".
			 */
			goto out;
		}
		/*
		 * This case, we have an error coming from getattr,
		 * act accordingly.
		 */
		cache_purge(vp);
		if (dvp != vp)
			vput(vp);
		else
			vrele(vp);

		*vpp = NULLVP;
	} else if (error == ENOENT) {
		if (VN_IS_DOOMED(dvp))
			goto out;
		if (VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0) {
			error = ENOENT;
			goto out;
		}
		cache_purge_negative(dvp);
	}
	/* Reset values */
	error = 0;
	vp = NULLVP;

	/* NUL-terminate the component again for the vget calls below. */
	tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
	cnp->cn_nameptr[cnp->cn_namelen] = '\0';

	/*
	 * Looks like we have found an entry. Now take care of all other cases.
	 */
	if (flags & ISDOTDOT) {
		struct p9fs_lookup_alloc_arg p9aa;
		p9aa.cnp = cnp;
		p9aa.dnp = dnp;
		p9aa.newfid = newfid;
		error = vn_vget_ino_gen(dvp, p9fs_lookup_alloc, &p9aa, 0, &vp);
		if (error)
			goto out;
		*vpp = vp;
	} else {
		/*
		 * client_walk is equivalent to searching a component name in a
		 * directory(fid) here. If new fid is returned, we have found an
		 * entry for this component name so, go and create the rest of
		 * the vnode infra(vget_common) for the returned newfid.
		 */
		if ((cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)
		    && (flags & ISLASTCN)) {
			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
			    curthread);
			if (error)
				goto out;

			error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
			    dnp, newfid, &vp, cnp->cn_nameptr);
			if (error)
				goto out;

			*vpp = vp;
			np = P9FS_VTON(vp);
			/*
			 * Sticky directory: refuse unless the caller is uid 0
			 * or matches the directory's or the entry's n_uid.
			 */
			if ((dnp->inode.i_mode & ISVTX) &&
			    cnp->cn_cred->cr_uid != 0 &&
			    cnp->cn_cred->cr_uid != dnp->inode.n_uid &&
			    cnp->cn_cred->cr_uid != np->inode.n_uid) {
				vput(*vpp);
				*vpp = NULL;
				cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
				return (EPERM);
			}
		} else {
			error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags,
			    dnp, newfid, &vp, cnp->cn_nameptr);
			if (error)
				goto out;
			*vpp = vp;
		}
	}

	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;

	/* Store the result the cache if MAKEENTRY is specified in flags */
	if ((cnp->cn_flags & MAKEENTRY) != 0)
		cache_enter(dvp, *vpp, cnp);
	return (error);
out:
	/* Common exit: restore the component name and drop the walked fid. */
	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
	p9_client_clunk(newfid);
	return (error);
}

/*
 * Common creation function for file/directory with respective flags. We first
 * open the parent directory in order to create the file under it. For this,
 * as 9P protocol suggests, we need to call client_walk to create the open fid.
 * Once we have the open fid, the file_create function creates the direntry with
 * the name and perm specified under the parent dir. If this succeeds (an entry
 * is created for the new file on the server), we create our metadata for this
 * file (vnode, p9fs node calling vget). Once we are done, we clunk the open
 * fid of the parent directory.
*/
/*
 * Arguments, as used below:
 *   dnp        parent directory node; its VFID fid is cloned for the create.
 *   cnp        component name; cn_nameptr is temporarily NUL-terminated at
 *              cn_namelen and restored before every return past that point.
 *   extension  passed through unchanged to p9_client_file_create().
 *   perm, mode passed through unchanged to p9_client_file_create().
 *   vpp        receives the new vnode unless P9PROTO_DMLINK is set in perm,
 *              in which case no walk/vget is performed.
 *
 * Returns 0 on success, otherwise the error of the step that failed.
 */
static int
create_common(struct p9fs_node *dnp, struct componentname *cnp,
    char *extension, uint32_t perm, uint8_t mode, struct vnode **vpp)
{
	char tmpchr;
	struct p9_fid *dvfid, *ofid, *newfid;
	struct p9fs_session *vses;
	struct mount *mp;
	int error;

	P9_DEBUG(VOPS, "%s: name %s\n", __func__, cnp->cn_nameptr);

	vses = dnp->p9fs_ses;
	mp = vses->p9fs_mount;
	newfid = NULL;
	error = 0;

	dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error);
	if (error != 0)
		return (error);

	/* Clone the directory fid to create the new file */
	ofid = p9_client_walk(dvfid, 0, NULL, 1, &error);
	if (error != 0)
		return (error);

	/*
	 * Save the character present at namelen in nameptr string and
	 * null terminate the character to get the search name for p9_dir_walk
	 */
	tmpchr = cnp->cn_nameptr[cnp->cn_namelen];
	cnp->cn_nameptr[cnp->cn_namelen] = '\0';

	error = p9_client_file_create(ofid, cnp->cn_nameptr, perm, mode,
	    extension);
	if (error != 0) {
		P9_DEBUG(ERROR, "%s: p9_client_fcreate failed %d\n", __func__,
		    error);
		goto out;
	}

	/* If its not hardlink only then do the walk, else we are done. */
	if (!(perm & P9PROTO_DMLINK)) {
		/*
		 * Do the lookup part and add the vnode, p9fs node. Note that
		 * vpp is filled in here.
		 */
		newfid = p9_client_walk(dvfid, 1, &cnp->cn_nameptr, 1, &error);
		if (newfid == NULL) {
			/*
			 * Never report success without a vnode: if the walk
			 * produced no fid and no error code, report ENOENT.
			 */
			if (error == 0)
				error = ENOENT;
			goto out;
		}

		error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags, dnp,
		    newfid, vpp, cnp->cn_nameptr);
		if (error != 0)
			goto out;

		if ((cnp->cn_flags & MAKEENTRY) != 0)
			cache_enter(P9FS_NTOV(dnp), *vpp, cnp);
	}
	P9_DEBUG(VOPS, "%s: created file under vp %p node %p fid %ju\n",
	    __func__, *vpp, dnp, (uintmax_t)dvfid->fid);

	/* Clunk the open ofid. */
	if (ofid != NULL)
		(void)p9_client_clunk(ofid);

	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
	return (0);
out:
	if (ofid != NULL)
		(void)p9_client_clunk(ofid);

	if (newfid != NULL)
		(void)p9_client_clunk(newfid);

	cnp->cn_nameptr[cnp->cn_namelen] = tmpchr;
	return (error);
}

/*
 * This is the main file creation VOP.
Make the permissions of the new * file and call the create_common common code to complete the create. */ static int p9fs_create(struct vop_create_args *ap) { struct vnode *dvp; struct vnode **vpp; struct componentname *cnp; uint32_t mode; struct p9fs_node *dnp; struct p9fs_inode *dinode; uint32_t perm; int ret; dvp = ap->a_dvp; vpp = ap->a_vpp; cnp = ap->a_cnp; dnp = P9FS_VTON(dvp); dinode = &dnp->inode; mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode); perm = p9fs_unix2p9_mode(mode); P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp); ret = create_common(dnp, cnp, NULL, perm, P9PROTO_ORDWR, vpp); if (ret == 0) { P9FS_INCR_LINKS(dinode); } return (ret); } /* * p9fs_mkdir is the main directory creation vop. Make the permissions of the new dir * and call the create_common common code to complete the create. */ static int p9fs_mkdir(struct vop_mkdir_args *ap) { struct vnode *dvp; struct vnode **vpp; struct componentname *cnp; uint32_t mode; struct p9fs_node *dnp; struct p9fs_inode *dinode; uint32_t perm; int ret; dvp = ap->a_dvp; vpp = ap->a_vpp; cnp = ap->a_cnp; dnp = P9FS_VTON(dvp); dinode = &dnp->inode; mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode); perm = p9fs_unix2p9_mode(mode | S_IFDIR); P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp); ret = create_common(dnp, cnp, NULL, perm, P9PROTO_ORDWR, vpp); if (ret == 0) P9FS_INCR_LINKS(dinode); return (ret); } /* * p9fs_mknod is the main node creation vop. Make the permissions of the new node * and call the create_common common code to complete the create. 
*/ static int p9fs_mknod(struct vop_mknod_args *ap) { struct vnode *dvp; struct vnode **vpp; struct componentname *cnp; uint32_t mode; struct p9fs_node *dnp; struct p9fs_inode *dinode; uint32_t perm; int ret; dvp = ap->a_dvp; vpp = ap->a_vpp; cnp = ap->a_cnp; dnp = P9FS_VTON(dvp); dinode = &dnp->inode; mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode); perm = p9fs_unix2p9_mode(mode); P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp); ret = create_common(dnp, cnp, NULL, perm, P9PROTO_OREAD, vpp); if (ret == 0) { P9FS_INCR_LINKS(dinode); } return (ret); } /* Convert open mode permissions to P9 */ static int p9fs_uflags_mode(int uflags, int extended) { uint32_t ret; /* Convert first to O flags.*/ uflags = OFLAGS(uflags); switch (uflags & 3) { case O_RDONLY: ret = P9PROTO_OREAD; break; case O_WRONLY: ret = P9PROTO_OWRITE; break; case O_RDWR: ret = P9PROTO_ORDWR; break; } if (extended) { if (uflags & O_EXCL) ret |= P9PROTO_OEXCL; if (uflags & O_APPEND) ret |= P9PROTO_OAPPEND; } return (ret); } /* * This is the main open VOP for every file open. If the file is already * open, then increment and return. If there is no open fid for this file, * there needs to be a client_walk which creates a new open fid for this file. * Once we have a open fid, call the open on this file with the mode creating * the vobject. 
*/
static int
p9fs_open(struct vop_open_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct p9fs_node *node = P9FS_VTON(vp);
	struct p9fs_session *ses = node->p9fs_ses;
	struct p9_fid *base_fid, *open_fid;
	size_t obj_size;
	uint32_t p9mode;
	int error = 0;

	P9_DEBUG(VOPS, "%s: vp %p\n", __func__, vp);

	/* Only regular files, directories and symlinks can be opened. */
	switch (vp->v_type) {
	case VREG:
	case VDIR:
	case VLNK:
		break;
	default:
		return (EOPNOTSUPP);
	}

	/* Refresh the cached attributes from the server first. */
	error = p9fs_reload_stats_dotl(vp, ap->a_cred);
	if (error != 0)
		return (error);

	ASSERT_VOP_LOCKED(vp, __func__);

	/*
	 * The reload flags the node as modified when the qid version moved;
	 * in that case drop the cached buffers of a regular file.
	 */
	if (vp->v_type == VREG && (node->flags & P9FS_NODE_MODIFIED) != 0) {
		error = vinvalbuf(vp, 0, 0, 0);
		if (error != 0)
			return (error);
		node->flags &= ~P9FS_NODE_MODIFIED;
	}

	base_fid = p9fs_get_fid(ses->clnt, node, ap->a_cred, VFID, -1, &error);
	if (error != 0)
		return (error);

	/* Translate kernel fflags to 9p mode */
	p9mode = p9fs_uflags_mode(ap->a_mode, 1);

	/*
	 * Reuse an already-open fid for this credential and mode when one
	 * exists; only the open count changes.
	 */
	open_fid = p9fs_get_fid(ses->clnt, node, ap->a_cred, VOFID, p9mode,
	    &error);
	if (open_fid != NULL) {
		open_fid->v_opens++;
		return (0);
	}

	/* Otherwise clone the base fid; the clone becomes the open fid. */
	open_fid = p9_client_walk(base_fid, 0, NULL, 1, &error);
	if (error != 0)
		return (error);

	error = p9_client_open(open_fid, p9mode);
	if (error != 0) {
		p9_client_clunk(open_fid);
		return (error);
	}

	open_fid->v_opens = 1;
	obj_size = node->inode.i_size;
	vnode_create_vobject(vp, obj_size, ap->a_td);
	p9fs_fid_add(node, open_fid, VOFID);
	return (0);
}

/*
 * Close the open references. Just reduce the open count on vofid and return.
 * Let clunking of VOFID happen in p9fs_reclaim.
*/ static int p9fs_close(struct vop_close_args *ap) { struct vnode *vp; struct p9fs_node *np; struct p9fs_session *vses; struct p9_fid *vofid; int error; vp = ap->a_vp; np = P9FS_VTON(vp); if (np == NULL) return (0); vses = np->p9fs_ses; error = 0; P9_DEBUG(VOPS, "%s: file_name %s\n", __func__, np->inode.i_name); /* * Translate kernel fflags to 9p mode */ vofid = p9fs_get_fid(vses->clnt, np, ap->a_cred, VOFID, p9fs_uflags_mode(ap->a_fflag, 1), &error); if (vofid == NULL) return (0); vofid->v_opens--; return (0); } /* Helper routine for checking if fileops are possible on this file */ static int p9fs_check_possible(struct vnode *vp, struct vattr *vap, mode_t mode) { /* Check if we are allowed to write */ switch (vap->va_type) { case VDIR: case VLNK: case VREG: /* * Normal nodes: check if we're on a read-only mounted * file system and bail out if we're trying to write. */ if ((mode & VMODIFY_PERMS) && (vp->v_mount->mnt_flag & MNT_RDONLY)) return (EROFS); break; case VBLK: case VCHR: case VSOCK: case VFIFO: /* * Special nodes: even on read-only mounted file systems * these are allowed to be written to if permissions allow. */ break; default: /* No idea what this is */ return (EINVAL); } return (0); } /* Check the access permissions of the file. */ static int p9fs_access(struct vop_access_args *ap) { struct vnode *vp; accmode_t accmode; struct ucred *cred; struct vattr vap; int error; vp = ap->a_vp; accmode = ap->a_accmode; cred = ap->a_cred; P9_DEBUG(VOPS, "%s: vp %p\n", __func__, vp); /* make sure getattr is working correctly and is defined.*/ error = VOP_GETATTR(vp, &vap, cred); if (error != 0) return (error); error = p9fs_check_possible(vp, &vap, accmode); if (error != 0) return (error); /* Call the Generic Access check in VOPS*/ error = vaccess(vp->v_type, vap.va_mode, vap.va_uid, vap.va_gid, accmode, cred); return (error); } /* * Reload the file stats from the server and update the inode structure present * in p9fs node. 
*/ int p9fs_reload_stats_dotl(struct vnode *vp, struct ucred *cred) { struct p9_stat_dotl *stat; int error; struct p9fs_node *node; struct p9fs_session *vses; struct p9_fid *vfid; error = 0; node = P9FS_VTON(vp); vses = node->p9fs_ses; vfid = p9fs_get_fid(vses->clnt, node, cred, VOFID, P9PROTO_OREAD, &error); if (vfid == NULL) { vfid = p9fs_get_fid(vses->clnt, node, cred, VFID, -1, &error); if (error) return (error); } stat = uma_zalloc(p9fs_getattr_zone, M_WAITOK | M_ZERO); error = p9_client_getattr(vfid, stat, P9PROTO_STATS_ALL); if (error != 0) { P9_DEBUG(ERROR, "%s: p9_client_getattr failed: %d\n", __func__, error); goto out; } /* Init the vnode with the disk info */ p9fs_stat_vnode_dotl(stat, vp); out: if (stat != NULL) { uma_zfree(p9fs_getattr_zone, stat); } return (error); } /* * Read the current inode values into the vap attr. We reload the stats from * the server. */ static int p9fs_getattr_dotl(struct vop_getattr_args *ap) { struct vnode *vp; struct vattr *vap; struct p9fs_node *node; struct p9fs_inode *inode; int error; vp = ap->a_vp; vap = ap->a_vap; node = P9FS_VTON(vp); if (node == NULL) return (ENOENT); inode = &node->inode; P9_DEBUG(VOPS, "%s: %u %u\n", __func__, inode->i_mode, IFTOVT(inode->i_mode)); /* Reload our stats once to get the right values.*/ error = p9fs_reload_stats_dotl(vp, ap->a_cred); if (error != 0) { P9_DEBUG(ERROR, "%s: failed: %d\n", __func__, error); return (error); } /* Basic info */ VATTR_NULL(vap); vap->va_atime.tv_sec = inode->i_atime; vap->va_mtime.tv_sec = inode->i_mtime; vap->va_ctime.tv_sec = inode->i_ctime; vap->va_atime.tv_nsec = inode->i_atime_nsec; vap->va_mtime.tv_nsec = inode->i_mtime_nsec; vap->va_ctime.tv_nsec = inode->i_ctime_nsec; vap->va_type = IFTOVT(inode->i_mode); vap->va_mode = inode->i_mode; vap->va_uid = inode->n_uid; vap->va_gid = inode->n_gid; vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; vap->va_size = inode->i_size; vap->va_nlink = inode->i_links_count; vap->va_blocksize = inode->blksize; 
vap->va_fileid = inode->i_qid_path; vap->va_flags = inode->i_flags; vap->va_gen = inode->gen; vap->va_filerev = inode->data_version; vap->va_vaflags = 0; vap->va_bytes = inode->blocks * P9PROTO_TGETATTR_BLK; return (0); } /* Convert a standard FreeBSD permission to P9. */ static uint32_t p9fs_unix2p9_mode(uint32_t mode) { uint32_t res; res = mode & 0777; if (S_ISDIR(mode)) res |= P9PROTO_DMDIR; if (S_ISSOCK(mode)) res |= P9PROTO_DMSOCKET; if (S_ISLNK(mode)) res |= P9PROTO_DMSYMLINK; if (S_ISFIFO(mode)) res |= P9PROTO_DMNAMEDPIPE; if ((mode & S_ISUID) == S_ISUID) res |= P9PROTO_DMSETUID; if ((mode & S_ISGID) == S_ISGID) res |= P9PROTO_DMSETGID; if ((mode & S_ISVTX) == S_ISVTX) res |= P9PROTO_DMSETVTX; return (res); } /* Update inode with the stats read from server.(9P2000.L version) */ int p9fs_stat_vnode_dotl(struct p9_stat_dotl *stat, struct vnode *vp) { struct p9fs_node *np; struct p9fs_inode *inode; np = P9FS_VTON(vp); inode = &np->inode; ASSERT_VOP_LOCKED(vp, __func__); /* Update the pager size if file size changes on host */ if (inode->i_size != stat->st_size) { inode->i_size = stat->st_size; if (vp->v_type == VREG) vnode_pager_setsize(vp, inode->i_size); } inode->i_mtime = stat->st_mtime_sec; inode->i_atime = stat->st_atime_sec; inode->i_ctime = stat->st_ctime_sec; inode->i_mtime_nsec = stat->st_mtime_nsec; inode->i_atime_nsec = stat->st_atime_nsec; inode->i_ctime_nsec = stat->st_ctime_nsec; inode->n_uid = stat->st_uid; inode->n_gid = stat->st_gid; inode->i_mode = stat->st_mode; vp->v_type = IFTOVT(inode->i_mode); inode->i_links_count = stat->st_nlink; inode->blksize = stat->st_blksize; inode->blocks = stat->st_blocks; inode->gen = stat->st_gen; inode->data_version = stat->st_data_version; ASSERT_VOP_LOCKED(vp, __func__); /* Setting a flag if file changes based on qid version */ if (np->vqid.qid_version != stat->qid.version) np->flags |= P9FS_NODE_MODIFIED; memcpy(&np->vqid, &stat->qid, sizeof(stat->qid)); return (0); } /* * Write the current in memory inode 
stats into persistent stats structure * to write to the server(for linux version). */ static int p9fs_inode_to_iattr(struct p9fs_inode *inode, struct p9_iattr_dotl *p9attr) { p9attr->size = inode->i_size; p9attr->mode = inode->i_mode; p9attr->uid = inode->n_uid; p9attr->gid = inode->n_gid; p9attr->atime_sec = inode->i_atime; p9attr->atime_nsec = inode->i_atime_nsec; p9attr->mtime_sec = inode->i_mtime; p9attr->mtime_nsec = inode->i_mtime_nsec; return (0); } /* * Modify the ownership of a file whenever the chown is called on the * file. */ static int p9fs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, struct thread *td) { struct p9fs_node *np; struct p9fs_inode *inode; uid_t ouid; gid_t ogid; int error; np = P9FS_VTON(vp); inode = &np->inode; if (uid == (uid_t)VNOVAL) uid = inode->n_uid; if (gid == (gid_t)VNOVAL) gid = inode->n_gid; /* * To modify the ownership of a file, must possess VADMIN for that * file. */ if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td))) return (error); /* * To change the owner of a file, or change the group of a file to a * group of which we are not a member, the caller must have * privilege. */ if (((uid != inode->n_uid && uid != cred->cr_uid) || (gid != inode->n_gid && !groupmember(gid, cred))) && (error = priv_check_cred(cred, PRIV_VFS_CHOWN))) return (error); ogid = inode->n_gid; ouid = inode->n_uid; inode->n_gid = gid; inode->n_uid = uid; if ((inode->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) { if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) inode->i_mode &= ~(ISUID | ISGID); } P9_DEBUG(VOPS, "%s: vp %p, cred %p, td %p - ret OK\n", __func__, vp, cred, td); return (0); } /* * Update the in memory inode with all chmod new permissions/mode. Typically a * setattr is called to update it to server. 
*/
static int
p9fs_chmod(struct vnode *vp, uint32_t mode, struct ucred *cred,
    struct thread *td)
{
	struct p9fs_inode *ino = &P9FS_VTON(vp)->inode;
	uint32_t updated;
	int error;

	P9_DEBUG(VOPS, "%s: vp %p, mode %x, cred %p, td %p\n", __func__, vp,
	    mode, cred, td);

	/*
	 * To modify the permissions on a file, must possess VADMIN
	 * for that file.
	 */
	error = VOP_ACCESS(vp, VADMIN, cred, td);
	if (error != 0)
		return (error);

	/*
	 * Privileged processes may set the sticky bit on non-directories,
	 * as well as set the setgid bit on a file with a group that the
	 * process is not a member of. Both of these are allowed in
	 * jail(8).
	 */
	if (vp->v_type != VDIR && (mode & S_ISTXT) != 0 &&
	    priv_check_cred(cred, PRIV_VFS_STICKYFILE) != 0)
		return (EFTYPE);

	if (!groupmember(ino->n_gid, cred) && (mode & ISGID) != 0) {
		error = priv_check_cred(cred, PRIV_VFS_SETGID);
		if (error != 0)
			return (error);
	}

	/* Deny setting setuid if we are not the file owner. */
	if ((mode & ISUID) != 0 && ino->n_uid != cred->cr_uid) {
		error = priv_check_cred(cred, PRIV_VFS_ADMIN);
		if (error != 0)
			return (error);
	}

	/* Replace only the permission bits, keep everything else. */
	updated = ino->i_mode;
	updated = (updated & ~ALLPERMS) | (mode & ALLPERMS);
	ino->i_mode = updated;

	P9_DEBUG(VOPS, "%s: to mode %x %d \n ", __func__, updated, error);

	return (error);
}

/*
 * Set the attributes of a file referenced by fid.
A valid bitmask is sent
 * in request selecting which fields to set
 *
 * Flow, as implemented below:
 *   1. Reject attributes that can never be set (EINVAL), read-only mounts
 *      (EROFS) and any va_flags change (EOPNOTSUPP).
 *   2. Apply ownership, mode, size and time changes to the in-memory inode,
 *      accumulating the matching P9PROTO_SETATTR_* bits in p9attr->valid.
 *   3. Copy the inode into p9attr and send it with p9_client_setattr().
 *   4. For a regular-file size change, roll i_size back on failure or resize
 *      the pager on success.
 *
 * Returns 0 on success or the first error encountered.
 */
static int
p9fs_setattr_dotl(struct vop_setattr_args *ap)
{
	struct vnode *vp;
	struct vattr *vap;
	struct p9fs_node *node;
	struct p9fs_inode *inode;
	struct ucred *cred;
	struct thread *td;
	struct p9_iattr_dotl *p9attr;
	struct p9fs_session *vses;
	struct p9_fid *vfid;
	uint64_t oldfilesize;
	int error;

	vp = ap->a_vp;
	vap = ap->a_vap;
	node = P9FS_VTON(vp);
	inode = &node->inode;
	cred = ap->a_cred;
	td = curthread;
	vses = node->p9fs_ses;
	error = 0;

	/* These fields are not settable; any of them being set is an error. */
	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
	    (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
		P9_DEBUG(ERROR, "%s: unsettable attribute\n", __func__);
		return (EINVAL);
	}
	/* Disallow write attempts on read only filesystem */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return (EROFS);

	/* Setting of flags is not supported */
	if (vap->va_flags != VNOVAL)
		return (EOPNOTSUPP);

	/* Allocate p9attr struct */
	p9attr = uma_zalloc(p9fs_setattr_zone, M_WAITOK | M_ZERO);
	if (p9attr == NULL)
		return (ENOMEM);

	/* Check if we need to change the ownership of the file*/
	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
		P9_DEBUG(VOPS, "%s: vp:%p td:%p uid/gid %x/%x\n", __func__,
		    vp, td, vap->va_uid, vap->va_gid);

		/*
		 * p9fs_chown() edits the in-memory inode (and may clear the
		 * set-id bits in i_mode), hence MODE is marked valid too.
		 */
		error = p9fs_chown(vp, vap->va_uid, vap->va_gid, cred, td);
		p9attr->valid |= P9PROTO_SETATTR_UID | P9PROTO_SETATTR_GID |
		    P9PROTO_SETATTR_MODE;
		if (error)
			goto out;
	}

	/* Check for mode changes */
	if (vap->va_mode != (mode_t)VNOVAL) {
		P9_DEBUG(VOPS, "%s: vp:%p td:%p mode %x\n", __func__, vp, td,
		    vap->va_mode);

		error = p9fs_chmod(vp, (int)vap->va_mode, cred, td);
		p9attr->valid |= P9PROTO_SETATTR_MODE;
		if (error)
			goto out;
	}

	/* Update the size of the file and update mtime */
	if (vap->va_size != (uint64_t)VNOVAL) {
		P9_DEBUG(VOPS, "%s: vp:%p td:%p size:%jx\n", __func__,
		    vp, td, (uintmax_t)vap->va_size);
		switch (vp->v_type) {
		case VDIR:
			error = EISDIR;
			goto out;
		case VLNK:
		case VREG:
			/* Invalidate cached pages of vp */
			error = vinvalbuf(vp, 0, 0, 0);
			if (error)
				goto out;
			/* Remember the old size so a failed RPC can be undone. */
			oldfilesize = inode->i_size;
			inode->i_size = vap->va_size;
			/* Update the p9fs_inode time */
			p9fs_itimes(vp);
			p9attr->valid |= P9PROTO_SETATTR_SIZE |
			    P9PROTO_SETATTR_ATIME |
			    P9PROTO_SETATTR_MTIME |
			    P9PROTO_SETATTR_ATIME_SET |
			    P9PROTO_SETATTR_MTIME_SET ;
			break;
		default:
			/* Other vnode types: size request is ignored, error stays 0. */
			goto out;
		}
	} else if (vap->va_atime.tv_sec != VNOVAL ||
		    vap->va_mtime.tv_sec != VNOVAL) {
		P9_DEBUG(VOPS, "%s: vp:%p td:%p time a/m %jx/%jx/\n",
		    __func__, vp, td, (uintmax_t)vap->va_atime.tv_sec,
		    (uintmax_t)vap->va_mtime.tv_sec);
		/* Update the p9fs_inode times */
		/*
		 * NOTE(review): the requested va_atime/va_mtime values are not
		 * copied into the inode here; the times sent are whatever
		 * p9fs_itimes() stores — confirm this is intended.
		 */
		p9fs_itimes(vp);
		p9attr->valid |= P9PROTO_SETATTR_ATIME |
		    P9PROTO_SETATTR_MTIME | P9PROTO_SETATTR_ATIME_SET |
		    P9PROTO_SETATTR_MTIME_SET;
	}

	/* Prefer a write-open fid; fall back to the plain VFID fid. */
	vfid = p9fs_get_fid(vses->clnt, node, cred, VOFID, P9PROTO_OWRITE, &error);
	if (vfid == NULL) {
		vfid = p9fs_get_fid(vses->clnt, node, cred, VFID, -1, &error);
		if (error)
			goto out;
	}

	/* Write the inode structure values into p9attr */
	p9fs_inode_to_iattr(inode, p9attr);
	error = p9_client_setattr(vfid, p9attr);
	/*
	 * Only regular files get the size rolled back / pager resized.
	 * NOTE(review): for VLNK, i_size keeps the new value even when the
	 * setattr RPC fails — confirm whether that is intended.
	 */
	if (vap->va_size != (uint64_t)VNOVAL && vp->v_type == VREG) {
		if (error)
			inode->i_size = oldfilesize;
		else
			vnode_pager_setsize(vp, inode->i_size);
	}
out:
	if (p9attr) {
		uma_zfree(p9fs_setattr_zone, p9attr);
	}
	P9_DEBUG(VOPS, "%s: error: %d\n", __func__, error);
	return (error);
}

/*
 * State handed from p9fs_get_open_fid() to p9fs_release_open_fid().
 */
struct open_fid_state {
	struct p9_fid *vofid;	/* open fid to use for the I/O */
	int fflags;		/* flags passed to VOP_OPEN; set only if opened */
	int opened;		/* TRUE when p9fs_get_open_fid() did a VOP_OPEN */
};

/*
 * TODO: change this to take P9PROTO_* mode and avoid routing through
 * VOP_OPEN, factoring out implementation of p9fs_open.
*/ static int p9fs_get_open_fid(struct vnode *vp, int fflags, struct ucred *cr, struct open_fid_state *statep) { struct p9fs_node *np; struct p9fs_session *vses; struct p9_fid *vofid; int mode = p9fs_uflags_mode(fflags, TRUE); int error = 0; statep->opened = FALSE; np = P9FS_VTON(vp); vses = np->p9fs_ses; vofid = p9fs_get_fid(vses->clnt, np, cr, VOFID, mode, &error); if (vofid == NULL) { error = VOP_OPEN(vp, fflags, cr, curthread, NULL); if (error) { return (error); } vofid = p9fs_get_fid(vses->clnt, np, cr, VOFID, mode, &error); if (vofid == NULL) { return (EBADF); } statep->fflags = fflags; statep->opened = TRUE; } statep->vofid = vofid; return (0); } static void p9fs_release_open_fid(struct vnode *vp, struct ucred *cr, struct open_fid_state *statep) { if (statep->opened) { (void) VOP_CLOSE(vp, statep->fflags, cr, curthread); } } /* * An I/O buffer is used to to do any transfer. The uio is the vfs structure we * need to copy data into. As long as resid is greater than zero, we call * client_read to read data from offset(offset into the file) in the open fid * for the file into the I/O buffer. The data is read into the user data buffer. 
*/
/*
 * Read VOP. Only regular files are readable through this path: character and
 * block devices get EOPNOTSUPP, every other non-regular type gets EISDIR.
 * Reads stop at the cached file size (inode.i_size).
 *
 * Each request is clamped to P9FS_IOUNIT, the same bound p9fs_write() uses
 * for the shared bounce buffer, and a zero-length reply from the server ends
 * the transfer instead of being retried forever.
 *
 * Returns 0 on success (possibly after a short read) or an errno value.
 */
static int
p9fs_read(struct vop_read_args *ap)
{
	struct vnode *vp;
	struct uio *uio;
	struct p9fs_node *np;
	uint64_t offset;
	int64_t ret;
	uint64_t resid;
	uint32_t count;
	int error;
	char *io_buffer = NULL;
	uint64_t filesize;
	struct open_fid_state ostate;

	vp = ap->a_vp;
	uio = ap->a_uio;
	np = P9FS_VTON(vp);
	error = 0;

	if (vp->v_type == VCHR || vp->v_type == VBLK)
		return (EOPNOTSUPP);
	if (vp->v_type != VREG)
		return (EISDIR);
	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset < 0)
		return (EINVAL);

	error = p9fs_get_open_fid(vp, FREAD, ap->a_cred, &ostate);
	if (error)
		return (error);

	/* where in the file are we to start reading */
	offset = uio->uio_offset;
	filesize = np->inode.i_size;
	if (uio->uio_offset >= filesize)
		goto out;

	P9_DEBUG(VOPS, "%s: called %jd at %ju\n",
	    __func__, (intmax_t)uio->uio_resid, (uintmax_t)uio->uio_offset);

	/* Work with a local buffer from the pool for this vop */
	io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK | M_ZERO);
	while ((resid = uio->uio_resid) > 0) {
		if (offset >= filesize)
			break;

		/*
		 * Bound the request in 64-bit arithmetic before narrowing it
		 * to the 32-bit count, so large requests cannot wrap to 0 and
		 * be mistaken for end-of-file. count is always > 0 here.
		 */
		count = MIN(MIN(filesize - offset, resid), P9FS_IOUNIT);

		/* Copy count bytes into the uio */
		ret = p9_client_read(ostate.vofid, offset, count, io_buffer);
		/*
		 * This is the only place in the entire p9fs where we check the
		 * error for < 0 as p9_client_read/write return the number of
		 * bytes instead of an error code. In this case if ret is < 0,
		 * it means there is an IO error.
		 */
		if (ret < 0) {
			error = -ret;
			goto out;
		}
		/* No progress: the server has nothing more for us. */
		if (ret == 0)
			break;

		error = uiomove(io_buffer, ret, uio);
		if (error != 0)
			goto out;

		offset += ret;
	}
	uio->uio_offset = offset;
out:
	uma_zfree(p9fs_io_buffer_zone, io_buffer);
	p9fs_release_open_fid(vp, ap->a_cred, &ostate);

	return (error);
}

/*
 * The user buffer contains the data to be written. This data is copied first
 * from uio into I/O buffer. This I/O buffer is used to do the client_write to
 * the fid of the file starting from the offset given upto count bytes. The
 * number of bytes written is returned to the caller.
*/
/*
 * Write VOP. Data is staged through a bounce buffer in chunks of at most
 * P9FS_IOUNIT bytes; each chunk is pushed with as many p9_client_write()
 * calls as it takes.
 *
 * Error handling:
 *   - Every exit after p9fs_get_open_fid() goes through "out" so the open
 *     fid is always released (including the EISDIR case).
 *   - If a write fails before anything was written, the error is returned.
 *   - If a write fails after some bytes were written, the transfer stops,
 *     the unwritten part of the current chunk is handed back to the uio and
 *     0 is returned, i.e. a short write.
 *
 * The cached file size is grown to the end of the data actually written.
 */
static int
p9fs_write(struct vop_write_args *ap)
{
	struct vnode *vp;
	struct uio *uio;
	struct p9fs_node *np;
	uint64_t off, offset;
	int64_t ret;
	uint64_t resid, bytes_written;
	uint32_t count;
	int error, ioflag;
	uint64_t file_size;
	char *io_buffer = NULL;
	struct open_fid_state ostate;

	vp = ap->a_vp;
	uio = ap->a_uio;
	np = P9FS_VTON(vp);
	error = 0;
	ioflag = ap->a_ioflag;

	error = p9fs_get_open_fid(vp, FWRITE, ap->a_cred, &ostate);
	if (error)
		return (error);

	P9_DEBUG(VOPS, "%s: %#zx at %#jx\n",
	    __func__, uio->uio_resid, (uintmax_t)uio->uio_offset);

	if (uio->uio_offset < 0) {
		error = EINVAL;
		goto out;
	}
	if (uio->uio_resid == 0)
		goto out;

	file_size = np->inode.i_size;

	switch (vp->v_type) {
	case VREG:
		if (ioflag & IO_APPEND)
			uio->uio_offset = file_size;
		break;
	case VDIR:
		/* Leave through "out" so the open fid is released. */
		error = EISDIR;
		goto out;
	case VLNK:
		break;
	default:
		panic("%s: bad file type vp: %p", __func__, vp);
	}

	offset = uio->uio_offset;
	bytes_written = 0;
	error = 0;

	io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK | M_ZERO);
	while ((resid = uio->uio_resid) > 0) {
		off = 0;
		count = MIN(resid, P9FS_IOUNIT);
		error = uiomove(io_buffer, count, uio);

		if (error != 0) {
			P9_DEBUG(ERROR, "%s: uiomove failed: %d\n", __func__, error);
			goto out;
		}

		/* While count still exists, keep writing.*/
		while (count > 0) {
			/* Copy count bytes from the uio */
			ret = p9_client_write(ostate.vofid, offset, count,
			    io_buffer + off);
			if (ret < 0) {
				if (bytes_written == 0) {
					error = -ret;
					goto out;
				}
				/*
				 * Short write: give the unwritten tail of
				 * this chunk back to the uio and stop, so
				 * later data is never written at a stale
				 * offset.
				 */
				uio->uio_offset -= count;
				uio->uio_resid += count;
				goto written;
			}
			P9_DEBUG(VOPS, "%s: write %#zx at %#jx\n",
			    __func__, uio->uio_resid, (uintmax_t)uio->uio_offset);

			off += ret;
			offset += ret;
			bytes_written += ret;
			count -= ret;
		}
	}
written:
	/*
	 * Update the fields in the node to reflect the change. "offset" is
	 * the end of the data the server accepted.
	 */
	if (file_size < offset) {
		np->inode.i_size = offset;
		vnode_pager_setsize(vp, offset);
	}
out:
	if (io_buffer)
		uma_zfree(p9fs_io_buffer_zone, io_buffer);
	p9fs_release_open_fid(vp, ap->a_cred, &ostate);

	return (error);
}
/* * Common handler of all removal-related VOPs (e.g. rmdir, rm). Perform the * client_remove op to send messages to remove the node's fid on the server. * After that, does a node metadata cleanup on client side. */ static int -remove_common(struct p9fs_node *np, struct ucred *cred) +remove_common(struct p9fs_node *dnp, struct p9fs_node *np, const char *name, + struct ucred *cred) { int error; struct p9fs_session *vses; struct vnode *vp; struct p9_fid *vfid; error = 0; vses = np->p9fs_ses; vp = P9FS_NTOV(np); - vfid = p9fs_get_fid(vses->clnt, np, cred, VFID, -1, &error); + vfid = p9fs_get_fid(vses->clnt, dnp, cred, VFID, -1, &error); if (error != 0) return (error); - error = p9_client_remove(vfid); + error = p9_client_unlink(vfid, name, + np->v_node->v_type == VDIR ? P9PROTO_UNLINKAT_REMOVEDIR : 0); if (error != 0) return (error); /* Remove all non-open fids associated with the vp */ - p9fs_fid_remove_all(np, TRUE); + if (np->inode.i_links_count == 1) + p9fs_fid_remove_all(np, TRUE); /* Invalidate all entries of vnode from name cache and hash list. */ cache_purge(vp); - vfs_hash_remove(vp); + np->flags |= P9FS_NODE_DELETED; return (error); } /* Remove vop for all files. Call common code for remove and adjust links */ static int p9fs_remove(struct vop_remove_args *ap) { struct vnode *vp; struct p9fs_node *np; struct vnode *dvp; struct p9fs_node *dnp; struct p9fs_inode *dinode; + struct componentname *cnp; int error; + cnp = ap->a_cnp; vp = ap->a_vp; np = P9FS_VTON(vp); dvp = ap->a_dvp; dnp = P9FS_VTON(dvp); dinode = &dnp->inode; P9_DEBUG(VOPS, "%s: vp %p node %p \n", __func__, vp, np); if (vp->v_type == VDIR) return (EISDIR); - error = remove_common(np, ap->a_cnp->cn_cred); + error = remove_common(dnp, np, cnp->cn_nameptr, cnp->cn_cred); if (error == 0) P9FS_DECR_LINKS(dinode); return (error); } /* Remove vop for all directories. 
Call common code for remove and adjust links */ static int p9fs_rmdir(struct vop_rmdir_args *ap) { struct vnode *vp; struct p9fs_node *np; struct vnode *dvp; struct p9fs_node *dnp; struct p9fs_inode *dinode; + struct componentname *cnp; int error; + cnp = ap->a_cnp; vp = ap->a_vp; np = P9FS_VTON(vp); dvp = ap->a_dvp; dnp = P9FS_VTON(dvp); dinode = &dnp->inode; P9_DEBUG(VOPS, "%s: vp %p node %p \n", __func__, vp, np); - error = remove_common(np, ap->a_cnp->cn_cred); + error = remove_common(dnp, np, cnp->cn_nameptr, cnp->cn_cred); if (error == 0) P9FS_DECR_LINKS(dinode); return (error); } /* * Create symlinks. Make the permissions and call create_common code * for Soft links. */ static int p9fs_symlink(struct vop_symlink_args *ap) { struct vnode *dvp; struct vnode **vpp; struct vattr *vap; struct componentname *cnp; char *symtgt; struct p9fs_node *dnp; struct p9fs_session *vses; struct mount *mp; struct p9_fid *dvfid, *newfid; int error; char tmpchr; gid_t gid; dvp = ap->a_dvp; vpp = ap->a_vpp; vap = ap->a_vap; cnp = ap->a_cnp; symtgt = (char*)(uintptr_t) ap->a_target; dnp = P9FS_VTON(dvp); vses = dnp->p9fs_ses; mp = vses->p9fs_mount; newfid = NULL; error = 0; gid = vap->va_gid; P9_DEBUG(VOPS, "%s: dvp %p\n", __func__, dvp); /* * Save the character present at namelen in nameptr string and * null terminate the character to get the search name for p9_dir_walk */ tmpchr = cnp->cn_nameptr[cnp->cn_namelen]; cnp->cn_nameptr[cnp->cn_namelen] = '\0'; dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error); if (error != 0) goto out; error = p9_create_symlink(dvfid, cnp->cn_nameptr, symtgt, gid); if (error != 0) goto out; /*create vnode for symtgt */ newfid = p9_client_walk(dvfid, 1, &cnp->cn_nameptr, 1, &error); if (newfid != NULL) { error = p9fs_vget_common(mp, NULL, cnp->cn_lkflags, dnp, newfid, vpp, cnp->cn_nameptr); if (error != 0) goto out; } else goto out; if ((cnp->cn_flags & MAKEENTRY) != 0) { cache_enter(P9FS_NTOV(dnp), *vpp, cnp); } P9_DEBUG(VOPS, "%s: 
created file under vp %p node %p fid %ju\n", __func__, *vpp, dnp, (uintmax_t)dvfid->fid); cnp->cn_nameptr[cnp->cn_namelen] = tmpchr; return (error); out: if (newfid != NULL) p9_client_clunk(newfid); cnp->cn_nameptr[cnp->cn_namelen] = tmpchr; return (error); } /* Create hard link */ static int p9fs_link(struct vop_link_args *ap) { struct vnode *vp; struct vnode *tdvp; struct componentname *cnp; struct p9fs_node *dnp; struct p9fs_node *np; struct p9fs_inode *inode; struct p9fs_session *vses; struct p9_fid *dvfid, *oldvfid; int error; vp = ap->a_vp; tdvp = ap->a_tdvp; cnp = ap->a_cnp; dnp = P9FS_VTON(tdvp); np = P9FS_VTON(vp); inode = &np->inode; vses = np->p9fs_ses; error = 0; P9_DEBUG(VOPS, "%s: tdvp %p vp %p\n", __func__, tdvp, vp); dvfid = p9fs_get_fid(vses->clnt, dnp, cnp->cn_cred, VFID, -1, &error); if (error != 0) return (error); oldvfid = p9fs_get_fid(vses->clnt, np, cnp->cn_cred, VFID, -1, &error); if (error != 0) return (error); error = p9_create_hardlink(dvfid, oldvfid, cnp->cn_nameptr); if (error != 0) return (error); /* Increment ref count on the inode */ P9FS_INCR_LINKS(inode); return (0); } /* Read contents of the symbolic link */ static int p9fs_readlink(struct vop_readlink_args *ap) { struct vnode *vp; struct uio *uio; struct p9fs_node *dnp; struct p9fs_session *vses; struct p9_fid *dvfid; int error, len; char *target; vp = ap->a_vp; uio = ap->a_uio; dnp = P9FS_VTON(vp); vses = dnp->p9fs_ses; error = 0; P9_DEBUG(VOPS, "%s: vp %p\n", __func__, vp); dvfid = p9fs_get_fid(vses->clnt, dnp, ap->a_cred, VFID, -1, &error); if (error != 0) return (error); error = p9_readlink(dvfid, &target); if (error != 0) return (error); len = strlen(target); error = uiomove(target, len, uio); return (0); } /* * Iterate through a directory. An entire 8k data is read into the I/O buffer. * This buffer is parsed to make dir entries and fed to the user buffer to * complete it to the VFS. 
*/ static int p9fs_readdir(struct vop_readdir_args *ap) { struct uio *uio; struct vnode *vp; struct dirent cde; int64_t offset; uint64_t diroffset; struct p9fs_node *np; int error; int32_t count; struct p9_client *clnt; struct p9_dirent dent; char *io_buffer; struct p9_fid *vofid; uio = ap->a_uio; vp = ap->a_vp; np = P9FS_VTON(ap->a_vp); offset = 0; diroffset = 0; error = 0; count = 0; clnt = np->p9fs_ses->clnt; P9_DEBUG(VOPS, "%s: vp %p, offset %jd, resid %zd\n", __func__, vp, (intmax_t) uio->uio_offset, uio->uio_resid); if (ap->a_uio->uio_iov->iov_len <= 0) return (EINVAL); if (vp->v_type != VDIR) return (ENOTDIR); vofid = p9fs_get_fid(clnt, np, ap->a_cred, VOFID, P9PROTO_OREAD, &error); if (vofid == NULL) { P9_DEBUG(ERROR, "%s: NULL FID\n", __func__); return (EBADF); } io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK); /* We haven't reached the end yet. read more. */ diroffset = uio->uio_offset; while (uio->uio_resid >= sizeof(struct dirent)) { /* * We need to read more data as what is indicated by filesize because * filesize is based on data stored in struct dirent structure but * we read data in struct p9_dirent format which has different size. * Hence we read max data(P9FS_IOUNIT) everytime from host, convert * it into struct dirent structure and send it back. */ count = P9FS_IOUNIT; bzero(io_buffer, P9FS_MTU); count = p9_client_readdir(vofid, (char *)io_buffer, diroffset, count); if (count == 0) break; if (count < 0) { error = EIO; goto out; } offset = 0; while (offset + QEMU_DIRENTRY_SZ <= count) { /* * Read and make sense out of the buffer in one dirent * This is part of 9p protocol read. This reads one p9_dirent, * appends it to dirent(FREEBSD specifc) and continues to parse the buffer. 
*/ bzero(&dent, sizeof(dent)); offset = p9_dirent_read(clnt, io_buffer, offset, count, &dent); if (offset < 0 || offset > count) { error = EIO; goto out; } bzero(&cde, sizeof(cde)); strncpy(cde.d_name, dent.d_name, dent.len); cde.d_fileno = dent.qid.path; cde.d_type = dent.d_type; cde.d_namlen = dent.len; cde.d_reclen = GENERIC_DIRSIZ(&cde); /* * If there isn't enough space in the uio to return a * whole dirent, break off read */ if (uio->uio_resid < GENERIC_DIRSIZ(&cde)) break; /* Transfer */ error = uiomove(&cde, GENERIC_DIRSIZ(&cde), uio); if (error != 0) { error = EIO; goto out; } diroffset = dent.d_off; } } /* Pass on last transferred offset */ uio->uio_offset = diroffset; out: uma_zfree(p9fs_io_buffer_zone, io_buffer); return (error); } static void p9fs_doio(struct vnode *vp, struct buf *bp, struct p9_fid *vofid, struct ucred *cr) { struct uio *uiov; struct iovec io; int error; uint64_t off, offset; uint64_t filesize; uint64_t resid; uint32_t count; int64_t ret; struct p9fs_node *np; char *io_buffer; error = 0; np = P9FS_VTON(vp); filesize = np->inode.i_size; uiov = malloc(sizeof(struct uio), M_P9UIOV, M_WAITOK); uiov->uio_iov = &io; uiov->uio_iovcnt = 1; uiov->uio_segflg = UIO_SYSSPACE; io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK | M_ZERO); if (bp->b_iocmd == BIO_READ) { io.iov_len = uiov->uio_resid = bp->b_bcount; io.iov_base = bp->b_data; uiov->uio_rw = UIO_READ; switch (vp->v_type) { case VREG: { uiov->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE; if (uiov->uio_resid) { int left = uiov->uio_resid; int nread = bp->b_bcount - left; if (left > 0) bzero((char *)bp->b_data + nread, left); } /* where in the file are we to start reading */ offset = uiov->uio_offset; if (uiov->uio_offset >= filesize) goto out; while ((resid = uiov->uio_resid) > 0) { if (offset >= filesize) break; count = min(filesize - uiov->uio_offset, resid); if (count == 0) break; P9_DEBUG(VOPS, "%s: read called %#zx at %#jx\n", __func__, uiov->uio_resid, 
(uintmax_t)uiov->uio_offset); /* Copy count bytes into the uio */ ret = p9_client_read(vofid, offset, count, io_buffer); error = uiomove(io_buffer, ret, uiov); if (error != 0) goto out; offset += ret; } break; } default: printf("vfs: type %x unexpected\n", vp->v_type); break; } } else { if (bp->b_dirtyend > bp->b_dirtyoff) { io.iov_len = uiov->uio_resid = bp->b_dirtyend - bp->b_dirtyoff; uiov->uio_offset = ((off_t)bp->b_blkno) * PAGE_SIZE + bp->b_dirtyoff; io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; uiov->uio_rw = UIO_WRITE; if (uiov->uio_offset < 0) { error = EINVAL; goto out; } if (uiov->uio_resid == 0) goto out; resid = uiov->uio_resid; offset = uiov->uio_offset; error = 0; while ((resid = uiov->uio_resid) > 0) { off = 0; count = MIN(resid, P9FS_IOUNIT); error = uiomove(io_buffer, count, uiov); if (error != 0) { goto out; } while (count > 0) { /* Copy count bytes from the uio */ ret = p9_client_write(vofid, offset, count, io_buffer + off); if (ret < 0) goto out; P9_DEBUG(VOPS, "%s: write called %#zx at %#jx\n", __func__, uiov->uio_resid, (uintmax_t)uiov->uio_offset); off += ret; offset += ret; count -= ret; } } /* Update the fields in the node to reflect the change */ if (filesize < uiov->uio_offset + uiov->uio_resid) { np->inode.i_size = uiov->uio_offset + uiov->uio_resid; vnode_pager_setsize(vp, uiov->uio_offset + uiov->uio_resid); /* update the modified timers. */ p9fs_itimes(vp); } } else { bp->b_resid = 0; goto out1; } } out: /* Set the error */ if (error != 0) { bp->b_error = error; bp->b_ioflags |= BIO_ERROR; } bp->b_resid = uiov->uio_resid; out1: bufdone(bp); uma_zfree(p9fs_io_buffer_zone, io_buffer); free(uiov, M_P9UIOV); } /* * The I/O buffer is mapped to a uio and a client_write/client_read is performed * the same way as p9fs_read and p9fs_write. 
*/ static int p9fs_strategy(struct vop_strategy_args *ap) { struct vnode *vp; struct buf *bp; struct ucred *cr; int error; struct open_fid_state ostate; vp = ap->a_vp; bp = ap->a_bp; error = 0; P9_DEBUG(VOPS, "%s: vp %p, iocmd %d\n ", __func__, vp, bp->b_iocmd); if (bp->b_iocmd == BIO_READ) cr = bp->b_rcred; else cr = bp->b_wcred; error = p9fs_get_open_fid(vp, bp->b_iocmd == BIO_READ ? FREAD : FWRITE, cr, &ostate); if (error) { P9_DEBUG(ERROR, "%s: p9fs_get_open_fid failed: %d\n", __func__, error); bp->b_error = error; bp->b_ioflags |= BIO_ERROR; bufdone(bp); return (0); } p9fs_doio(vp, bp, ostate.vofid, cr); p9fs_release_open_fid(vp, cr, &ostate); return (0); } /* Rename a file */ static int p9fs_rename(struct vop_rename_args *ap) { struct vnode *tvp; struct vnode *tdvp; struct vnode *fvp; struct vnode *fdvp; struct componentname *tcnp; struct componentname *fcnp; struct p9fs_node *tdnode; struct p9fs_node *fdnode; struct p9fs_inode *fdinode; struct p9fs_node *fnode; struct p9fs_inode *finode; struct p9fs_session *vses; struct p9fs_node *tnode; struct p9fs_inode *tinode; struct p9_fid *olddirvfid, *newdirvfid ; int error; tvp = ap->a_tvp; tdvp = ap->a_tdvp; fvp = ap->a_fvp; fdvp = ap->a_fdvp; tcnp = ap->a_tcnp; fcnp = ap->a_fcnp; tdnode = P9FS_VTON(tdvp); fdnode = P9FS_VTON(fdvp); fdinode = &fdnode->inode; fnode = P9FS_VTON(fvp); finode = &fnode->inode; vses = fnode->p9fs_ses; error = 0; P9_DEBUG(VOPS, "%s: tvp %p, tdvp %p, fvp %p, fdvp %p\n ", __func__, tvp, tdvp, fvp, fdvp); /* Check for cross mount operation */ if (fvp->v_mount != tdvp->v_mount || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; goto out; } /* warning if you are renaming to the same name */ if (fvp == tvp) error = 0; olddirvfid = p9fs_get_fid(vses->clnt, fdnode, fcnp->cn_cred, VFID, -1, &error); if (error != 0) goto out; newdirvfid = p9fs_get_fid(vses->clnt, tdnode, tcnp->cn_cred, VFID, -1, &error); if (error != 0) goto out; error = p9_client_renameat(olddirvfid, fcnp->cn_nameptr, 
newdirvfid, tcnp->cn_nameptr); if (error != 0) goto out; /* * decrement the link count on the "from" file whose name is going * to be changed if its a directory */ if (fvp->v_type == VDIR) { if (tvp && tvp->v_type == VDIR) cache_purge(tdvp); P9FS_DECR_LINKS(fdinode); cache_purge(fdvp); } /* Taking exclusive lock on the from node before decrementing the link count */ if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0) goto out; P9FS_DECR_LINKS(finode); VOP_UNLOCK(fvp); if (tvp) { tnode = P9FS_VTON(tvp); tinode = &tnode->inode; P9FS_DECR_LINKS(tinode); } out: if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); vrele(fdvp); vrele(fvp); return (error); } /* * Put VM pages, synchronously. * XXX: like smbfs, cannot use vop_stdputpages due to mapping requirement */ static int p9fs_putpages(struct vop_putpages_args *ap) { struct uio uio; struct iovec iov; int i, error, npages, count; off_t offset; int *rtvals; struct vnode *vp; struct thread *td; struct ucred *cred; struct p9fs_node *np; vm_page_t *pages; vm_offset_t kva; struct buf *bp; vp = ap->a_vp; np = P9FS_VTON(vp); td = curthread; cred = curthread->td_ucred; pages = ap->a_m; count = ap->a_count; rtvals = ap->a_rtvals; npages = btoc(count); offset = IDX_TO_OFF(pages[0]->pindex); /* * When putting pages, do not extend file past EOF. 
*/ if (offset + count > np->inode.i_size) { count = np->inode.i_size - offset; if (count < 0) count = 0; } for (i = 0; i < npages; i++) rtvals[i] = VM_PAGER_ERROR; bp = uma_zalloc(p9fs_pbuf_zone, M_WAITOK); kva = (vm_offset_t) bp->b_data; pmap_qenter(kva, pages, npages); VM_CNT_INC(v_vnodeout); VM_CNT_ADD(v_vnodepgsout, count); iov.iov_base = (caddr_t) kva; iov.iov_len = count; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = offset; uio.uio_resid = count; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_WRITE; uio.uio_td = td; P9_DEBUG(VOPS, "of=%jd,resid=%zd\n", (intmax_t)uio.uio_offset, uio.uio_resid); error = VOP_WRITE(vp, &uio, vnode_pager_putpages_ioflags(ap->a_sync), cred); pmap_qremove(kva, npages); uma_zfree(p9fs_pbuf_zone, bp); if (error == 0) vnode_pager_undirty_pages(pages, rtvals, count - uio.uio_resid, np->inode.i_size - offset, npages * PAGE_SIZE); return (rtvals[0]); } struct vop_vector p9fs_vnops = { .vop_default = &default_vnodeops, .vop_lookup = p9fs_lookup, .vop_open = p9fs_open, .vop_close = p9fs_close, .vop_access = p9fs_access, .vop_getattr = p9fs_getattr_dotl, .vop_setattr = p9fs_setattr_dotl, .vop_reclaim = p9fs_reclaim, .vop_inactive = p9fs_inactive, .vop_readdir = p9fs_readdir, .vop_create = p9fs_create, .vop_mknod = p9fs_mknod, .vop_read = p9fs_read, .vop_write = p9fs_write, .vop_remove = p9fs_remove, .vop_mkdir = p9fs_mkdir, .vop_rmdir = p9fs_rmdir, .vop_strategy = p9fs_strategy, .vop_symlink = p9fs_symlink, .vop_rename = p9fs_rename, .vop_link = p9fs_link, .vop_readlink = p9fs_readlink, .vop_putpages = p9fs_putpages, }; VFS_VOP_VECTOR_REGISTER(p9fs_vnops);