Index: projects/pnfs-planb-server-stable11/sys/fs/nfs/nfs_commonkrpc.c =================================================================== --- projects/pnfs-planb-server-stable11/sys/fs/nfs/nfs_commonkrpc.c (revision 324529) +++ projects/pnfs-planb-server-stable11/sys/fs/nfs/nfs_commonkrpc.c (revision 324530) @@ -1,1323 +1,1343 @@ /*- * Copyright (c) 1989, 1991, 1993, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * Socket operations for use by nfs */ #include "opt_kgssapi.h" #include "opt_nfs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KDTRACE_HOOKS #include dtrace_nfsclient_nfs23_start_probe_func_t dtrace_nfscl_nfs234_start_probe; dtrace_nfsclient_nfs23_done_probe_func_t dtrace_nfscl_nfs234_done_probe; /* * Registered probes by RPC type. 
*/
/*
 * Each table below is indexed by a procedure number when a probe is fired
 * from newnfs_request(); a zero entry means no probe is fired.
 */
uint32_t	nfscl_nfs2_start_probes[NFSV41_NPROCS + 1];
uint32_t	nfscl_nfs2_done_probes[NFSV41_NPROCS + 1];

uint32_t	nfscl_nfs3_start_probes[NFSV41_NPROCS + 1];
uint32_t	nfscl_nfs3_done_probes[NFSV41_NPROCS + 1];

uint32_t	nfscl_nfs4_start_probes[NFSV41_NPROCS + 1];
uint32_t	nfscl_nfs4_done_probes[NFSV41_NPROCS + 1];
#endif

/* Lock declaration macros; their expansions are defined outside this file. */
NFSSTATESPINLOCK;
NFSREQSPINLOCK;
NFSDLOCKMUTEX;
NFSCLSTATEMUTEX;
extern struct nfsstatsv1 nfsstatsv1;
extern struct nfsreqhead nfsd_reqq;
extern int nfscl_ticks;
extern void (*ncl_call_invalcaches)(struct vnode *);
extern int nfs_numnfscbd;
extern int nfscl_debuglevel;

SVCPOOL		*nfscbd_pool;
static int	nfsrv_gsscallbackson = 0;
/* Scale factor for socket buffer reservations; clamped to 2..64 on use. */
static int	nfs_bufpackets = 4;
static int	nfs_reconnects;
static int	nfs3_jukebox_delay = 10;
static int	nfs_skip_wcc_data_onerr = 1;

SYSCTL_DECL(_vfs_nfs);

/*
 * NOTE(review): the description below says "2 < x < 64", but
 * newnfs_connect() clamps the value to the inclusive range 2..64.
 */
SYSCTL_INT(_vfs_nfs, OID_AUTO, bufpackets, CTLFLAG_RW, &nfs_bufpackets, 0,
    "Buffer reservation size 2 < x < 64");
SYSCTL_INT(_vfs_nfs, OID_AUTO, reconnects, CTLFLAG_RD, &nfs_reconnects, 0,
    "Number of times the nfs client has had to reconnect");
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs3_jukebox_delay, CTLFLAG_RW, &nfs3_jukebox_delay, 0,
    "Number of seconds to delay a retry after receiving EJUKEBOX");
SYSCTL_INT(_vfs_nfs, OID_AUTO, skip_wcc_data_onerr, CTLFLAG_RW, &nfs_skip_wcc_data_onerr, 0,
    "Disable weak cache consistency checking when server returns an error");

static void	nfs_down(struct nfsmount *, struct thread *, const char *,
    int, int);
static void	nfs_up(struct nfsmount *, struct thread *, const char *,
    int, int);
static int	nfs_msg(struct thread *, const char *, const char *, int);

struct nfs_cached_auth {
	int		ca_refs; /* refcount, including 1 from the cache */
	uid_t		ca_uid;	 /* uid that corresponds to this auth */
	AUTH		*ca_auth; /* RPC auth handle */
};

/*
 * Translation table used by newnfs_request() for ND_NFSV2 requests: the
 * index is the nd_procnum of the request and the value is the NFSv2
 * procedure number sent on the wire.  Entries without an NFSv2 counterpart
 * hold NFSV2PROC_NOOP.
 */
static int nfsv2_procid[NFS_V3NPROCS] = {
	NFSV2PROC_NULL,
	NFSV2PROC_GETATTR,
	NFSV2PROC_SETATTR,
	NFSV2PROC_LOOKUP,
	NFSV2PROC_NOOP,
	NFSV2PROC_READLINK,
	NFSV2PROC_READ,
	NFSV2PROC_WRITE,
	NFSV2PROC_CREATE,
	NFSV2PROC_MKDIR,
	NFSV2PROC_SYMLINK,
	NFSV2PROC_CREATE,
	NFSV2PROC_REMOVE,
	NFSV2PROC_RMDIR,
	NFSV2PROC_RENAME,
	NFSV2PROC_LINK,
	NFSV2PROC_READDIR,
	NFSV2PROC_NOOP,
	NFSV2PROC_STATFS,
	NFSV2PROC_NOOP,
	NFSV2PROC_NOOP,
	NFSV2PROC_NOOP,
};

/*
 * Initialize sockets and congestion for a new NFS connection.
 * We do not free the sockaddr if error.
 *
 * A scratch socket is created only to discover send/receive reservation
 * sizes that soreserve() accepts; it is closed again before the krpc client
 * is created by clnt_reconnect_create() with those sizes.  The new client is
 * stored in nrp->nr_client unless some other thread has installed one in the
 * meantime, in which case the new client is released.
 *
 * nmp is non-NULL for client side mounts and NULL for the callback/upcall
 * cases listed further down.  cred may be NULL (DS connects).
 * callback_retry_mult is only consulted when nmp == NULL: zero selects the
 * upcall retry count, non-zero scales the callback retry count.  The p
 * argument is not referenced in this body.
 *
 * Returns 0 on success, otherwise the error from socreate() or soreserve().
 */
int
newnfs_connect(struct nfsmount *nmp, struct nfssockreq *nrp,
    struct ucred *cred, NFSPROC_T *p, int callback_retry_mult)
{
	int rcvreserve, sndreserve;
	int pktscale, pktscalesav;
	struct sockaddr *saddr;
	struct ucred *origcred;
	CLIENT *client;
	struct netconfig *nconf;
	struct socket *so;
	int one = 1, retries, error = 0;
	struct thread *td = curthread;
	SVCXPRT *xprt;
	struct timeval timo;

	/*
	 * We need to establish the socket using the credentials of
	 * the mountpoint.  Some parts of this process (such as
	 * sobind() and soconnect()) will use the current thread's
	 * credential instead of the socket credential.  To work
	 * around this, temporarily change the current thread's
	 * credential to that of the mountpoint.
	 *
	 * XXX: It would be better to explicitly pass the correct
	 * credential to sobind() and soconnect().
	 */
	origcred = td->td_ucred;

	/*
	 * Use the credential in nr_cred, if not NULL.
	 */
	if (nrp->nr_cred != NULL)
		td->td_ucred = nrp->nr_cred;
	else
		td->td_ucred = cred;
	saddr = nrp->nr_nam;

	/* Pick the netconfig entry from address family and socket type. */
	if (saddr->sa_family == AF_INET)
		if (nrp->nr_sotype == SOCK_DGRAM)
			nconf = getnetconfigent("udp");
		else
			nconf = getnetconfigent("tcp");
	else
		if (nrp->nr_sotype == SOCK_DGRAM)
			nconf = getnetconfigent("udp6");
		else
			nconf = getnetconfigent("tcp6");

	/* Clamp the tunable to the inclusive range 2..64. */
	pktscale = nfs_bufpackets;
	if (pktscale < 2)
		pktscale = 2;
	if (pktscale > 64)
		pktscale = 64;
	pktscalesav = pktscale;
	/*
	 * soreserve() can fail if sb_max is too small, so shrink pktscale
	 * and try again if there is an error.
	 * Print a log message suggesting increasing sb_max.
	 * Creating a socket and doing this is necessary since, if the
	 * reservation sizes are too large and will make soreserve() fail,
	 * the connection will work until a large send is attempted and
	 * then it will loop in the krpc code.
	 */
	so = NULL;
	saddr = NFSSOCKADDR(nrp->nr_nam, struct sockaddr *);
	error = socreate(saddr->sa_family, &so, nrp->nr_sotype,
	    nrp->nr_soproto, td->td_ucred, td);
	if (error) {
		/* Every exit path must put the thread's credential back. */
		td->td_ucred = origcred;
		goto out;
	}
	do {
		/*
		 * error is 0 on the first pass (socreate() succeeded), so
		 * pktscale is only reduced after a failed soreserve().
		 */
		if (error != 0 && pktscale > 2) {
			if (nmp != NULL && nrp->nr_sotype == SOCK_STREAM &&
			    pktscale == pktscalesav)
				printf("Consider increasing kern.ipc.maxsockbuf\n");
			pktscale--;
		}
		if (nrp->nr_sotype == SOCK_DGRAM) {
			if (nmp != NULL) {
				sndreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) *
				    pktscale;
				rcvreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) *
				    pktscale;
			} else {
				sndreserve = rcvreserve = 1024 * pktscale;
			}
		} else {
			if (nrp->nr_sotype != SOCK_STREAM)
				panic("nfscon sotype");
			if (nmp != NULL) {
				sndreserve = (NFS_MAXBSIZE + NFS_MAXXDR +
				    sizeof (u_int32_t)) * pktscale;
				rcvreserve = (NFS_MAXBSIZE + NFS_MAXXDR +
				    sizeof (u_int32_t)) * pktscale;
			} else {
				sndreserve = rcvreserve = 1024 * pktscale;
			}
		}
		error = soreserve(so, sndreserve, rcvreserve);
		if (error != 0 && nmp != NULL && nrp->nr_sotype == SOCK_STREAM &&
		    pktscale <= 2)
			printf("Must increase kern.ipc.maxsockbuf or reduce"
			    " rsize, wsize\n");
	} while (error != 0 && pktscale > 2);
	/* The scratch socket was only needed to probe the reservation sizes. */
	soclose(so);
	if (error) {
		td->td_ucred = origcred;
		goto out;
	}

	client = clnt_reconnect_create(nconf, saddr, nrp->nr_prog,
	    nrp->nr_vers, sndreserve, rcvreserve);
	CLNT_CONTROL(client, CLSET_WAITCHAN, "nfsreq");
	if (nmp != NULL) {
		if ((nmp->nm_flag & NFSMNT_INT))
			CLNT_CONTROL(client, CLSET_INTERRUPTIBLE, &one);
		if ((nmp->nm_flag & NFSMNT_RESVPORT))
			CLNT_CONTROL(client, CLSET_PRIVPORT, &one);
		if (NFSHASSOFT(nmp)) {
			if (nmp->nm_sotype == SOCK_DGRAM)
				/*
				 * For UDP, the large timeout for a reconnect
				 * will be set to "nm_retry * nm_timeo / 2", so
				 * we only want to do 2 reconnect timeout
				 * retries.
				 */
				retries = 2;
			else
				retries = nmp->nm_retry;
		} else
			retries = INT_MAX;
		/* cred == NULL for DS connects. */
		if (NFSHASNFSV4N(nmp) && cred != NULL) {
			/*
			 * Make sure the nfscbd_pool doesn't get destroyed
			 * while doing this.
			 */
			NFSD_LOCK();
			if (nfs_numnfscbd > 0) {
				/*
				 * The count is bumped while the lock is
				 * dropped and the wakeup() below is issued
				 * when it returns to zero.
				 */
				nfs_numnfscbd++;
				NFSD_UNLOCK();
				xprt = svc_vc_create_backchannel(nfscbd_pool);
				CLNT_CONTROL(client, CLSET_BACKCHANNEL, xprt);
				NFSD_LOCK();
				nfs_numnfscbd--;
				if (nfs_numnfscbd == 0)
					wakeup(&nfs_numnfscbd);
			}
			NFSD_UNLOCK();
		}
	} else {
		/*
		 * Three cases:
		 * - Null RPC callback to client
		 * - Non-Null RPC callback to client, wait a little longer
		 * - upcalls to nfsuserd and gssd (clp == NULL)
		 */
		if (callback_retry_mult == 0) {
			retries = NFSV4_UPCALLRETRY;
			CLNT_CONTROL(client, CLSET_PRIVPORT, &one);
		} else {
			retries = NFSV4_CALLBACKRETRY * callback_retry_mult;
		}
	}
	CLNT_CONTROL(client, CLSET_RETRIES, &retries);

	if (nmp != NULL) {
		/*
		 * For UDP, there are 2 timeouts:
		 * - CLSET_RETRY_TIMEOUT sets the initial timeout for the timer
		 *   that does a retransmit of an RPC request using the same
		 *   socket and xid. This is what you normally want to do,
		 *   since NFS servers depend on "same xid" for their
		 *   Duplicate Request Cache.
		 * - timeout specified in CLNT_CALL_MBUF(), which specifies when
		 *   retransmits on the same socket should fail and a fresh
		 *   socket created. Each of these timeouts counts as one
		 *   CLSET_RETRIES as set above.
		 * Set the initial retransmit timeout for UDP. This timeout
		 * doesn't exist for TCP and the following call just fails,
		 * which is ok.
		 */
		/* nm_timeo is in units of 1/NFS_HZ seconds. */
		timo.tv_sec = nmp->nm_timeo / NFS_HZ;
		timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * 1000000 / NFS_HZ;
		CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, &timo);
	}

	mtx_lock(&nrp->nr_mtx);
	if (nrp->nr_client != NULL) {
		mtx_unlock(&nrp->nr_mtx);
		/*
		 * Someone else already connected.
		 */
		CLNT_RELEASE(client);
	} else {
		nrp->nr_client = client;
		/*
		 * Protocols that do not require connections may be optionally
		 * left unconnected for servers that reply from a port other
		 * than NFS_PORT.
		 */
		if (nmp == NULL || (nmp->nm_flag & NFSMNT_NOCONN) == 0) {
			/* nr_mtx is dropped before the CLSET_CONNECT control call. */
			mtx_unlock(&nrp->nr_mtx);
			CLNT_CONTROL(client, CLSET_CONNECT, &one);
		} else
			mtx_unlock(&nrp->nr_mtx);
	}


	/* Restore current thread's credentials. */
	td->td_ucred = origcred;

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * NFS disconnect. Clean up and unlink.
 *
 * The client pointer is detached from nrp while holding nr_mtx; the GSS
 * purge, close and release are then done with the mutex dropped.  Nothing
 * is done when no client is attached.
 */
void
newnfs_disconnect(struct nfssockreq *nrp)
{
	CLIENT *client;

	mtx_lock(&nrp->nr_mtx);
	if (nrp->nr_client != NULL) {
		client = nrp->nr_client;
		nrp->nr_client = NULL;
		mtx_unlock(&nrp->nr_mtx);
		rpc_gss_secpurge_call(client);
		CLNT_CLOSE(client);
		CLNT_RELEASE(client);
	} else {
		mtx_unlock(&nrp->nr_mtx);
	}
}

/*
 * Build the RPC authenticator for a request.
 *
 * For the three Kerberos flavours the GSS service is derived from
 * secflavour and the mechanism OID is looked up when mech_oid is NULL
 * (NULL is returned if that lookup fails).  With no clnt_principal an
 * existing handle is looked up with rpc_gss_secfind_call() and, if that
 * yields NULL, the code falls through to authunix_create().  With a
 * clnt_principal the result of rpc_gss_seccreate_call() is returned as is,
 * even when it is NULL; there is no AUTH_SYS fallback on that path.
 * Any other flavour gets authunix_create(cred).
 */
static AUTH *
nfs_getauth(struct nfssockreq *nrp, int secflavour, char *clnt_principal,
    char *srv_principal, gss_OID mech_oid, struct ucred *cred)
{
	rpc_gss_service_t svc;
	AUTH *auth;

	switch (secflavour) {
	case RPCSEC_GSS_KRB5:
	case RPCSEC_GSS_KRB5I:
	case RPCSEC_GSS_KRB5P:
		if (!mech_oid) {
			if (!rpc_gss_mech_to_oid_call("kerberosv5", &mech_oid))
				return (NULL);
		}
		if (secflavour == RPCSEC_GSS_KRB5)
			svc = rpc_gss_svc_none;
		else if (secflavour == RPCSEC_GSS_KRB5I)
			svc = rpc_gss_svc_integrity;
		else
			svc = rpc_gss_svc_privacy;

		if (clnt_principal == NULL)
			auth = rpc_gss_secfind_call(nrp->nr_client, cred,
			    srv_principal, mech_oid, svc);
		else {
			auth = rpc_gss_seccreate_call(nrp->nr_client, cred,
			    clnt_principal, srv_principal, "kerberosv5",
			    svc, NULL, NULL, NULL);
			return (auth);
		}
		if (auth != NULL)
			return (auth);
		/* fallthrough */
	case AUTH_SYS:
	default:
		return (authunix_create(cred));

	}
}

/*
 * Callback from the RPC code to generate up/down notifications.
*/

/* Per-request state handed to nfs_feedback() through rc_feedback_arg. */
struct nfs_feedback_arg {
	struct nfsmount *nf_mount;
	int		nf_lastmsg;	/* last tprintf */
	int		nf_tprintfmsg;	/* set once "not responding" was reported */
	struct thread	*nf_td;
};

/*
 * Feedback hook installed in the rpc_callextra by newnfs_request().
 * On FEEDBACK_REXMIT2/FEEDBACK_RECONNECT it reports the server as "not
 * responding" through nfs_down(), but only when more than nm_tprintf_delay
 * has passed since nf_lastmsg.  On FEEDBACK_OK it calls nfs_up(), passing
 * nf_tprintfmsg so nfs_up() can decide whether to print.  The proc argument
 * is not used.
 */
static void
nfs_feedback(int type, int proc, void *arg)
{
	struct nfs_feedback_arg *nf = (struct nfs_feedback_arg *) arg;
	struct nfsmount *nmp = nf->nf_mount;
	time_t now;

	switch (type) {
	case FEEDBACK_REXMIT2:
	case FEEDBACK_RECONNECT:
		now = NFSD_MONOSEC;
		/* NOTE(review): nf_lastmsg is an int while now is a time_t. */
		if (nf->nf_lastmsg + nmp->nm_tprintf_delay < now) {
			nfs_down(nmp, nf->nf_td,
			    "not responding", 0, NFSSTA_TIMEO);
			nf->nf_tprintfmsg = TRUE;
			nf->nf_lastmsg = now;
		}
		break;

	case FEEDBACK_OK:
		nfs_up(nf->nf_mount, nf->nf_td,
		    "is alive again", NFSSTA_TIMEO, nf->nf_tprintfmsg);
		break;
	}
}

/*
 * newnfs_request - goes something like this
 *	- does the rpc by calling the krpc layer
 *	- break down rpc header and return with nfs reply
 * nb: always frees up nd_mreq mbuf list
 *
 * On a 0 return the reply is left in nd->nd_mrep, with nd_md/nd_dpos
 * positioned for the caller and the NFS status in nd->nd_repstat.  On a
 * non-zero return it is an errno value: ESTALE during a forced unmount,
 * EACCES when no authenticator could be obtained, one of the values the RPC
 * status is mapped to below, or an error from parsing the reply.
 *
 * The prog, vers, retsum and toplevel arguments are not referenced directly
 * in this body, and the local trycnt is only ever assigned.
 * NOTE(review): NFSM_DISSECT() presumably jumps to the nfsmout label when
 * the reply is too short - confirm against the macro definition.
 */
int
newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp,
    struct nfsclient *clp, struct nfssockreq *nrp, vnode_t vp,
    struct thread *td, struct ucred *cred, u_int32_t prog, u_int32_t vers,
    u_char *retsum, int toplevel, u_int64_t *xidp, struct nfsclsession *dssep)
{
	uint32_t retseq, retval, slotseq, *tl;
	time_t waituntil;
	int i = 0, j = 0, opcnt, set_sigset = 0, slot;
	int trycnt, error = 0, usegssname = 0, secflavour = AUTH_SYS;
	int freeslot, maxslot, reterr, slotpos, timeo;
	u_int16_t procnum;
	u_int trylater_delay = 1;
	struct nfs_feedback_arg nf;
	struct timeval timo;
	AUTH *auth;
	struct rpc_callextra ext;
	enum clnt_stat stat;
	struct nfsreq *rep = NULL;
	char *srv_principal = NULL, *clnt_principal = NULL;
	sigset_t oldset;
	struct ucred *authcred;
	struct nfsclsession *sep;
	uint8_t sessionid[NFSX_V4SESSIONID];

	/* A non-NULL dssep overrides the mount's default session. */
	sep = dssep;
	if (xidp != NULL)
		*xidp = 0;
	/* Reject requests while attempting a forced unmount. */
	if (nmp != NULL && NFSCL_FORCEDISM(nmp->nm_mountp)) {
		m_freem(nd->nd_mreq);
		return (ESTALE);
	}

	/*
	 * Set authcred, which is used to acquire RPC credentials to
	 * the cred argument, by default. The crhold() should not be
	 * necessary, but will ensure that some future code change
	 * doesn't result in the credential being free'd prematurely.
	 */
	authcred = crhold(cred);

	/* For client side interruptible mounts, mask off the signals. */
	if (nmp != NULL && td != NULL && NFSHASINT(nmp)) {
		newnfs_set_sigmask(td, &oldset);
		set_sigset = 1;
	}

	/*
	 * XXX if not already connected call nfs_connect now.  Longer
	 * term, change nfs_mount to call nfs_connect unconditionally
	 * and let clnt_reconnect_create handle reconnects.
	 */
	/*
	 * NOTE(review): the return value of newnfs_connect() is ignored, so
	 * nr_client can still be NULL when it is used for the call below.
	 */
	if (nrp->nr_client == NULL)
		newnfs_connect(nmp, nrp, cred, td, 0);

	/*
	 * For a client side mount, nmp is != NULL and clp == NULL. For
	 * server calls (callbacks or upcalls), nmp == NULL.
	 */
	if (clp != NULL) {
		NFSLOCKSTATE();
		if ((clp->lc_flags & LCL_GSS) && nfsrv_gsscallbackson) {
			secflavour = RPCSEC_GSS_KRB5;
			if (nd->nd_procnum != NFSPROC_NULL) {
				if (clp->lc_flags & LCL_GSSINTEGRITY)
					secflavour = RPCSEC_GSS_KRB5I;
				else if (clp->lc_flags & LCL_GSSPRIVACY)
					secflavour = RPCSEC_GSS_KRB5P;
			}
		}
		NFSUNLOCKSTATE();
	} else if (nmp != NULL && NFSHASKERB(nmp) &&
	     nd->nd_procnum != NFSPROC_NULL) {
		if (NFSHASALLGSSNAME(nmp) && nmp->nm_krbnamelen > 0)
			nd->nd_flag |= ND_USEGSSNAME;
		if ((nd->nd_flag & ND_USEGSSNAME) != 0) {
			/*
			 * If there is a client side host based credential,
			 * use that, otherwise use the system uid, if set.
			 * The system uid is in the nmp->nm_sockreq.nr_cred
			 * credentials.
			 */
			if (nmp->nm_krbnamelen > 0) {
				usegssname = 1;
				clnt_principal = nmp->nm_krbname;
			} else if (nmp->nm_uid != (uid_t)-1) {
				KASSERT(nmp->nm_sockreq.nr_cred != NULL,
				    ("newnfs_request: NULL nr_cred"));
				/* Swap the held reference over to nr_cred. */
				crfree(authcred);
				authcred = crhold(nmp->nm_sockreq.nr_cred);
			}
		} else if (nmp->nm_krbnamelen == 0 &&
		    nmp->nm_uid != (uid_t)-1 && cred->cr_uid == (uid_t)0) {
			/*
			 * If there is no host based principal name and
			 * the system uid is set and this is root, use the
			 * system uid, since root won't have user
			 * credentials in a credentials cache file.
			 * The system uid is in the nmp->nm_sockreq.nr_cred
			 * credentials.
			 */
			KASSERT(nmp->nm_sockreq.nr_cred != NULL,
			    ("newnfs_request: NULL nr_cred"));
			crfree(authcred);
			authcred = crhold(nmp->nm_sockreq.nr_cred);
		}
		if (NFSHASINTEGRITY(nmp))
			secflavour = RPCSEC_GSS_KRB5I;
		else if (NFSHASPRIVACY(nmp))
			secflavour = RPCSEC_GSS_KRB5P;
		else
			secflavour = RPCSEC_GSS_KRB5;
		srv_principal = NFSMNT_SRVKRBNAME(nmp);
	} else if (nmp != NULL && !NFSHASKERB(nmp) &&
	    nd->nd_procnum != NFSPROC_NULL &&
	    (nd->nd_flag & ND_USEGSSNAME) != 0) {
		/*
		 * Use the uid that did the mount when the RPC is doing
		 * NFSv4 system operations, as indicated by the
		 * ND_USEGSSNAME flag, for the AUTH_SYS case.
		 * The credentials in nm_sockreq.nr_cred were used for the
		 * mount.
		 */
		KASSERT(nmp->nm_sockreq.nr_cred != NULL,
		    ("newnfs_request: NULL nr_cred"));
		crfree(authcred);
		authcred = crhold(nmp->nm_sockreq.nr_cred);
	}

	if (nmp != NULL) {
		bzero(&nf, sizeof(struct nfs_feedback_arg));
		nf.nf_mount = nmp;
		nf.nf_td = td;
		/*
		 * Back-date nf_lastmsg so that the first message can be
		 * issued once nm_tprintf_initial_delay has elapsed.
		 */
		nf.nf_lastmsg = NFSD_MONOSEC -
		    ((nmp->nm_tprintf_delay)-(nmp->nm_tprintf_initial_delay));
	}

	if (nd->nd_procnum == NFSPROC_NULL)
		auth = authnone_create();
	else if (usegssname) {
		/*
		 * For this case, the authenticator is held in the
		 * nfssockreq structure, so don't release the reference count
		 * held on it. --> Don't AUTH_DESTROY() it in this function.
		 */
		if (nrp->nr_auth == NULL)
			nrp->nr_auth = nfs_getauth(nrp, secflavour,
			    clnt_principal, srv_principal, NULL, authcred);
		else
			rpc_gss_refresh_auth_call(nrp->nr_auth);
		auth = nrp->nr_auth;
	} else
		auth = nfs_getauth(nrp, secflavour, NULL,
		    srv_principal, NULL, authcred);
	/* authcred is not used past this point. */
	crfree(authcred);
	if (auth == NULL) {
		m_freem(nd->nd_mreq);
		if (set_sigset)
			newnfs_restore_sigmask(td, &oldset);
		return (EACCES);
	}
	bzero(&ext, sizeof(ext));
	ext.rc_auth = auth;
	if (nmp != NULL) {
		ext.rc_feedback = nfs_feedback;
		ext.rc_feedback_arg = &nf;
	}

	/*
	 * NFSv4 requests other than NULL and CBCOMPOUND go out as the
	 * COMPOUND procedure.
	 */
	procnum = nd->nd_procnum;
	if ((nd->nd_flag & ND_NFSV4) &&
	    nd->nd_procnum != NFSPROC_NULL &&
	    nd->nd_procnum != NFSV4PROC_CBCOMPOUND)
		procnum = NFSV4PROC_COMPOUND;

	if (nmp != NULL) {
		NFSINCRGLOBAL(nfsstatsv1.rpcrequests);

		/* Map the procnum to the old NFSv2 one, as required. */
		if ((nd->nd_flag & ND_NFSV2) != 0) {
			if (nd->nd_procnum < NFS_V3NPROCS)
				procnum = nfsv2_procid[nd->nd_procnum];
			else
				procnum = NFSV2PROC_NOOP;
		}

		/*
		 * Now only used for the R_DONTRECOVER case, but until that is
		 * supported within the krpc code, I need to keep a queue of
		 * outstanding RPCs for nfsv4 client requests.
		 */
		if ((nd->nd_flag & ND_NFSV4) && procnum == NFSV4PROC_COMPOUND)
			MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq),
			    M_NFSDREQ, M_WAITOK);
#ifdef KDTRACE_HOOKS
		if (dtrace_nfscl_nfs234_start_probe != NULL) {
			uint32_t probe_id;
			int probe_procnum;

			if (nd->nd_flag & ND_NFSV4) {
				probe_id =
				    nfscl_nfs4_start_probes[nd->nd_procnum];
				probe_procnum = nd->nd_procnum;
			} else if (nd->nd_flag & ND_NFSV3) {
				probe_id = nfscl_nfs3_start_probes[procnum];
				probe_procnum = procnum;
			} else {
				probe_id =
				    nfscl_nfs2_start_probes[nd->nd_procnum];
				probe_procnum = procnum;
			}
			if (probe_id != 0)
				(dtrace_nfscl_nfs234_start_probe)
				    (probe_id, vp, nd->nd_mreq, cred,
				     probe_procnum);
		}
#endif
	}
	trycnt = 0;
	freeslot = -1;		/* Set to slot that needs to be free'd */
tryagain:
	/* Re-entered for every retry of the same request. */
	slot = -1;		/* Slot that needs a sequence# increment. */
	/*
	 * This timeout specifies when a new socket should be created,
	 * along with new xid values. For UDP, this should be done
	 * infrequently, since retransmits of RPC requests should normally
	 * use the same xid.
	 */
	if (nmp == NULL) {
		timo.tv_usec = 0;
		if (clp == NULL)
			timo.tv_sec = NFSV4_UPCALLTIMEO;
		else
			timo.tv_sec = NFSV4_CALLBACKTIMEO;
	} else {
		if (nrp->nr_sotype != SOCK_DGRAM) {
			timo.tv_usec = 0;
			if ((nmp->nm_flag & NFSMNT_NFSV4))
				timo.tv_sec = INT_MAX;
			else
				timo.tv_sec = NFS_TCPTIMEO;
		} else {
			if (NFSHASSOFT(nmp)) {
				/*
				 * CLSET_RETRIES is set to 2, so this should be
				 * half of the total timeout required.
				 */
				timeo = nmp->nm_retry * nmp->nm_timeo / 2;
				if (timeo < 1)
					timeo = 1;
				timo.tv_sec = timeo / NFS_HZ;
				timo.tv_usec = (timeo % NFS_HZ) * 1000000 /
				    NFS_HZ;
			} else {
				/* For UDP hard mounts, use a large value. */
				timo.tv_sec = NFS_MAXTIMEO / NFS_HZ;
				timo.tv_usec = 0;
			}
		}

		if (rep != NULL) {
			rep->r_flags = 0;
			rep->r_nmp = nmp;
			/*
			 * Chain request into list of outstanding requests.
			 */
			NFSLOCKREQ();
			TAILQ_INSERT_TAIL(&nfsd_reqq, rep, r_chain);
			NFSUNLOCKREQ();
		}
	}

	nd->nd_mrep = NULL;
	/* Callbacks that have a session use the backchannel call. */
	if (clp != NULL && sep != NULL)
		stat = clnt_bck_call(nrp->nr_client, &ext, procnum,
		    nd->nd_mreq, &nd->nd_mrep, timo, sep->nfsess_xprt);
	else
		stat = CLNT_CALL_MBUF(nrp->nr_client, &ext, procnum,
		    nd->nd_mreq, &nd->nd_mrep, timo);

	if (rep != NULL) {
		/*
		 * RPC done, unlink the request.
		 */
		NFSLOCKREQ();
		TAILQ_REMOVE(&nfsd_reqq, rep, r_chain);
		NFSUNLOCKREQ();
	}

	/*
	 * If there was a successful reply and a tprintf msg.
	 * tprintf a response.
	 */
	/* The chain below translates the krpc status into an errno value. */
	if (stat == RPC_SUCCESS) {
		error = 0;
	} else if (stat == RPC_TIMEDOUT) {
		NFSINCRGLOBAL(nfsstatsv1.rpctimeouts);
		error = ETIMEDOUT;
	} else if (stat == RPC_VERSMISMATCH) {
		NFSINCRGLOBAL(nfsstatsv1.rpcinvalid);
		error = EOPNOTSUPP;
	} else if (stat == RPC_PROGVERSMISMATCH) {
		NFSINCRGLOBAL(nfsstatsv1.rpcinvalid);
		error = EPROTONOSUPPORT;
	} else if (stat == RPC_INTR) {
		error = EINTR;
	} else {
		NFSINCRGLOBAL(nfsstatsv1.rpcinvalid);
		error = EACCES;
	}
	if (error) {
		m_freem(nd->nd_mreq);
		/* The GSS name authenticator stays cached in nrp->nr_auth. */
		if (usegssname == 0)
			AUTH_DESTROY(auth);
		if (rep != NULL)
			FREE((caddr_t)rep, M_NFSDREQ);
		if (set_sigset)
			newnfs_restore_sigmask(td, &oldset);
		return (error);
	}

	KASSERT(nd->nd_mrep != NULL, ("mrep shouldn't be NULL if no error\n"));

	/*
	 * Search for any mbufs that are not a multiple of 4 bytes long
	 * or with m_data not longword aligned.
	 * These could cause pointer alignment problems, so copy them to
	 * well aligned mbufs.
	 */
	newnfs_realign(&nd->nd_mrep, M_WAITOK);
	nd->nd_md = nd->nd_mrep;
	nd->nd_dpos = NFSMTOD(nd->nd_md, caddr_t);
	nd->nd_repstat = 0;
	if (nd->nd_procnum != NFSPROC_NULL &&
	    nd->nd_procnum != NFSV4PROC_CBNULL) {
		/* If sep == NULL, set it to the default in nmp. */
		if (sep == NULL && nmp != NULL)
			sep = nfsmnt_mdssession(nmp);
		/*
		 * and now the actual NFS xdr.
		 */
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		nd->nd_repstat = fxdr_unsigned(u_int32_t, *tl);
		if (nd->nd_repstat >= 10000)
			NFSCL_DEBUG(1, "proc=%d reps=%d\n", (int)nd->nd_procnum,
			    (int)nd->nd_repstat);

		/*
		 * Get rid of the tag, return count and SEQUENCE result for
		 * NFSv4.
		 */
		if ((nd->nd_flag & ND_NFSV4) != 0) {
			/* i is the tag length here; skip over the tag. */
			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
			i = fxdr_unsigned(int, *tl);
			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
			if (error)
				goto nfsmout;
			/*
			 * From here on i is the operation number and j its
			 * status, for the operation currently being examined.
			 */
			NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
			opcnt = fxdr_unsigned(int, *tl++);
			i = fxdr_unsigned(int, *tl++);
			j = fxdr_unsigned(int, *tl);
			if (j >= 10000)
				NFSCL_DEBUG(1, "fop=%d fst=%d\n", i, j);
			/*
			 * If the first op is Sequence, free up the slot.
			 */
			if ((nmp != NULL && i == NFSV4OP_SEQUENCE && j != 0) ||
			    (clp != NULL && i == NFSV4OP_CBSEQUENCE && j != 0))
				NFSCL_DEBUG(1, "failed seq=%d\n", j);
			if ((nmp != NULL && i == NFSV4OP_SEQUENCE && j == 0) ||
			    (clp != NULL && i == NFSV4OP_CBSEQUENCE && j == 0)
			    ) {
				if (i == NFSV4OP_SEQUENCE)
					NFSM_DISSECT(tl, uint32_t *,
					    NFSX_V4SESSIONID +
					    5 * NFSX_UNSIGNED);
				else
					NFSM_DISSECT(tl, uint32_t *,
					    NFSX_V4SESSIONID +
					    4 * NFSX_UNSIGNED);
				mtx_lock(&sep->nfsess_mtx);
				/*
				 * Slot bookkeeping is only updated when the
				 * reply carries this session's id.
				 */
				if (bcmp(tl, sep->nfsess_sessionid,
				    NFSX_V4SESSIONID) == 0) {
					tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
					retseq = fxdr_unsigned(uint32_t, *tl++);
					slot = fxdr_unsigned(int, *tl++);
					freeslot = slot;
					if (retseq != sep->nfsess_slotseq[slot])
						printf("retseq diff 0x%x\n",
						    retseq);
					/* The pre-increment skips one word. */
					retval = fxdr_unsigned(uint32_t, *++tl);
					/* Track retval + 1, capped at 64. */
					if ((retval + 1) < sep->nfsess_foreslots
					    )
						sep->nfsess_foreslots = (retval
						    + 1);
					else if ((retval + 1) >
					    sep->nfsess_foreslots)
						sep->nfsess_foreslots = (retval
						    < 64) ? (retval + 1) : 64;
				}
				mtx_unlock(&sep->nfsess_mtx);

				/* Grab the op and status for the next one. */
				if (opcnt > 1) {
					NFSM_DISSECT(tl, uint32_t *,
					    2 * NFSX_UNSIGNED);
					i = fxdr_unsigned(int, *tl++);
					j = fxdr_unsigned(int, *tl);
				}
			}
		}
		if (nd->nd_repstat != 0) {
			if (nd->nd_repstat == NFSERR_BADSESSION &&
			    nmp != NULL && dssep == NULL) {
				/*
				 * If this is a client side MDS RPC, mark
				 * the MDS session defunct and initiate
				 * recovery, as required.
				 * The nfsess_defunct field is protected by
				 * the NFSLOCKMNT()/nm_mtx lock and not the
				 * nfsess_mtx lock to simplify its handling,
				 * for the MDS session. This lock is also
				 * sufficient for nfsess_sessionid, since it
				 * never changes in the structure.
				 */
				NFSCL_DEBUG(1, "Got badsession\n");
				NFSLOCKCLSTATE();
				NFSLOCKMNT(nmp);
				sep = NFSMNT_MDSSESSION(nmp);
				if (bcmp(sep->nfsess_sessionid, nd->nd_sequence,
				    NFSX_V4SESSIONID) == 0) {
					/* Initiate recovery. */
					sep->nfsess_defunct = 1;
					NFSCL_DEBUG(1, "Marked defunct\n");
					if (nmp->nm_clp != NULL) {
						nmp->nm_clp->nfsc_flags |=
						    NFSCLFLAGS_RECOVER;
						wakeup(nmp->nm_clp);
					}
				}
				NFSUNLOCKCLSTATE();
				/*
				 * Sleep for up to 1sec waiting for a new
				 * session.
				 */
				mtx_sleep(&nmp->nm_sess, &nmp->nm_mtx, PZERO,
				    "nfsbadsess", hz);
				/*
				 * Get the session again, in case a new one
				 * has been created during the sleep.
				 */
				sep = NFSMNT_MDSSESSION(nmp);
				NFSUNLOCKMNT(nmp);
				if ((nd->nd_flag & ND_LOOPBADSESS) != 0) {
					reterr = nfsv4_sequencelookup(nmp, sep,
					    &slotpos, &maxslot, &slotseq,
					    sessionid);
					if (reterr == 0) {
						/* Fill in new session info. */
						NFSCL_DEBUG(1,
						  "Filling in new sequence\n");
						tl = nd->nd_sequence;
						bcopy(sessionid, tl,
						    NFSX_V4SESSIONID);
						tl += NFSX_V4SESSIONID /
						    NFSX_UNSIGNED;
						*tl++ = txdr_unsigned(slotseq);
						*tl++ = txdr_unsigned(slotpos);
						*tl = txdr_unsigned(maxslot);
					}
					if (reterr == NFSERR_BADSESSION ||
					    reterr == 0) {
						NFSCL_DEBUG(1,
						    "Badsession looping\n");
						m_freem(nd->nd_mrep);
						nd->nd_mrep = NULL;
						goto tryagain;
					}
					nd->nd_repstat = reterr;
					NFSCL_DEBUG(1, "Got err=%d\n", reterr);
				}
			}
			/*
			 * Retry after a delay for: NFSv4 DELAY/GRACE replies
			 * to procedures not excluded below, DELAY replies on
			 * non-NFSv4 requests, and any RESOURCE reply.
			 */
			if (((nd->nd_repstat == NFSERR_DELAY ||
			      nd->nd_repstat == NFSERR_GRACE) &&
			     (nd->nd_flag & ND_NFSV4) &&
			     nd->nd_procnum != NFSPROC_DELEGRETURN &&
			     nd->nd_procnum != NFSPROC_SETATTR &&
			     nd->nd_procnum != NFSPROC_READ &&
			     nd->nd_procnum != NFSPROC_READDS &&
			     nd->nd_procnum != NFSPROC_WRITE &&
			     nd->nd_procnum != NFSPROC_WRITEDS &&
			     nd->nd_procnum != NFSPROC_OPEN &&
			     nd->nd_procnum != NFSPROC_CREATE &&
			     nd->nd_procnum != NFSPROC_OPENCONFIRM &&
			     nd->nd_procnum != NFSPROC_OPENDOWNGRADE &&
			     nd->nd_procnum != NFSPROC_CLOSE &&
			     nd->nd_procnum != NFSPROC_LOCK &&
			     nd->nd_procnum != NFSPROC_LOCKU) ||
			    (nd->nd_repstat == NFSERR_DELAY &&
			     (nd->nd_flag & ND_NFSV4) == 0) ||
			    nd->nd_repstat == NFSERR_RESOURCE) {
				/*
				 * The delay doubles after every retry and is
				 * capped at NFS_TRYLATERDEL before use.
				 */
				if (trylater_delay > NFS_TRYLATERDEL)
					trylater_delay = NFS_TRYLATERDEL;
				waituntil = NFSD_MONOSEC + trylater_delay;
				while (NFSD_MONOSEC < waituntil)
					(void) nfs_catnap(PZERO, 0, "nfstry");
				trylater_delay *= 2;
				if (slot != -1) {
					/*
					 * Bump the slot sequence number and
					 * patch it into the request before
					 * it is sent again.
					 */
					mtx_lock(&sep->nfsess_mtx);
					sep->nfsess_slotseq[slot]++;
					*nd->nd_slotseq = txdr_unsigned(
					    sep->nfsess_slotseq[slot]);
					mtx_unlock(&sep->nfsess_mtx);
				}
				m_freem(nd->nd_mrep);
				nd->nd_mrep = NULL;
				goto tryagain;
			}

			/*
			 * If the File Handle was stale, invalidate the
			 * lookup cache, just in case.
			 * (vp != NULL implies a client side call)
			 */
			if (nd->nd_repstat == ESTALE && vp != NULL) {
				cache_purge(vp);
				if (ncl_call_invalcaches != NULL)
					(*ncl_call_invalcaches)(vp);
			}
		}
		if ((nd->nd_flag & ND_NFSV4) != 0) {
			/* Free the slot, as required. */
			if (freeslot != -1)
				nfsv4_freeslot(sep, freeslot);
			/*
			 * If this op is Putfh, throw its results away.
			 */
			if (j >= 10000)
				NFSCL_DEBUG(1, "nop=%d nst=%d\n", i, j);
			if (nmp != NULL && i == NFSV4OP_PUTFH && j == 0) {
				NFSM_DISSECT(tl,u_int32_t *,2 * NFSX_UNSIGNED);
				i = fxdr_unsigned(int, *tl++);
				j = fxdr_unsigned(int, *tl);
				if (j >= 10000)
					NFSCL_DEBUG(1, "n2op=%d n2st=%d\n", i,
					    j);
				/*
				 * All Compounds that do an Op that must
				 * be in sequence consist of NFSV4OP_PUTFH
				 * followed by one of these. As such, we
				 * can determine if the seqid# should be
				 * incremented, here.
				 */
				if ((i == NFSV4OP_OPEN ||
				     i == NFSV4OP_OPENCONFIRM ||
				     i == NFSV4OP_OPENDOWNGRADE ||
				     i == NFSV4OP_CLOSE ||
				     i == NFSV4OP_LOCK ||
				     i == NFSV4OP_LOCKU) &&
				    (j == 0 ||
				     (j != NFSERR_STALECLIENTID &&
				      j != NFSERR_STALESTATEID &&
				      j != NFSERR_BADSTATEID &&
				      j != NFSERR_BADSEQID &&
				      j != NFSERR_BADXDR &&
				      j != NFSERR_RESOURCE &&
				      j != NFSERR_NOFILEHANDLE)))
					nd->nd_flag |= ND_INCRSEQID;
			}
			/*
			 * If this op's status is non-zero, mark
			 * that there is no more data to process.
			 * The exception is Setattr, which always has xdr
			 * when it has failed.
			 */
			if (j != 0 && i != NFSV4OP_SETATTR)
				nd->nd_flag |= ND_NOMOREDATA;

			/*
			 * If R_DONTRECOVER is set, replace the stale error
			 * reply, so that recovery isn't initiated.
			 */
			if ((nd->nd_repstat == NFSERR_STALECLIENTID ||
			     nd->nd_repstat == NFSERR_BADSESSION ||
			     nd->nd_repstat == NFSERR_STALESTATEID) &&
			    rep != NULL && (rep->r_flags & R_DONTRECOVER))
				nd->nd_repstat = NFSERR_STALEDONTRECOVER;
		}
	}

#ifdef KDTRACE_HOOKS
	if (nmp != NULL && dtrace_nfscl_nfs234_done_probe != NULL) {
		uint32_t probe_id;
		int probe_procnum;

		if (nd->nd_flag & ND_NFSV4) {
			probe_id = nfscl_nfs4_done_probes[nd->nd_procnum];
			probe_procnum = nd->nd_procnum;
		} else if (nd->nd_flag & ND_NFSV3) {
			probe_id = nfscl_nfs3_done_probes[procnum];
			probe_procnum = procnum;
		} else {
			probe_id = nfscl_nfs2_done_probes[nd->nd_procnum];
			probe_procnum = procnum;
		}
		/*
		 * NOTE(review): the done probe is handed the request mbuf
		 * chain (nd_mreq), not the reply - confirm this is intended.
		 */
		if (probe_id != 0)
			(dtrace_nfscl_nfs234_done_probe)(probe_id, vp,
			    nd->nd_mreq, cred, probe_procnum, 0);
	}
#endif

	/* Success: the request is freed, the reply stays in nd->nd_mrep. */
	m_freem(nd->nd_mreq);
	if (usegssname == 0)
		AUTH_DESTROY(auth);
	if (rep != NULL)
		FREE((caddr_t)rep, M_NFSDREQ);
	if (set_sigset)
		newnfs_restore_sigmask(td, &oldset);
	return (0);
nfsmout:
	/* Reply parsing failed: both the reply and the request are freed. */
	mbuf_freem(nd->nd_mrep);
	mbuf_freem(nd->nd_mreq);
	if (usegssname == 0)
		AUTH_DESTROY(auth);
	if (rep != NULL)
		FREE((caddr_t)rep, M_NFSDREQ);
	if (set_sigset)
		newnfs_restore_sigmask(td, &oldset);
	return (error);
}

/*
 * Mark all of an nfs mount's outstanding requests with R_SOFTTERM and
 * wait for all requests to complete. This is used by forced unmounts
 * to terminate any outstanding RPCs.
*/ int newnfs_nmcancelreqs(struct nfsmount *nmp) { + struct nfsclds *dsp; + struct __rpc_client *cl; if (nmp->nm_sockreq.nr_client != NULL) CLNT_CLOSE(nmp->nm_sockreq.nr_client); +lookformore: + NFSLOCKMNT(nmp); + TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) { + NFSLOCKDS(dsp); + if (dsp != TAILQ_FIRST(&nmp->nm_sess) && + (dsp->nfsclds_flags & NFSCLDS_CLOSED) == 0 && + dsp->nfsclds_sockp != NULL && + dsp->nfsclds_sockp->nr_client != NULL) { + dsp->nfsclds_flags |= NFSCLDS_CLOSED; + cl = dsp->nfsclds_sockp->nr_client; + NFSUNLOCKDS(dsp); + NFSUNLOCKMNT(nmp); + CLNT_CLOSE(cl); + goto lookformore; + } + NFSUNLOCKDS(dsp); + } + NFSUNLOCKMNT(nmp); return (0); } /* * Any signal that can interrupt an NFS operation in an intr mount * should be added to this set. SIGSTOP and SIGKILL cannot be masked. */ int newnfs_sig_set[] = { SIGINT, SIGTERM, SIGHUP, SIGKILL, SIGQUIT }; /* * Check to see if one of the signals in our subset is pending on * the process (in an intr mount). */ static int nfs_sig_pending(sigset_t set) { int i; for (i = 0 ; i < nitems(newnfs_sig_set); i++) if (SIGISMEMBER(set, newnfs_sig_set[i])) return (1); return (0); } /* * The set/restore sigmask functions are used to (temporarily) overwrite * the thread td_sigmask during an RPC call (for example). These are also * used in other places in the NFS client that might tsleep(). */ void newnfs_set_sigmask(struct thread *td, sigset_t *oldset) { sigset_t newset; int i; struct proc *p; SIGFILLSET(newset); if (td == NULL) td = curthread; /* XXX */ p = td->td_proc; /* Remove the NFS set of signals from newset */ PROC_LOCK(p); mtx_lock(&p->p_sigacts->ps_mtx); for (i = 0 ; i < nitems(newnfs_sig_set); i++) { /* * But make sure we leave the ones already masked * by the process, ie. remove the signal from the * temporary signalmask only if it wasn't already * in p_sigmask. 
*/ if (!SIGISMEMBER(td->td_sigmask, newnfs_sig_set[i]) && !SIGISMEMBER(p->p_sigacts->ps_sigignore, newnfs_sig_set[i])) SIGDELSET(newset, newnfs_sig_set[i]); } mtx_unlock(&p->p_sigacts->ps_mtx); kern_sigprocmask(td, SIG_SETMASK, &newset, oldset, SIGPROCMASK_PROC_LOCKED); PROC_UNLOCK(p); } void newnfs_restore_sigmask(struct thread *td, sigset_t *set) { if (td == NULL) td = curthread; /* XXX */ kern_sigprocmask(td, SIG_SETMASK, set, NULL, 0); } /* * NFS wrapper to msleep(), that shoves a new p_sigmask and restores the * old one after msleep() returns. */ int newnfs_msleep(struct thread *td, void *ident, struct mtx *mtx, int priority, char *wmesg, int timo) { sigset_t oldset; int error; struct proc *p; if ((priority & PCATCH) == 0) return msleep(ident, mtx, priority, wmesg, timo); if (td == NULL) td = curthread; /* XXX */ newnfs_set_sigmask(td, &oldset); error = msleep(ident, mtx, priority, wmesg, timo); newnfs_restore_sigmask(td, &oldset); p = td->td_proc; return (error); } /* * Test for a termination condition pending on the process. * This is used for NFSMNT_INT mounts. */ int newnfs_sigintr(struct nfsmount *nmp, struct thread *td) { struct proc *p; sigset_t tmpset; /* Terminate all requests while attempting a forced unmount. */ if (NFSCL_FORCEDISM(nmp->nm_mountp)) return (EIO); if (!(nmp->nm_flag & NFSMNT_INT)) return (0); if (td == NULL) return (0); p = td->td_proc; PROC_LOCK(p); tmpset = p->p_siglist; SIGSETOR(tmpset, td->td_siglist); SIGSETNAND(tmpset, td->td_sigmask); mtx_lock(&p->p_sigacts->ps_mtx); SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore); mtx_unlock(&p->p_sigacts->ps_mtx); if ((SIGNOTEMPTY(p->p_siglist) || SIGNOTEMPTY(td->td_siglist)) && nfs_sig_pending(tmpset)) { PROC_UNLOCK(p); return (EINTR); } PROC_UNLOCK(p); return (0); } static int nfs_msg(struct thread *td, const char *server, const char *msg, int error) { struct proc *p; p = td ? 
td->td_proc : NULL; if (error) { tprintf(p, LOG_INFO, "nfs server %s: %s, error %d\n", server, msg, error); } else { tprintf(p, LOG_INFO, "nfs server %s: %s\n", server, msg); } return (0); } static void nfs_down(struct nfsmount *nmp, struct thread *td, const char *msg, int error, int flags) { if (nmp == NULL) return; mtx_lock(&nmp->nm_mtx); if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) { nmp->nm_state |= NFSSTA_TIMEO; mtx_unlock(&nmp->nm_mtx); vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, VQ_NOTRESP, 0); } else mtx_unlock(&nmp->nm_mtx); mtx_lock(&nmp->nm_mtx); if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO)) { nmp->nm_state |= NFSSTA_LOCKTIMEO; mtx_unlock(&nmp->nm_mtx); vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, VQ_NOTRESPLOCK, 0); } else mtx_unlock(&nmp->nm_mtx); nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error); } static void nfs_up(struct nfsmount *nmp, struct thread *td, const char *msg, int flags, int tprintfmsg) { if (nmp == NULL) return; if (tprintfmsg) { nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0); } mtx_lock(&nmp->nm_mtx); if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) { nmp->nm_state &= ~NFSSTA_TIMEO; mtx_unlock(&nmp->nm_mtx); vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, VQ_NOTRESP, 1); } else mtx_unlock(&nmp->nm_mtx); mtx_lock(&nmp->nm_mtx); if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) { nmp->nm_state &= ~NFSSTA_LOCKTIMEO; mtx_unlock(&nmp->nm_mtx); vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, VQ_NOTRESPLOCK, 1); } else mtx_unlock(&nmp->nm_mtx); } Index: projects/pnfs-planb-server-stable11/sys/fs/nfs/nfs_commonport.c =================================================================== --- projects/pnfs-planb-server-stable11/sys/fs/nfs/nfs_commonport.c (revision 324529) +++ projects/pnfs-planb-server-stable11/sys/fs/nfs/nfs_commonport.c (revision 324530) @@ -1,764 +1,815 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the 
University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * Functions that need to be different for different versions of BSD * kernel should be kept here, along with any global storage specific * to this BSD variant. 
*/ #include +#include #include +#include #include #include #include #include #include #include #include #include extern int nfscl_ticks; extern int nfsrv_nfsuserd; extern struct nfssockreq nfsrv_nfsuserdsock; extern void (*nfsd_call_recall)(struct vnode *, int, struct ucred *, struct thread *); extern int nfsrv_useacl; struct mount nfsv4root_mnt; int newnfs_numnfsd = 0; struct nfsstatsv1 nfsstatsv1; int nfs_numnfscbd = 0; int nfscl_debuglevel = 0; char nfsv4_callbackaddr[INET6_ADDRSTRLEN]; struct callout newnfsd_callout; int nfsrv_lughashsize = 100; struct mtx nfsrv_dslock_mtx; struct mtx nfsrv_dsclock_mtx; struct mtx nfsrv_dsrmlock_mtx; struct mtx nfsrv_dwrpclock_mtx; struct mtx nfsrv_dsrpclock_mtx; struct mtx nfsrv_darpclock_mtx; struct nfsdevicehead nfsrv_devidhead; void (*nfsd_call_servertimer)(void) = NULL; void (*ncl_call_invalcaches)(struct vnode *) = NULL; +int nfs_pnfsio(task_fn_t *, void *); + static int nfs_realign_test; static int nfs_realign_count; static struct ext_nfsstats oldnfsstats; SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "NFS filesystem"); SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_test, CTLFLAG_RW, &nfs_realign_test, 0, "Number of realign tests done"); SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_count, CTLFLAG_RW, &nfs_realign_count, 0, "Number of mbuf realignments done"); SYSCTL_STRING(_vfs_nfs, OID_AUTO, callback_addr, CTLFLAG_RW, nfsv4_callbackaddr, sizeof(nfsv4_callbackaddr), "NFSv4 callback addr for server to use"); SYSCTL_INT(_vfs_nfs, OID_AUTO, debuglevel, CTLFLAG_RW, &nfscl_debuglevel, 0, "Debug level for NFS client"); SYSCTL_INT(_vfs_nfs, OID_AUTO, userhashsize, CTLFLAG_RDTUN, &nfsrv_lughashsize, 0, "Size of hash tables for uid/name mapping"); +int nfs_pnfsiothreads = 0; +SYSCTL_INT(_vfs_nfs, OID_AUTO, pnfsiothreads, CTLFLAG_RW, &nfs_pnfsiothreads, + 0, "Number of pNFS mirror I/O threads"); /* * Defines for malloc * (Here for FreeBSD, since they allocate storage.) 
*/
/*
 * Each MALLOC_DEFINE() below provides the storage for one malloc type used
 * by the NFS code.  The arguments are the type name, a short description
 * and a long description.
 */
MALLOC_DEFINE(M_NEWNFSRVCACHE, "NFSD srvcache", "NFSD Server Request Cache");
MALLOC_DEFINE(M_NEWNFSDCLIENT, "NFSD V4client", "NFSD V4 Client Id");
MALLOC_DEFINE(M_NEWNFSDSTATE, "NFSD V4state",
    "NFSD V4 State (Openowner, Open, Lockowner, Delegation");
MALLOC_DEFINE(M_NEWNFSDLOCK, "NFSD V4lock", "NFSD V4 byte range lock");
MALLOC_DEFINE(M_NEWNFSDLOCKFILE, "NFSD lckfile", "NFSD Open/Lock file");
MALLOC_DEFINE(M_NEWNFSSTRING, "NFSD string", "NFSD V4 long string");
MALLOC_DEFINE(M_NEWNFSUSERGROUP, "NFSD usrgroup", "NFSD V4 User/group map");
MALLOC_DEFINE(M_NEWNFSDREQ, "NFS req", "NFS request header");
MALLOC_DEFINE(M_NEWNFSFH, "NFS fh", "NFS file handle");
MALLOC_DEFINE(M_NEWNFSCLOWNER, "NFSCL owner", "NFSCL Open Owner");
MALLOC_DEFINE(M_NEWNFSCLOPEN, "NFSCL open", "NFSCL Open");
MALLOC_DEFINE(M_NEWNFSCLDELEG, "NFSCL deleg", "NFSCL Delegation");
MALLOC_DEFINE(M_NEWNFSCLCLIENT, "NFSCL client", "NFSCL Client");
MALLOC_DEFINE(M_NEWNFSCLLOCKOWNER, "NFSCL lckown", "NFSCL Lock Owner");
MALLOC_DEFINE(M_NEWNFSCLLOCK, "NFSCL lck", "NFSCL Lock");
MALLOC_DEFINE(M_NEWNFSV4NODE, "NEWNFSnode", "NFS vnode");
MALLOC_DEFINE(M_NEWNFSDIRECTIO, "NEWdirectio", "NFS Direct IO buffer");
MALLOC_DEFINE(M_NEWNFSDIROFF, "NFSCL diroffdiroff",
    "NFS directory offset data");
MALLOC_DEFINE(M_NEWNFSDROLLBACK, "NFSD rollback",
    "NFS local lock rollback");
MALLOC_DEFINE(M_NEWNFSLAYOUT, "NFSCL layout", "NFSv4.1 Layout");
MALLOC_DEFINE(M_NEWNFSFLAYOUT, "NFSCL flayout", "NFSv4.1 File Layout");
MALLOC_DEFINE(M_NEWNFSDEVINFO, "NFSCL devinfo", "NFSv4.1 Device Info");
MALLOC_DEFINE(M_NEWNFSSOCKREQ, "NFSCL sockreq", "NFS Sock Req");
MALLOC_DEFINE(M_NEWNFSCLDS, "NFSCL session", "NFSv4.1 Session");
MALLOC_DEFINE(M_NEWNFSLAYRECALL, "NFSCL layrecall", "NFSv4.1 Layout Recall");
MALLOC_DEFINE(M_NEWNFSDSESSION, "NFSD session", "NFSD Session for a client");

/*
 * Definition of mutex locks.
 * newnfsd_mtx is used in nfsrvd_nfsd() to protect the nfs socket list
 * and assorted other nfsd structures.
*/
struct mtx newnfsd_mtx;
struct mtx nfs_sockl_mutex;
struct mtx nfs_state_mutex;
struct mtx nfs_nameid_mutex;
struct mtx nfs_req_mutex;
struct mtx nfs_slock_mutex;
struct mtx nfs_clstate_mutex;

/* local functions */
static int nfssvc_call(struct thread *, struct nfssvc_args *, struct ucred *);

#ifdef __NO_STRICT_ALIGNMENT
/*
 * These architectures don't need re-alignment, so just return.
 */
int
newnfs_realign(struct mbuf **pm, int how)
{

	return (0);
}
#else	/* !__NO_STRICT_ALIGNMENT */
/*
 *	newnfs_realign:
 *
 *	Check for badly aligned mbuf data and realign by copying the unaligned
 *	portion of the data into a new mbuf chain and freeing the portions
 *	of the old chain that were replaced.
 *
 *	We cannot simply realign the data within the existing mbuf chain
 *	because the underlying buffers may contain other rpc commands and
 *	we cannot afford to overwrite them.
 *
 *	We would prefer to avoid this situation entirely.  The situation does
 *	not occur with NFS/UDP and is supposed to only occasionally occur
 *	with TCP.  Use vfs.nfs.realign_count and realign_test to check this.
 *
 *	"how" is passed unchanged to the mbuf allocators.  Returns 0 on
 *	success and ENOMEM when the replacement mbuf cannot be allocated, in
 *	which case the chain is left as it was.
 */
int
newnfs_realign(struct mbuf **pm, int how)
{
	struct mbuf *m, *n;
	int off, space;

	++nfs_realign_test;
	/* Walk the chain until the first mbuf with a bad length or address. */
	while ((m = *pm) != NULL) {
		if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) {
			/*
			 * NB: we can't depend on m_pkthdr.len to help us
			 * decide what to do here.  May not be worth doing
			 * the m_length calculation as m_copyback will
			 * expand the mbuf chain below as needed.
			 */
			space = m_length(m, NULL);
			if (space >= MINCLSIZE) {
				/* NB: m_copyback handles space > MCLBYTES */
				n = m_getcl(how, MT_DATA, 0);
			} else
				n = m_get(how, MT_DATA);
			if (n == NULL)
				return (ENOMEM);
			/*
			 * Align the remainder of the mbuf chain.
			 */
			n->m_len = 0;
			off = 0;
			while (m != NULL) {
				m_copyback(n, off, m->m_len, mtod(m, caddr_t));
				off += m->m_len;
				m = m->m_next;
			}
			/* Replace the old tail, starting at *pm, with the copy. */
			m_freem(*pm);
			*pm = n;
			++nfs_realign_count;
			break;
		}
		pm = &m->m_next;
	}

	return (0);
}
#endif	/* __NO_STRICT_ALIGNMENT */

#ifdef notdef
/* Not compiled: "notdef" is not expected to be defined. */
static void
nfsrv_object_create(struct vnode *vp, struct thread *td)
{

	if (vp == NULL || vp->v_type != VREG)
		return;
	(void) vfs_object_create(vp, td, td->td_ucred);
}
#endif

/*
 * Look up a file name. Basically just initialize stuff and call namei().
 * The name is taken from user space (UIO_USERSPACE) and the lookup uses
 * FOLLOW | LOCKLEAF.  Returns the error from namei().
 */
int
nfsrv_lookupfilename(struct nameidata *ndp, char *fname, NFSPROC_T *p)
{
	int error;

	NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, fname,
	    p);
	error = namei(ndp);
	if (!error) {
		NDFREE(ndp, NDF_ONLY_PNBUF);
	}
	return (error);
}

/*
 * Copy NFS uid, gids to the cred structure.
 */
void
newnfs_copycred(struct nfscred *nfscr, struct ucred *cr)
{

	KASSERT(nfscr->nfsc_ngroups >= 0,
	    ("newnfs_copycred: negative nfsc_ngroups"));
	cr->cr_uid = nfscr->nfsc_uid;
	crsetgroups(cr, nfscr->nfsc_ngroups, nfscr->nfsc_groups);
}

/*
 * Map args from nfsmsleep() to msleep().
 * The timespec, when given, is converted to ticks: whole seconds times hz
 * plus the nanoseconds scaled by hz and rounded to the nearest tick.  A
 * NULL ts gives a timeout argument of 0.
 */
int
nfsmsleep(void *chan, void *mutex, int prio, const char *wmesg,
    struct timespec *ts)
{
	u_int64_t nsecval;
	int error, timeo;

	if (ts) {
		timeo = hz * ts->tv_sec;
		nsecval = (u_int64_t)ts->tv_nsec;
		nsecval = ((nsecval * ((u_int64_t)hz)) + 500000000) /
		    1000000000;
		timeo += (int)nsecval;
	} else {
		timeo = 0;
	}
	error = msleep(chan, (struct mtx *)mutex, prio, wmesg, timeo);
	return (error);
}

/*
 * Get the file system info for the server.  For now, just assume FFS.
 * The preferred transfer sizes are NFS_MAXDGRAMDATA when isdgram is
 * non-zero and NFS_SRVMAXIO otherwise.
 */
void
nfsvno_getfs(struct nfsfsinfo *sip, int isdgram)
{
	int pref;

	/*
	 * XXX
	 * There should be file system VFS OP(s) to get this information.
	 * For now, assume ufs.
	 */
	if (isdgram)
		pref = NFS_MAXDGRAMDATA;
	else
		pref = NFS_SRVMAXIO;
	sip->fs_rtmax = NFS_SRVMAXIO;
	sip->fs_rtpref = pref;
	sip->fs_rtmult = NFS_FABLKSIZE;
	sip->fs_wtmax = NFS_SRVMAXIO;
	sip->fs_wtpref = pref;
	sip->fs_wtmult = NFS_FABLKSIZE;
	sip->fs_dtpref = pref;
	sip->fs_maxfilesize = 0xffffffffffffffffull;
	sip->fs_timedelta.tv_sec = 0;
	sip->fs_timedelta.tv_nsec = 1;
	sip->fs_properties = (NFSV3FSINFO_LINK |
	    NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS |
	    NFSV3FSINFO_CANSETTIME);
}

/*
 * Do the pathconf vnode op.
 * The cred and p arguments are not used by this implementation.
 */
int
nfsvno_pathconf(struct vnode *vp, int flag, register_t *retf,
    struct ucred *cred, struct thread *p)
{
	int error;

	error = VOP_PATHCONF(vp, flag, retf);
	if (error == EOPNOTSUPP || error == EINVAL) {
		/*
		 * Some file systems return EINVAL for name arguments not
		 * supported and some return EOPNOTSUPP for this case.
		 * So the NFSv3 Pathconf RPC doesn't fail for these cases,
		 * just fake them.
		 */
		switch (flag) {
		case _PC_LINK_MAX:
			*retf = LINK_MAX;
			break;
		case _PC_NAME_MAX:
			*retf = NAME_MAX;
			break;
		case _PC_CHOWN_RESTRICTED:
			*retf = 1;
			break;
		case _PC_NO_TRUNC:
			*retf = 1;
			break;
		default:
			/*
			 * Only happens if a _PC_xxx is added to the server,
			 * but this isn't updated.
			 */
			*retf = 0;
			printf("nfsrvd pathconf flag=%d not supp\n", flag);
		}
		error = 0;
	}
	NFSEXITCODE(error);
	return (error);
}

/* Fake nfsrv_atroot. Just return 0 */
int
nfsrv_atroot(struct vnode *vp, long *retp)
{

	return (0);
}

/*
 * Set the credentials to refer to root.
 * If only the various BSDen could agree on whether cr_gid is a separate
 * field or cr_groups[0]...
 */
void
newnfs_setroot(struct ucred *cred)
{

	cred->cr_uid = 0;
	cred->cr_groups[0] = 0;
	cred->cr_ngroups = 1;
}

/*
 * Get the client credential. Used for Renew and recovery.
 * Returns a crdup() copy of the current thread's credential, changed to
 * root by newnfs_setroot().
 */
struct ucred *
newnfs_getcred(void)
{
	struct ucred *cred;
	struct thread *td = curthread;

	cred = crdup(td->td_ucred);
	newnfs_setroot(cred);
	return (cred);
}

/*
 * Nfs timer routine
 * Call the nfsd's timer function once/sec.
*/
void
newnfs_timer(void *arg)
{
	static time_t lasttime = 0;
	/*
	 * Call the server timer, if set up.
	 * The argument indicates if it is the next second and therefore
	 * leases should be checked.
	 */
	if (lasttime != NFSD_MONOSEC) {
		lasttime = NFSD_MONOSEC;
		if (nfsd_call_servertimer != NULL)
			(*nfsd_call_servertimer)();
	}
	/* Re-arm the callout so this runs again in nfscl_ticks ticks. */
	callout_reset(&newnfsd_callout, nfscl_ticks, newnfs_timer, NULL);
}


/*
 * Sleep for a short period of time unless errval == NFSERR_GRACE, where
 * the sleep should be for 5 seconds.
 * Since lbolt doesn't exist in FreeBSD-CURRENT, just use a timeout on
 * an event that never gets a wakeup. Only return EINTR or 0.
 */
int
nfs_catnap(int prio, int errval, const char *wmesg)
{
	static int non_event;
	int ret;

	if (errval == NFSERR_GRACE)
		ret = tsleep(&non_event, prio, wmesg, 5 * hz);
	else
		ret = tsleep(&non_event, prio, wmesg, 1);
	/* Any result other than EINTR is reported as success. */
	if (ret != EINTR)
		ret = 0;
	return (ret);
}

/*
 * Get referral. For now, just fail.
 */
struct nfsreferral *
nfsv4root_getreferral(struct vnode *vp, struct vnode *dvp, u_int32_t fileno)
{

	return (NULL);
}

/*
 * Entry point installed in nfsd_call_nfscommon by nfscommon_modevent().
 * Calls nfssvc_call() with the credential of the calling thread.
 */
static int
nfssvc_nfscommon(struct thread *td, struct nfssvc_args *uap)
{
	int error;

	error = nfssvc_call(td, uap, td->td_ucred);
	NFSEXITCODE(error);
	return (error);
}

/*
 * Handle the nfssvc operations selected by uap->flag:
 *  NFSSVC_IDNAME          - copy in an nfsd_idargs (or an old format
 *                           nfsd_oidargs, when NFSSVC_NEWSTRUCT is not
 *                           set) and pass it to nfssvc_idname().
 *  NFSSVC_GETSTATS        - copy the statistics out to uap->argp, then
 *                           optionally zero the client and/or server
 *                           counters.
 *  NFSSVC_NFSUSERDPORT    - copy in a port number and call
 *                           nfsrv_nfsuserdport().
 *  NFSSVC_NFSUSERDDELPORT - call nfsrv_nfsuserddelport().
 * Returns EINVAL when none of these flags is set.  The cred argument is
 * not used here.
 */
static int
nfssvc_call(struct thread *p, struct nfssvc_args *uap, struct ucred *cred)
{
	int error = EINVAL, i, j;
	struct nfsd_idargs nid;
	struct nfsd_oidargs onid;
	struct {
		int vers;	/* Just the first field of nfsstats. */
	} nfsstatver;

	if (uap->flag & NFSSVC_IDNAME) {
		if ((uap->flag & NFSSVC_NEWSTRUCT) != 0)
			error = copyin(uap->argp, &nid, sizeof(nid));
		else {
			/* Old structure: convert it, with no group list. */
			error = copyin(uap->argp, &onid, sizeof(onid));
			if (error == 0) {
				nid.nid_flag = onid.nid_flag;
				nid.nid_uid = onid.nid_uid;
				nid.nid_gid = onid.nid_gid;
				nid.nid_usermax = onid.nid_usermax;
				nid.nid_usertimeout = onid.nid_usertimeout;
				nid.nid_name = onid.nid_name;
				nid.nid_namelen = onid.nid_namelen;
				nid.nid_ngroup = 0;
				nid.nid_grps = NULL;
			}
		}
		if (error)
			goto out;
		error = nfssvc_idname(&nid);
		goto out;
	} else if (uap->flag & NFSSVC_GETSTATS) {
		if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) {
			/* Copy fields to the old ext_nfsstat structure. */
			oldnfsstats.attrcache_hits =
			    nfsstatsv1.attrcache_hits;
			oldnfsstats.attrcache_misses =
			    nfsstatsv1.attrcache_misses;
			oldnfsstats.lookupcache_hits =
			    nfsstatsv1.lookupcache_hits;
			oldnfsstats.lookupcache_misses =
			    nfsstatsv1.lookupcache_misses;
			oldnfsstats.direofcache_hits =
			    nfsstatsv1.direofcache_hits;
			oldnfsstats.direofcache_misses =
			    nfsstatsv1.direofcache_misses;
			oldnfsstats.accesscache_hits =
			    nfsstatsv1.accesscache_hits;
			oldnfsstats.accesscache_misses =
			    nfsstatsv1.accesscache_misses;
			oldnfsstats.biocache_reads =
			    nfsstatsv1.biocache_reads;
			oldnfsstats.read_bios =
			    nfsstatsv1.read_bios;
			oldnfsstats.read_physios =
			    nfsstatsv1.read_physios;
			oldnfsstats.biocache_writes =
			    nfsstatsv1.biocache_writes;
			oldnfsstats.write_bios =
			    nfsstatsv1.write_bios;
			oldnfsstats.write_physios =
			    nfsstatsv1.write_physios;
			oldnfsstats.biocache_readlinks =
			    nfsstatsv1.biocache_readlinks;
			oldnfsstats.readlink_bios =
			    nfsstatsv1.readlink_bios;
			oldnfsstats.biocache_readdirs =
			    nfsstatsv1.biocache_readdirs;
			oldnfsstats.readdir_bios =
			    nfsstatsv1.readdir_bios;
			for (i = 0; i < NFSV4_NPROCS; i++)
				oldnfsstats.rpccnt[i] = nfsstatsv1.rpccnt[i];
			oldnfsstats.rpcretries = nfsstatsv1.rpcretries;
			for (i = 0; i < NFSV4OP_NOPS; i++)
				oldnfsstats.srvrpccnt[i] =
				    nfsstatsv1.srvrpccnt[i];
			/*
			 * The NFSV4OP_FAKENOPS counters that start at index
			 * NFSV42_NOPS in the new array are stored starting at
			 * index NFSV4OP_NOPS in the old one.
			 */
			for (i = NFSV42_NOPS, j = NFSV4OP_NOPS;
			    i < NFSV42_NOPS + NFSV4OP_FAKENOPS; i++, j++)
				oldnfsstats.srvrpccnt[j] =
				    nfsstatsv1.srvrpccnt[i];
			oldnfsstats.srvrpc_errs = nfsstatsv1.srvrpc_errs;
			oldnfsstats.srv_errs = nfsstatsv1.srv_errs;
			oldnfsstats.rpcrequests = nfsstatsv1.rpcrequests;
			oldnfsstats.rpctimeouts = nfsstatsv1.rpctimeouts;
			oldnfsstats.rpcunexpected = nfsstatsv1.rpcunexpected;
			oldnfsstats.rpcinvalid = nfsstatsv1.rpcinvalid;
			oldnfsstats.srvcache_inproghits =
			    nfsstatsv1.srvcache_inproghits;
			oldnfsstats.srvcache_idemdonehits =
			    nfsstatsv1.srvcache_idemdonehits;
			oldnfsstats.srvcache_nonidemdonehits =
			    nfsstatsv1.srvcache_nonidemdonehits;
			oldnfsstats.srvcache_misses =
			    nfsstatsv1.srvcache_misses;
			oldnfsstats.srvcache_tcppeak =
			    nfsstatsv1.srvcache_tcppeak;
			oldnfsstats.srvcache_size = nfsstatsv1.srvcache_size;
			oldnfsstats.srvclients = nfsstatsv1.srvclients;
			oldnfsstats.srvopenowners = nfsstatsv1.srvopenowners;
			oldnfsstats.srvopens = nfsstatsv1.srvopens;
			oldnfsstats.srvlockowners = nfsstatsv1.srvlockowners;
			oldnfsstats.srvlocks = nfsstatsv1.srvlocks;
			oldnfsstats.srvdelegates = nfsstatsv1.srvdelegates;
			for (i = 0; i < NFSV4OP_CBNOPS; i++)
				oldnfsstats.cbrpccnt[i] =
				    nfsstatsv1.cbrpccnt[i];
			oldnfsstats.clopenowners = nfsstatsv1.clopenowners;
			oldnfsstats.clopens = nfsstatsv1.clopens;
			oldnfsstats.cllockowners = nfsstatsv1.cllockowners;
			oldnfsstats.cllocks = nfsstatsv1.cllocks;
			oldnfsstats.cldelegates = nfsstatsv1.cldelegates;
			oldnfsstats.cllocalopenowners =
			    nfsstatsv1.cllocalopenowners;
			oldnfsstats.cllocalopens = nfsstatsv1.cllocalopens;
			oldnfsstats.cllocallockowners =
			    nfsstatsv1.cllocallockowners;
			oldnfsstats.cllocallocks = nfsstatsv1.cllocallocks;
			error = copyout(&oldnfsstats, uap->argp,
			    sizeof (oldnfsstats));
		} else {
			/*
			 * New structure: the caller supplies the version it
			 * expects in the first field; anything other than
			 * NFSSTATS_V1 is rejected with EPERM.
			 */
			error = copyin(uap->argp, &nfsstatver,
			    sizeof(nfsstatver));
			if (error == 0 && nfsstatver.vers != NFSSTATS_V1)
				error = EPERM;
			if (error == 0)
				error = copyout(&nfsstatsv1, uap->argp,
				    sizeof (nfsstatsv1));
		}
		/* The counters are only zeroed after a successful copyout. */
		if (error == 0) {
			if ((uap->flag & NFSSVC_ZEROCLTSTATS) != 0) {
				nfsstatsv1.attrcache_hits = 0;
				nfsstatsv1.attrcache_misses = 0;
				nfsstatsv1.lookupcache_hits = 0;
				nfsstatsv1.lookupcache_misses = 0;
				nfsstatsv1.direofcache_hits = 0;
				nfsstatsv1.direofcache_misses = 0;
				nfsstatsv1.accesscache_hits = 0;
				nfsstatsv1.accesscache_misses = 0;
				nfsstatsv1.biocache_reads = 0;
				nfsstatsv1.read_bios = 0;
				nfsstatsv1.read_physios = 0;
				nfsstatsv1.biocache_writes = 0;
				nfsstatsv1.write_bios = 0;
				nfsstatsv1.write_physios = 0;
				nfsstatsv1.biocache_readlinks = 0;
				nfsstatsv1.readlink_bios = 0;
				nfsstatsv1.biocache_readdirs = 0;
				nfsstatsv1.readdir_bios = 0;
				nfsstatsv1.rpcretries = 0;
				nfsstatsv1.rpcrequests = 0;
				nfsstatsv1.rpctimeouts = 0;
				nfsstatsv1.rpcunexpected = 0;
				nfsstatsv1.rpcinvalid = 0;
				bzero(nfsstatsv1.rpccnt,
				    sizeof(nfsstatsv1.rpccnt));
			}
			if ((uap->flag & NFSSVC_ZEROSRVSTATS) != 0) {
				nfsstatsv1.srvrpc_errs = 0;
				nfsstatsv1.srv_errs = 0;
				nfsstatsv1.srvcache_inproghits = 0;
				nfsstatsv1.srvcache_idemdonehits = 0;
				nfsstatsv1.srvcache_nonidemdonehits = 0;
				nfsstatsv1.srvcache_misses = 0;
				nfsstatsv1.srvcache_tcppeak = 0;
				bzero(nfsstatsv1.srvrpccnt,
				    sizeof(nfsstatsv1.srvrpccnt));
				bzero(nfsstatsv1.cbrpccnt,
				    sizeof(nfsstatsv1.cbrpccnt));
			}
		}
		goto out;
	} else if (uap->flag & NFSSVC_NFSUSERDPORT) {
		u_short sockport;

		error = copyin(uap->argp, (caddr_t)&sockport,
		    sizeof (u_short));
		if (!error)
			error = nfsrv_nfsuserdport(sockport, p);
	} else if (uap->flag & NFSSVC_NFSUSERDDELPORT) {
		nfsrv_nfsuserddelport();
		error = 0;
	}

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * called by all three modevent routines, so that it gets things
 * initialized soon enough.
 * Only the first call does anything; later calls return at once.
 */
void
newnfs_portinit(void)
{
	static int inited = 0;

	if (inited)
		return;
	inited = 1;
	/* Initialize SMP locks used by both client and server. */
	mtx_init(&newnfsd_mtx, "newnfsd_mtx", NULL, MTX_DEF);
	mtx_init(&nfs_state_mutex, "nfs_state_mutex", NULL, MTX_DEF);
	mtx_init(&nfs_clstate_mutex, "nfs_clstate_mutex", NULL, MTX_DEF);
}

/*
 * Determine if the file system supports NFSv4 ACLs.
 * Return 1 if it does, 0 otherwise.
*/ int nfs_supportsnfsv4acls(struct vnode *vp) { int error; register_t retval; ASSERT_VOP_LOCKED(vp, "nfs supports nfsv4acls"); if (nfsrv_useacl == 0) return (0); error = VOP_PATHCONF(vp, _PC_ACL_NFS4, &retval); if (error == 0 && retval != 0) return (1); return (0); +} + +/* + * These are the first fields of all the context structures passed into + * nfs_pnfsio(). + */ +struct pnfsio { + int done; + int inprog; + struct task tsk; +}; + +/* + * Do a mirror I/O on a pNFS thread. + */ +int +nfs_pnfsio(task_fn_t *func, void *context) +{ + struct pnfsio *pio; + int ret; + static struct taskqueue *pnfsioq = NULL; + + pio = (struct pnfsio *)context; + if (pnfsioq == NULL) { + if (nfs_pnfsiothreads == 0) + nfs_pnfsiothreads = mp_ncpus * 8; + pnfsioq = taskqueue_create("pnfsioq", M_WAITOK, + taskqueue_thread_enqueue, &pnfsioq); + if (pnfsioq == NULL) + return (ENOMEM); + ret = taskqueue_start_threads(&pnfsioq, nfs_pnfsiothreads, + 0, "pnfsiot"); + if (ret != 0) { + taskqueue_free(pnfsioq); + pnfsioq = NULL; + return (ret); + } + } + pio->inprog = 1; + TASK_INIT(&pio->tsk, 0, func, context); + ret = taskqueue_enqueue(pnfsioq, &pio->tsk); + if (ret != 0) + pio->inprog = 0; + return (ret); } extern int (*nfsd_call_nfscommon)(struct thread *, struct nfssvc_args *); /* * Called once to initialize data structures... 
*/
/*
 * Module event handler for nfscommon.
 *  MOD_LOAD   - initialize the locks, the device id list and the callout,
 *               call newnfs_init() and install nfssvc_nfscommon() in
 *               nfsd_call_nfscommon.  A second load is a no-op.
 *  MOD_UNLOAD - refused with EBUSY while newnfs_numnfsd, nfsrv_nfsuserd or
 *               nfs_numnfscbd is non-zero; otherwise undo the above.
 * Any other event type returns EOPNOTSUPP.
 */
static int
nfscommon_modevent(module_t mod, int type, void *data)
{
	int error = 0;
	static int loaded = 0;

	switch (type) {
	case MOD_LOAD:
		if (loaded)
			goto out;
		newnfs_portinit();
		mtx_init(&nfs_nameid_mutex, "nfs_nameid_mutex", NULL, MTX_DEF);
		mtx_init(&nfs_sockl_mutex, "nfs_sockl_mutex", NULL, MTX_DEF);
		mtx_init(&nfs_slock_mutex, "nfs_slock_mutex", NULL, MTX_DEF);
		mtx_init(&nfs_req_mutex, "nfs_req_mutex", NULL, MTX_DEF);
		mtx_init(&nfsrv_nfsuserdsock.nr_mtx, "nfsuserd", NULL,
		    MTX_DEF);
		mtx_init(&nfsrv_dslock_mtx, "nfs4ds", NULL, MTX_DEF);
		mtx_init(&nfsrv_dsclock_mtx, "nfsdsc", NULL, MTX_DEF);
		mtx_init(&nfsrv_dsrmlock_mtx, "nfsdsrm", NULL, MTX_DEF);
		mtx_init(&nfsrv_dwrpclock_mtx, "nfsdwrpc", NULL, MTX_DEF);
		mtx_init(&nfsrv_dsrpclock_mtx, "nfsdsrpc", NULL, MTX_DEF);
		mtx_init(&nfsrv_darpclock_mtx, "nfsdarpc", NULL, MTX_DEF);
		TAILQ_INIT(&nfsrv_devidhead);
		callout_init(&newnfsd_callout, 1);
		newnfs_init();
		nfsd_call_nfscommon = nfssvc_nfscommon;
		loaded = 1;
		break;

	case MOD_UNLOAD:
		if (newnfs_numnfsd != 0 || nfsrv_nfsuserd != 0 ||
		    nfs_numnfscbd != 0) {
			error = EBUSY;
			break;
		}

		/* Unhook the entry point before tearing anything down. */
		nfsd_call_nfscommon = NULL;
		callout_drain(&newnfsd_callout);
		/* Clean out the name<-->id cache. */
		nfsrv_cleanusergroup();
		/* and get rid of the mutexes */
		mtx_destroy(&nfs_nameid_mutex);
		mtx_destroy(&newnfsd_mtx);
		mtx_destroy(&nfs_state_mutex);
		mtx_destroy(&nfs_clstate_mutex);
		mtx_destroy(&nfs_sockl_mutex);
		mtx_destroy(&nfs_slock_mutex);
		mtx_destroy(&nfs_req_mutex);
		mtx_destroy(&nfsrv_nfsuserdsock.nr_mtx);
		mtx_destroy(&nfsrv_dslock_mtx);
		mtx_destroy(&nfsrv_dsclock_mtx);
		mtx_destroy(&nfsrv_dsrmlock_mtx);
		mtx_destroy(&nfsrv_dwrpclock_mtx);
		mtx_destroy(&nfsrv_dsrpclock_mtx);
		mtx_destroy(&nfsrv_darpclock_mtx);
		loaded = 0;
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}

out:
	NFSEXITCODE(error);
	return error;
}

/* Module declaration: name, event handler and (unused) private data. */
static moduledata_t nfscommon_mod = {
	"nfscommon",
	nfscommon_modevent,
	NULL,
};
DECLARE_MODULE(nfscommon, nfscommon_mod, SI_SUB_VFS, SI_ORDER_ANY);

/* So that loader and kldload(2) can find us, wherever we are.. */
MODULE_VERSION(nfscommon, 1);
MODULE_DEPEND(nfscommon, nfssvc, 1, 1, 1);
MODULE_DEPEND(nfscommon, krpc, 1, 1, 1);

Index: projects/pnfs-planb-server-stable11/sys/fs/nfs/nfsclstate.h
===================================================================
--- projects/pnfs-planb-server-stable11/sys/fs/nfs/nfsclstate.h (revision 324529)
+++ projects/pnfs-planb-server-stable11/sys/fs/nfs/nfsclstate.h (revision 324530)
@@ -1,437 +1,438 @@
/*-
 * Copyright (c) 2009 Rick Macklem, University of Guelph
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
*
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef _NFS_NFSCLSTATE_H_
#define	_NFS_NFSCLSTATE_H_

/*
 * Definitions for NFS V4 client state handling.
 */
/* List head types for the client state structures declared in this file. */
LIST_HEAD(nfsclopenhead, nfsclopen);
LIST_HEAD(nfscllockownerhead, nfscllockowner);
SLIST_HEAD(nfscllockownerfhhead, nfscllockownerfh);
LIST_HEAD(nfscllockhead, nfscllock);
LIST_HEAD(nfsclhead, nfsclclient);
LIST_HEAD(nfsclownerhead, nfsclowner);
TAILQ_HEAD(nfscldeleghead, nfscldeleg);
LIST_HEAD(nfscldeleghash, nfscldeleg);
TAILQ_HEAD(nfscllayouthead, nfscllayout);
LIST_HEAD(nfscllayouthash, nfscllayout);
LIST_HEAD(nfsclflayouthead, nfsclflayout);
LIST_HEAD(nfscldevinfohead, nfscldevinfo);
LIST_HEAD(nfsclrecalllayouthead, nfsclrecalllayout);
/*
 * Hash table sizes, and macros that return a pointer to the hash chain of
 * client (c) selected by ncl_hash((f), (l)).
 * NOTE(review): f and l look like a file handle and its length - confirm
 * against ncl_hash().
 */
#define	NFSCLDELEGHASHSIZE	256
#define	NFSCLDELEGHASH(c, f, l)						\
	(&((c)->nfsc_deleghash[ncl_hash((f), (l)) % NFSCLDELEGHASHSIZE]))
#define	NFSCLLAYOUTHASHSIZE	256
#define	NFSCLLAYOUTHASH(c, f, l)					\
	(&((c)->nfsc_layouthash[ncl_hash((f), (l)) % NFSCLLAYOUTHASHSIZE]))

/* Structure for NFSv4.1 session stuff.
*/ struct nfsclsession { struct mtx nfsess_mtx; struct nfsslot nfsess_cbslots[NFSV4_CBSLOTS]; nfsquad_t nfsess_clientid; SVCXPRT *nfsess_xprt; /* For backchannel callback */ uint32_t nfsess_slotseq[64]; /* Max for 64bit nm_slots */ uint64_t nfsess_slots; uint32_t nfsess_sequenceid; uint32_t nfsess_maxcache; /* Max size for cached reply. */ uint16_t nfsess_foreslots; uint16_t nfsess_backslots; uint8_t nfsess_sessionid[NFSX_V4SESSIONID]; uint8_t nfsess_defunct; /* Non-zero for old sessions */ }; /* * This structure holds the session, clientid and related information * needed for an NFSv4.1 Meta Data Server (MDS) or Data Server (DS). * It is malloc'd to the correct length. */ struct nfsclds { TAILQ_ENTRY(nfsclds) nfsclds_list; struct nfsclsession nfsclds_sess; struct mtx nfsclds_mtx; struct nfssockreq *nfsclds_sockp; time_t nfsclds_expire; uint16_t nfsclds_flags; uint16_t nfsclds_servownlen; uint8_t nfsclds_verf[NFSX_VERF]; uint8_t nfsclds_serverown[0]; }; /* * Flags for nfsclds_flags. */ #define NFSCLDS_HASWRITEVERF 0x0001 #define NFSCLDS_MDS 0x0002 #define NFSCLDS_DS 0x0004 +#define NFSCLDS_CLOSED 0x0008 struct nfsclclient { LIST_ENTRY(nfsclclient) nfsc_list; struct nfsclownerhead nfsc_owner; struct nfscldeleghead nfsc_deleg; struct nfscldeleghash nfsc_deleghash[NFSCLDELEGHASHSIZE]; struct nfscllayouthead nfsc_layout; struct nfscllayouthash nfsc_layouthash[NFSCLLAYOUTHASHSIZE]; struct nfscldevinfohead nfsc_devinfo; struct nfsv4lock nfsc_lock; struct proc *nfsc_renewthread; struct nfsmount *nfsc_nmp; time_t nfsc_expire; u_int32_t nfsc_clientidrev; u_int32_t nfsc_rev; u_int32_t nfsc_renew; u_int32_t nfsc_cbident; u_int16_t nfsc_flags; u_int16_t nfsc_idlen; u_int8_t nfsc_id[1]; /* Malloc'd to correct length */ }; /* * Bits for nfsc_flags. 
*/
#define	NFSCLFLAGS_INITED	0x0001
#define	NFSCLFLAGS_HASCLIENTID	0x0002
#define	NFSCLFLAGS_RECOVER	0x0004
#define	NFSCLFLAGS_UMOUNT	0x0008
#define	NFSCLFLAGS_HASTHREAD	0x0010
#define	NFSCLFLAGS_AFINET6	0x0020
#define	NFSCLFLAGS_EXPIREIT	0x0040
#define	NFSCLFLAGS_FIRSTDELEG	0x0080
#define	NFSCLFLAGS_GOTDELEG	0x0100
#define	NFSCLFLAGS_RECVRINPROG	0x0200

/*
 * An open owner.  It heads a list of nfsclopen structures (nfsow_open) and
 * points back at its client (nfsow_clp).
 */
struct nfsclowner {
	LIST_ENTRY(nfsclowner)	nfsow_list;
	struct nfsclopenhead	nfsow_open;
	struct nfsclclient	*nfsow_clp;
	u_int32_t		nfsow_seqid;
	u_int32_t		nfsow_defunct;
	struct nfsv4lock	nfsow_rwlock;
	u_int8_t		nfsow_owner[NFSV4CL_LOCKNAMELEN];
};

/*
 * MALLOC'd to the correct length to accommodate the file handle.
 * A delegation is linked on both a TAILQ (nfsdl_list) and a hash chain
 * (nfsdl_hash).
 */
struct nfscldeleg {
	TAILQ_ENTRY(nfscldeleg)	nfsdl_list;
	LIST_ENTRY(nfscldeleg)	nfsdl_hash;
	struct nfsclownerhead	nfsdl_owner;	/* locally issued state */
	struct nfscllockownerhead nfsdl_lock;
	nfsv4stateid_t		nfsdl_stateid;
	struct acl_entry	nfsdl_ace;	/* Delegation ace */
	struct nfsclclient	*nfsdl_clp;
	struct nfsv4lock	nfsdl_rwlock;	/* for active I/O ops */
	struct nfscred		nfsdl_cred;	/* Cred. used for Open */
	time_t			nfsdl_timestamp; /* used for stale cleanup */
	u_int64_t		nfsdl_sizelimit; /* Limit for file growth */
	u_int64_t		nfsdl_size;	/* saved copy of file size */
	u_int64_t		nfsdl_change;	/* and change attribute */
	struct timespec		nfsdl_modtime;	/* local modify time */
	u_int16_t		nfsdl_fhlen;
	u_int8_t		nfsdl_flags;
	u_int8_t		nfsdl_fh[1];	/* must be last */
};

/*
 * nfsdl_flags bits.
 */
#define	NFSCLDL_READ		0x01
#define	NFSCLDL_WRITE		0x02
#define	NFSCLDL_RECALL		0x04
#define	NFSCLDL_NEEDRECLAIM	0x08
#define	NFSCLDL_ZAPPED		0x10
#define	NFSCLDL_MODTIMESET	0x20
#define	NFSCLDL_DELEGRET	0x40

/*
 * MALLOC'd to the correct length to accommodate the file handle.
 * An open.  It heads a list of lock owners (nfso_lock) and points back at
 * its open owner (nfso_own).
 */
struct nfsclopen {
	LIST_ENTRY(nfsclopen)	nfso_list;
	struct nfscllockownerhead nfso_lock;
	nfsv4stateid_t		nfso_stateid;
	struct nfsclowner	*nfso_own;
	struct nfscred		nfso_cred;	/* Cred. used for Open */
	u_int32_t		nfso_mode;
	u_int32_t		nfso_opencnt;
	u_int16_t		nfso_fhlen;
	u_int8_t		nfso_posixlock;	/* 1 for POSIX type locking */
	u_int8_t		nfso_fh[1];	/* must be last */
};

/*
 * Return values for nfscl_open(). NFSCLOPEN_OK must == 0.
 */
#define	NFSCLOPEN_OK		0
#define	NFSCLOPEN_DOOPEN	1
#define	NFSCLOPEN_DOOPENDOWNGRADE 2
#define	NFSCLOPEN_SETCRED	3

/*
 * A lock owner.  It heads a list of byte range locks (nfsl_lock) and
 * points back at its open (nfsl_open).
 */
struct nfscllockowner {
	LIST_ENTRY(nfscllockowner) nfsl_list;
	struct nfscllockhead	nfsl_lock;
	struct nfsclopen	*nfsl_open;
	NFSPROC_T		*nfsl_inprog;
	nfsv4stateid_t		nfsl_stateid;
	int			nfsl_lockflags;
	u_int32_t		nfsl_seqid;
	struct nfsv4lock	nfsl_rwlock;
	u_int8_t		nfsl_owner[NFSV4CL_LOCKNAMELEN];
	u_int8_t		nfsl_openowner[NFSV4CL_LOCKNAMELEN];
};

/*
 * Byte range entry for the above lock owner.
 */
struct nfscllock {
	LIST_ENTRY(nfscllock)	nfslo_list;
	u_int64_t		nfslo_first;
	u_int64_t		nfslo_end;
	short			nfslo_type;
};

/* This structure is used to collect a list of lockowners to free up. */
struct nfscllockownerfh {
	SLIST_ENTRY(nfscllockownerfh)	nfslfh_list;
	struct nfscllockownerhead	nfslfh_lock;
	int				nfslfh_len;
	uint8_t				nfslfh_fh[NFSX_V4FHMAX];
};

/*
 * MALLOC'd to the correct length to accommodate the file handle.
 * A layout.  It is linked on both a TAILQ (nfsly_list) and a hash chain
 * (nfsly_hash), and heads the read (nfsly_flayread) and read/write
 * (nfsly_flayrw) nfsclflayout lists plus a list of recalls (nfsly_recall).
 */
struct nfscllayout {
	TAILQ_ENTRY(nfscllayout)	nfsly_list;
	LIST_ENTRY(nfscllayout)		nfsly_hash;
	nfsv4stateid_t			nfsly_stateid;
	struct nfsv4lock		nfsly_lock;
	uint64_t			nfsly_filesid[2];
	uint64_t			nfsly_lastbyte;
	struct nfsclflayouthead		nfsly_flayread;
	struct nfsclflayouthead		nfsly_flayrw;
	struct nfsclrecalllayouthead	nfsly_recall;
	time_t				nfsly_timestamp;
	struct nfsclclient		*nfsly_clp;
	uint16_t			nfsly_flags;
	uint16_t			nfsly_fhlen;
	uint8_t				nfsly_fh[1];
};

/*
 * Flags for nfsly_flags.
 */
#define	NFSLY_FILES		0x0001
#define	NFSLY_BLOCK		0x0002
#define	NFSLY_OBJECT		0x0004
#define	NFSLY_RECALL		0x0008
#define	NFSLY_RECALLFILE	0x0010
#define	NFSLY_RECALLFSID	0x0020
#define	NFSLY_RECALLALL		0x0040
#define	NFSLY_RETONCLOSE	0x0080
#define	NFSLY_WRITTEN		0x0100	/* Has been used to write to a DS.
*/
#define	NFSLY_FLEXFILE		0x0200

/*
 * Flex file layout mirror specific stuff for nfsclflayout.
 * One of these exists per mirror (see the ffm[] array at the end of
 * struct nfsclflayout).  fh[] has room for NFSDEV_MAXVERS file handle
 * pointers.
 * NOTE(review): presumably fhcnt is the number of fh[] entries in use and
 * user/group are the credentials to use for I/O on the mirror - confirm.
 */
struct nfsffm {
	nfsv4stateid_t		st;
	char			dev[NFSX_V4DEVICEID];
	uint32_t		eff;
	uid_t			user;
	gid_t			group;
	struct nfsfh		*fh[NFSDEV_MAXVERS];
	uint16_t		fhcnt;
};

/*
 * MALLOC'd to the correct length to accommodate the file handle list for File
 * layout and the list of mirrors for the Flex File Layout.
 * These hang off of nfsly_flayread and nfsly_flayrw, sorted in increasing
 * offset order.
 * The nfsly_flayread list holds the ones with iomode == NFSLAYOUTIOMODE_READ,
 * whereas the nfsly_flayrw holds the ones with iomode == NFSLAYOUTIOMODE_RW.
 *
 * The members of nfsfl_un and nfsfl_un2 are reached through the nfsfl_xxx
 * macros defined right after the structure.
 * NOTE(review): presumably the "fl"/"fh" members are the File layout
 * variant and the "ff"/"ffm" members the Flex File variant, selected by
 * NFSFL_FILE/NFSFL_FLEXFILE in nfsfl_flags - confirm.
 */
struct nfsclflayout {
	LIST_ENTRY(nfsclflayout)	nfsfl_list;
	uint64_t			nfsfl_off;
	uint64_t			nfsfl_end;
	uint32_t			nfsfl_iomode;
	struct nfscldevinfo		*nfsfl_devp;
	uint16_t			nfsfl_flags;
	union {
		struct {
			uint64_t	patoff;
			uint32_t	util;
			uint32_t	stripe1;
			uint8_t		dev[NFSX_V4DEVICEID];
			uint16_t	fhcnt;
		} fl;
		struct {
			uint64_t	stripeunit;
			uint32_t	fflags;
			uint32_t	statshint;
			uint16_t	mirrorcnt;
		} ff;
	} nfsfl_un;
	union {
		struct nfsfh	*fh[0];	/* FH list for DS File layout */
		struct nfsffm	ffm[0];	/* Mirror list for Flex File */
	} nfsfl_un2;	/* Must be last. Malloc'd to correct array length */
};
/* Shorthand names for the union members of struct nfsclflayout. */
#define	nfsfl_patoff		nfsfl_un.fl.patoff
#define	nfsfl_util		nfsfl_un.fl.util
#define	nfsfl_stripe1		nfsfl_un.fl.stripe1
#define	nfsfl_dev		nfsfl_un.fl.dev
#define	nfsfl_fhcnt		nfsfl_un.fl.fhcnt
#define	nfsfl_stripeunit	nfsfl_un.ff.stripeunit
#define	nfsfl_fflags		nfsfl_un.ff.fflags
#define	nfsfl_statshint		nfsfl_un.ff.statshint
#define	nfsfl_mirrorcnt		nfsfl_un.ff.mirrorcnt
#define	nfsfl_fh		nfsfl_un2.fh
#define	nfsfl_ffm		nfsfl_un2.ffm

/*
 * Flags for nfsfl_flags.
 */
#define	NFSFL_RECALL	0x0001	/* File layout has been recalled */
#define	NFSFL_FILE	0x0002	/* File layout */
#define	NFSFL_FLEXFILE	0x0004	/* Flex File layout */

/*
 * Structure that is used to store a LAYOUTRECALL.
*/ struct nfsclrecalllayout { LIST_ENTRY(nfsclrecalllayout) nfsrecly_list; uint64_t nfsrecly_off; uint64_t nfsrecly_len; int nfsrecly_recalltype; uint32_t nfsrecly_iomode; uint32_t nfsrecly_stateseqid; }; /* * Stores the NFSv4.1 Device Info. Malloc'd to the correct length to * store the list of network connections and list of indices. * nfsdi_data[] is allocated the following way: * - nfsdi_addrcnt * struct nfsclds * - stripe indices, each stored as one byte, since there can be many * of them. (This implies a limit of 256 on nfsdi_addrcnt, since the * indices select which address.) * For Flex File, the addrcnt is always one and no stripe indices exist. */ struct nfscldevinfo { LIST_ENTRY(nfscldevinfo) nfsdi_list; uint8_t nfsdi_deviceid[NFSX_V4DEVICEID]; struct nfsclclient *nfsdi_clp; uint32_t nfsdi_refcnt; uint32_t nfsdi_layoutrefs; union { struct { uint16_t stripecnt; } fl; struct { int versindex; uint32_t vers; uint32_t minorvers; uint32_t rsize; uint32_t wsize; } ff; } nfsdi_un; uint16_t nfsdi_addrcnt; uint16_t nfsdi_flags; struct nfsclds *nfsdi_data[0]; }; #define nfsdi_stripecnt nfsdi_un.fl.stripecnt #define nfsdi_versindex nfsdi_un.ff.versindex #define nfsdi_vers nfsdi_un.ff.vers #define nfsdi_minorvers nfsdi_un.ff.minorvers #define nfsdi_rsize nfsdi_un.ff.rsize #define nfsdi_wsize nfsdi_un.ff.wsize /* Flags for nfsdi_flags. */ #define NFSDI_FILELAYOUT 0x0001 #define NFSDI_FLEXFILE 0x0002 #define NFSDI_TIGHTCOUPLED 0X0004 /* These inline functions return values from nfsdi_data[]. */ /* * Return a pointer to the address at "pos". */ static __inline struct nfsclds ** nfsfldi_addr(struct nfscldevinfo *ndi, int pos) { if (pos >= ndi->nfsdi_addrcnt) return (NULL); return (&ndi->nfsdi_data[pos]); } /* * Return the Nth ("pos") stripe index. 
*/ static __inline int nfsfldi_stripeindex(struct nfscldevinfo *ndi, int pos) { uint8_t *valp; if (pos >= ndi->nfsdi_stripecnt) return (-1); valp = (uint8_t *)&ndi->nfsdi_data[ndi->nfsdi_addrcnt]; valp += pos; return ((int)*valp); } /* * Set the Nth ("pos") stripe index to "val". */ static __inline void nfsfldi_setstripeindex(struct nfscldevinfo *ndi, int pos, uint8_t val) { uint8_t *valp; if (pos >= ndi->nfsdi_stripecnt) return; valp = (uint8_t *)&ndi->nfsdi_data[ndi->nfsdi_addrcnt]; valp += pos; *valp = val; } /* * Macro for incrementing the seqid#. */ #define NFSCL_INCRSEQID(s, n) do { \ if (((n)->nd_flag & ND_INCRSEQID)) \ (s)++; \ } while (0) #endif /* _NFS_NFSCLSTATE_H_ */ Index: projects/pnfs-planb-server-stable11/sys/fs/nfsclient/nfs_clrpcops.c =================================================================== --- projects/pnfs-planb-server-stable11/sys/fs/nfsclient/nfs_clrpcops.c (revision 324529) +++ projects/pnfs-planb-server-stable11/sys/fs/nfsclient/nfs_clrpcops.c (revision 324530) @@ -1,7399 +1,7587 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * Rpc op calls, generally called from the vnode op calls or through the * buffer cache, for NFS v2, 3 and 4. * These do not normally make any changes to vnode arguments or use * structures that might change between the VFS variants. The returned * arguments are all at the end, after the NFSPROC_T *p one. 
*/

#ifndef APPLEKEXT
/*
 * NOTE(review): the bare "#include" lines below have lost their header
 * names and a line starting with '+' carries a diff marker from the
 * revision this text was taken from; both need to be restored/removed
 * before this can be compiled.
 */
#include "opt_inet6.h"
#include
#include
+#include
SYSCTL_DECL(_vfs_nfs);

/* Backs the vfs.nfs.ignore_eexist sysctl declared just below. */
static int	nfsignore_eexist = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, ignore_eexist, CTLFLAG_RW,
    &nfsignore_eexist, 0, "NFS ignore EEXIST replies for mkdir/symlink");

/*
 * Global variables
 *
 * nfs_numnfscbd and nfscl_enablecallb are both checked by nfsrpc_open()
 * before it tries to get a layout with the Open and by nfsrpc_setclient()
 * before it advertises a callback address.
 * nfsv4_callbackaddr and nfsv4_cbport are used by nfsrpc_setclient() to
 * build that callback address.
 * nfscl_assumeposixlocks, when non-zero, makes nfsrpc_openrpc() set
 * nfso_posixlock regardless of the flags in the Open reply.
 */
extern int nfs_numnfscbd;
extern struct timeval nfsboottime;
extern u_int32_t newnfs_false, newnfs_true;
extern nfstype nfsv34_type[9];
extern int nfsrv_useacl;
extern char nfsv4_callbackaddr[INET6_ADDRSTRLEN];
extern int nfscl_debuglevel;
+extern int nfs_pnfsiothreads;
NFSCLSTATEMUTEX;
int nfstest_outofseq = 0;
int nfscl_assumeposixlocks = 1;
int nfscl_enablecallb = 0;
short nfsv4_cbport = NFSV4_CBPORT;
int nfstest_openallsetattr = 0;
#endif	/* !APPLEKEXT */

/* Size of a struct dirent without its (MAXNAMLEN + 1) byte name area. */
#define	DIRHDSIZ	(sizeof (struct dirent) - (MAXNAMLEN + 1))

/*
 * nfscl_getsameserver() can return one of three values:
 * NFSDSP_USETHISSESSION - Use this session for the DS.
 * NFSDSP_SEQTHISSESSION - Use the nfsclds_sequence field of this dsp for new
 *     session.
 * NFSDSP_NOTFOUND - No matching server was found.
 */
enum nfsclds_state {
	NFSDSP_USETHISSESSION = 0,
	NFSDSP_SEQTHISSESSION = 1,
	NFSDSP_NOTFOUND = 2,
};

+/*
+ * Do a write RPC on a DS data file, using this structure for the arguments,
+ * so that this function can be executed by a separate kernel process.
+ */
/*
 * NOTE(review): vp, iomode, must_commit, stateidp, dsp, off, len, fhp, m,
 * vers, minorvers, cred and p have the same types, in the same order, as
 * the arguments of nfsrpc_writedsmir() prototyped below, so they are
 * presumably the saved arguments for that call - confirm.
 */
+struct nfsclwritedsdorpc {
+	int			done;	/* presumably set when the RPC has completed - confirm */
+	int			inprog;	/* presumably set while the task is queued/running - confirm */
+	struct task		tsk;	/* presumably the task handed to nfs_pnfsio() - confirm */
+	struct vnode		*vp;
+	int			iomode;
+	int			must_commit;
+	nfsv4stateid_t		*stateidp;
+	struct nfsclds		*dsp;
+	uint64_t		off;
+	int			len;
+	struct nfsfh		*fhp;
+	struct mbuf		*m;
+	int			vers;
+	int			minorvers;
+	struct ucred		*cred;
+	NFSPROC_T		*p;
+	int			err;	/* presumably the result of the RPC - confirm */
+};
+
/*
 * Forward declarations of the functions private to this file.
 * NOTE(review): a '+' or '-' in the first column is a diff marker left over
 * from the revision this text was taken from ('-' is the old line, '+' its
 * replacement); it is not part of the C source.
 */
static int nfsrpc_setattrrpc(vnode_t , struct vattr *, nfsv4stateid_t *,
    struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *);
static int nfsrpc_readrpc(vnode_t , struct uio *, struct ucred *,
    nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *, void *);
static int nfsrpc_writerpc(vnode_t , struct uio *, int *, int *,
    struct ucred *, nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *,
    void *);
static int nfsrpc_createv23(vnode_t , char *, int, struct vattr *,
    nfsquad_t, int, struct ucred *, NFSPROC_T *, struct nfsvattr *,
    struct nfsvattr *, struct nfsfh **, int *, int *, void *);
static int nfsrpc_createv4(vnode_t , char *, int, struct vattr *,
    nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **, struct ucred *,
    NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *,
    int *, void *, int *);
static int nfsrpc_locku(struct nfsrv_descript *, struct nfsmount *,
    struct nfscllockowner *, u_int64_t, u_int64_t,
    u_int32_t, struct ucred *, NFSPROC_T *, int);
static int nfsrpc_setaclrpc(vnode_t, struct ucred *, NFSPROC_T *,
    struct acl *, nfsv4stateid_t *, void *);
static int nfsrpc_getlayout(struct nfsmount *, vnode_t, struct nfsfh *, int,
    uint32_t *, nfsv4stateid_t *, uint64_t, struct nfscllayout **,
    struct ucred *, NFSPROC_T *);
static int nfsrpc_fillsa(struct nfsmount *, struct sockaddr_in *,
    struct sockaddr_in6 *, sa_family_t, int, struct nfsclds **, NFSPROC_T *);
static void nfscl_initsessionslots(struct nfsclsession *);
static int nfscl_doflayoutio(vnode_t, struct uio *, int *, int *, int *,
    nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
    struct nfsclflayout *, uint64_t, uint64_t, int, struct ucred *,
    NFSPROC_T *);
static int nfscl_dofflayoutio(vnode_t, struct uio *, int *, int *, int *,
    nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
    struct nfsclflayout *, uint64_t, uint64_t, int, int, struct mbuf *,
-    struct ucred *, NFSPROC_T *);
+    struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *);
static struct mbuf *nfsm_copym(struct mbuf *, int, int);
static int nfsrpc_readds(vnode_t, struct uio *, nfsv4stateid_t *, int *,
    struct nfsclds *, uint64_t, int, struct nfsfh *, int, int, int,
    struct ucred *, NFSPROC_T *);
static int nfsrpc_writeds(vnode_t, struct uio *, int *, int *,
    nfsv4stateid_t *, struct nfsclds *, uint64_t, int,
    struct nfsfh *, int, int, int, int, struct ucred *, NFSPROC_T *);
+static int nfsio_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *,
+    struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int,
+    struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *);
static int nfsrpc_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *,
    struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int,
    struct ucred *, NFSPROC_T *);
static enum nfsclds_state nfscl_getsameserver(struct nfsmount *,
    struct nfsclds *, struct nfsclds **);
+static int nfsio_commitds(vnode_t, uint64_t, int, struct nfsclds *,
+    struct nfsfh *, int, int, struct nfsclwritedsdorpc *, struct ucred *,
+    NFSPROC_T *);
static int nfsrpc_commitds(vnode_t, uint64_t, int, struct nfsclds *,
    struct nfsfh *, int, int, struct ucred *, NFSPROC_T *);
static void nfsrv_setuplayoutget(struct nfsrv_descript *, int, uint64_t,
    uint64_t, uint64_t, nfsv4stateid_t *, int, int, int);
static int nfsrv_parseug(struct nfsrv_descript *, int, uid_t *, gid_t *,
    NFSPROC_T *);
static int nfsrv_parselayoutget(struct nfsrv_descript *, nfsv4stateid_t *,
    int *, struct nfsclflayouthead *);
static int nfsrpc_getopenlayout(struct nfsmount *, vnode_t, u_int8_t *,
    int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
    struct nfscldeleg **, struct ucred *, NFSPROC_T *);
static int nfsrpc_getcreatelayout(vnode_t, char *, int, struct vattr *,
    nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
    struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
    struct nfsfh **, int *, int *, void *, int *);
static int nfsrpc_openlayoutrpc(struct nfsmount *, vnode_t, u_int8_t *,
    int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
    struct nfscldeleg **, nfsv4stateid_t *, int, int, int, int *,
    struct nfsclflayouthead *, int *, struct ucred *, NFSPROC_T *);
static int nfsrpc_createlayout(vnode_t, char *, int, struct vattr *,
    nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
    struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
    struct nfsfh **, int *, int *, void *, int *, nfsv4stateid_t *,
    int, int, int, int *, struct nfsclflayouthead *, int *);
static int nfsrpc_layoutget(struct nfsmount *, uint8_t *, int, int, uint64_t,
    uint64_t, uint64_t, int, int, nfsv4stateid_t *, int *,
    struct nfsclflayouthead *, struct ucred *, NFSPROC_T *, void *);
static int nfsrpc_layoutgetres(struct nfsmount *, vnode_t, uint8_t *,
    int, nfsv4stateid_t *, int, uint32_t *, struct nfscllayout **,
    struct nfsclflayouthead *, int, int, int *, struct ucred *, NFSPROC_T *);

+int nfs_pnfsio(task_fn_t *, void *);
+
/*
 * nfs null call from vfs.
 * Builds an NFSPROC_NULL request for "vp" and sends it with nfscl_request().
 * Returns the error from nfscl_request() or, when that is 0, the reply
 * status nd_repstat.  nd_mrep is handed to mbuf_freem() on every path.
 */
APPLESTATIC int
nfsrpc_null(vnode_t vp, struct ucred *cred, NFSPROC_T *p)
{
	int error;
	struct nfsrv_descript nfsd, *nd = &nfsd;

	NFSCL_REQSTART(nd, NFSPROC_NULL, vp);
	error = nfscl_request(nd, vp, p, cred, NULL);
	/* A transport level error takes precedence over the reply status. */
	if (nd->nd_repstat && !error)
		error = nd->nd_repstat;
	mbuf_freem(nd->nd_mrep);
	return (error);
}

/*
 * nfs access rpc op.
 * For nfs version 3 and 4, use the access rpc to check accessibility. If file
 * modes are changed on the server, accesses might still fail later.
*/
/*
 * nfsrpc_access() maps the VREAD/VWRITE/VEXEC bits of "acmode" to
 * NFSACCESS_xxx bits (VWRITE also asks for NFSACCESS_DELETE and VEXEC maps
 * to NFSACCESS_LOOKUP when "vp" is a directory), does the RPC through
 * nfsrpc_accessrpc() and returns EACCES when the RPC worked but not all of
 * the requested bits were granted.  Otherwise the error from
 * nfsrpc_accessrpc() is returned.  "nap" and "attrflagp" are passed through
 * to nfsrpc_accessrpc().
 */
APPLESTATIC int
nfsrpc_access(vnode_t vp, int acmode, struct ucred *cred,
    NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
{
	int error;
	u_int32_t mode, rmode;

	if (acmode & VREAD)
		mode = NFSACCESS_READ;
	else
		mode = 0;
	if (vnode_vtype(vp) == VDIR) {
		if (acmode & VWRITE)
			mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND |
				 NFSACCESS_DELETE);
		if (acmode & VEXEC)
			mode |= NFSACCESS_LOOKUP;
	} else {
		if (acmode & VWRITE)
			mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
		if (acmode & VEXEC)
			mode |= NFSACCESS_EXECUTE;
	}

	/*
	 * Now, just call nfsrpc_accessrpc() to do the actual RPC.
	 */
	error = nfsrpc_accessrpc(vp, mode, cred, p, nap, attrflagp, &rmode,
	    NULL);

	/*
	 * The NFS V3 spec does not clarify whether or not
	 * the returned access bits can be a superset of
	 * the ones requested, so...
	 */
	/* rmode is only examined when the RPC succeeded and filled it in. */
	if (!error && (rmode & mode) != mode)
		error = EACCES;
	return (error);
}

/*
 * The actual rpc, separated out for Darwin.
 * Sends "mode" in an Access request for "vp"; for NFSv4 a Getattr op is
 * appended to the request.
 * "*attrflagp" is cleared on entry and, together with "nap", handed to
 * nfscl_postop_attr() when the reply is parsed.
 * "*rmodep" is only written when the reply status is 0; it is the returned
 * access mask with the bits the server did not report as supported cleared
 * (for non-NFSv4 all requested bits are treated as supported).
 * Returns the error from nfscl_request() (without touching nd_mrep), a
 * parse error, or the reply status nd_repstat.
 */
APPLESTATIC int
nfsrpc_accessrpc(vnode_t vp, u_int32_t mode, struct ucred *cred,
    NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, u_int32_t *rmodep,
    void *stuff)
{
	u_int32_t *tl;
	u_int32_t supported, rmode;
	int error;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	nfsattrbit_t attrbits;

	*attrflagp = 0;
	supported = mode;
	NFSCL_REQSTART(nd, NFSPROC_ACCESS, vp);
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(mode);
	if (nd->nd_flag & ND_NFSV4) {
		/*
		 * And do a Getattr op.
		 */
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(NFSV4OP_GETATTR);
		NFSGETATTR_ATTRBIT(&attrbits);
		(void) nfsrv_putattrbit(nd, &attrbits);
	}
	error = nfscl_request(nd, vp, p, cred, stuff);
	if (error)
		return (error);
	/* For NFSv3 the post-op attributes come before the access result. */
	if (nd->nd_flag & ND_NFSV3) {
		error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
		if (error)
			goto nfsmout;
	}
	if (!nd->nd_repstat) {
		if (nd->nd_flag & ND_NFSV4) {
			/* NFSv4 replies with "supported" followed by "access". */
			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			supported = fxdr_unsigned(u_int32_t, *tl++);
		} else {
			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		}
		rmode = fxdr_unsigned(u_int32_t, *tl);
		if (nd->nd_flag & ND_NFSV4)
			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);

		/*
		 * It's not obvious what should be done about
		 * unsupported access modes. For now, be paranoid
		 * and clear the unsupported ones.
		 */
		rmode &= supported;
		*rmodep = rmode;
	} else
		error = nd->nd_repstat;
nfsmout:
	mbuf_freem(nd->nd_mrep);
	return (error);
}

/*
 * nfs open rpc
 * Returns 0 right away for anything but a regular file.  Otherwise
 * FREAD/FWRITE in "amode" are mapped to NFSV4OPEN_ACCESSREAD/ACCESSWRITE and
 * nfscl_open() decides what is needed:
 * - NFSCLOPEN_DOOPEN: do the Open against the server, via
 *   nfsrpc_getopenlayout() on the first attempt when pNFS and callbacks are
 *   enabled and the node is not marked NNOLAYOUT, else via nfsrpc_openrpc().
 *   EIO is set when the node has no n_v4 name information.
 * - NFSCLOPEN_SETCRED: only copy the credentials into the open.
 * The whole sequence is retried after a nfs_catnap() for NFSERR_GRACE,
 * NFSERR_STALECLIENTID, NFSERR_STALEDONTRECOVER, NFSERR_DELAY and
 * NFSERR_BADSESSION.  For NFSERR_EXPIRED/NFSERR_BADSTATEID it is retried
 * while nfscl_hasexpired() returns 0 and fewer than 4 such retries have been
 * done; once 4 have been done, any remaining error is turned into EIO.
 */
APPLESTATIC int
nfsrpc_open(vnode_t vp, int amode, struct ucred *cred, NFSPROC_T *p)
{
	struct nfsclopen *op;
	struct nfscldeleg *dp;
	struct nfsfh *nfhp;
	struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
	u_int32_t mode, clidrev;
	int ret, newone, error, expireret = 0, retrycnt;

	/*
	 * For NFSv4, Open Ops are only done on Regular Files.
	 */
	if (vnode_vtype(vp) != VREG)
		return (0);
	mode = 0;
	if (amode & FREAD)
		mode |= NFSV4OPEN_ACCESSREAD;
	if (amode & FWRITE)
		mode |= NFSV4OPEN_ACCESSWRITE;
	nfhp = np->n_fhp;

	retrycnt = 0;
	/* Debugging printout, compiled out unless "notdef" is defined. */
#ifdef notdef
{	char name[100]; int namel;
	namel = (np->n_v4->n4_namelen < 100) ? np->n_v4->n4_namelen : 99;
	bcopy(NFS4NODENAME(np->n_v4), name, namel);
	name[namel] = '\0';
	printf("rpcopen p=0x%x name=%s",p->p_pid,name);
	if (nfhp->nfh_len > 0) printf(" fh=0x%x\n",nfhp->nfh_fh[12]);
	else printf(" fhl=0\n");
}
#endif
	do {
	    dp = NULL;
	    error = nfscl_open(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 1,
		cred, p, NULL, &op, &newone, &ret, 1);
	    if (error) {
		return (error);
	    }
	    /* clidrev == 0 disables the expired/bad stateid retry below. */
	    if (nmp->nm_clp != NULL)
		clidrev = nmp->nm_clp->nfsc_clientidrev;
	    else
		clidrev = 0;
	    if (ret == NFSCLOPEN_DOOPEN) {
		if (np->n_v4 != NULL) {
			/*
			 * For the first attempt, try and get a layout, if
			 * pNFS is enabled for the mount.
			 */
			if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
			    nfs_numnfscbd == 0 ||
			    (np->n_flag & NNOLAYOUT) != 0 || retrycnt > 0)
				error = nfsrpc_openrpc(nmp, vp,
				    np->n_v4->n4_data,
				    np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
				    np->n_fhp->nfh_len, mode, op,
				    NFS4NODENAME(np->n_v4),
				    np->n_v4->n4_namelen, &dp, 0, 0x0, cred,
				    p, 0, 0);
			else
				error = nfsrpc_getopenlayout(nmp, vp,
				    np->n_v4->n4_data,
				    np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
				    np->n_fhp->nfh_len, mode, op,
				    NFS4NODENAME(np->n_v4),
				    np->n_v4->n4_namelen, &dp, cred, p);
			/* A delegation was returned along with the open. */
			if (dp != NULL) {
#ifdef APPLE
				OSBitAndAtomic((int32_t)~NDELEGMOD, (UInt32 *)&np->n_flag);
#else
				NFSLOCKNODE(np);
				np->n_flag &= ~NDELEGMOD;
				/*
				 * Invalidate the attribute cache, so that
				 * attributes that pre-date the issue of a
				 * delegation are not cached, since the
				 * cached attributes will remain valid while
				 * the delegation is held.
				 */
				NFSINVALATTRCACHE(np);
				NFSUNLOCKNODE(np);
#endif
				(void) nfscl_deleg(nmp->nm_mountp,
				    op->nfso_own->nfsow_clp,
				    nfhp->nfh_fh, nfhp->nfh_len, cred, p, &dp);
			}
		} else {
			error = EIO;
		}
		newnfs_copyincred(cred, &op->nfso_cred);
	    } else if (ret == NFSCLOPEN_SETCRED)
		/*
		 * This is a new local open on a delegation. It needs
		 * to have credentials so that an open can be done
		 * against the server during recovery.
		 */
		newnfs_copyincred(cred, &op->nfso_cred);

	    /*
	     * nfso_opencnt is the count of how many VOP_OPEN()s have
	     * been done on this Open successfully and a VOP_CLOSE()
	     * is expected for each of these.
	     * If error is non-zero, don't increment it, since the Open
	     * hasn't succeeded yet.
	     */
	    if (!error)
		op->nfso_opencnt++;
	    nfscl_openrelease(nmp, op, error, newone);
	    if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
		error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
		error == NFSERR_BADSESSION) {
		(void) nfs_catnap(PZERO, error, "nfs_open");
	    } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
		&& clidrev != 0) {
		expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
		retrycnt++;
	    }
	} while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
	    error == NFSERR_BADSESSION ||
	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
	     expireret == 0 && clidrev != 0 && retrycnt < 4));
	if (error && retrycnt >= 4)
		error = EIO;
	return (error);
}

/*
 * the actual open rpc
 * Builds and sends an Open (NFSV4OPEN_NOCREATE) followed by a Getattr for
 * the change and modify time attributes.
 * - "nfhp"/"fhlen" is the file handle the request is started with and
 *   "name"/"namelen" the name sent for the non-reclaim claim types.
 * - "newfhp"/"newfhlen" is the file handle copied into a returned
 *   delegation and passed to OpenConfirm.
 * - "reclaim" != 0 selects NFSV4OPEN_CLAIMPREVIOUS with "delegtype";
 *   otherwise a non-NULL "*dpp" on entry selects NFSV4OPEN_CLAIMDELEGATECUR
 *   with that delegation's stateid and a NULL one NFSV4OPEN_CLAIMNULL.
 * - "syscred" != 0 sets ND_USEGSSNAME for the request.
 * - "recursed" != 0 stops this function from calling itself again.
 * On return "*dpp" is the newly allocated delegation when the function
 * succeeded and one was issued, else NULL.  The open stateid and
 * nfso_posixlock in "op" are updated from the reply.
 * NFSERR_STALECLIENTID initiates recovery for the client.
 */
APPLESTATIC int
nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen,
    u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op,
    u_int8_t *name, int namelen, struct nfscldeleg **dpp,
    int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p,
    int syscred, int recursed)
{
	u_int32_t *tl;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfscldeleg *dp, *ndp = NULL;
	struct nfsvattr nfsva;
	u_int32_t rflags, deleg;
	nfsattrbit_t attrbits;
	int error, ret, acesize, limitby;
	struct nfsclsession *tsep;

	/* Remember the delegation to be claimed; *dpp becomes the result. */
	dp = *dpp;
	*dpp = NULL;
	nfscl_reqstart(nd, NFSPROC_OPEN, nmp, nfhp, fhlen, NULL, NULL, 0, 0);
	/* seqid, share access, share deny and the 2 words of the clientid. */
	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
	*tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
	tsep = nfsmnt_mdssession(nmp);
	*tl++ = tsep->nfsess_clientid.lval[0];
	*tl = tsep->nfsess_clientid.lval[1];
	(void) nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
	if (reclaim) {
		*tl = txdr_unsigned(NFSV4OPEN_CLAIMPREVIOUS);
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(delegtype);
	} else {
		if (dp != NULL) {
			*tl = txdr_unsigned(NFSV4OPEN_CLAIMDELEGATECUR);
			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
			/* A seqid of 0 is sent when NFSHASNFSV4N() is true. */
			if (NFSHASNFSV4N(nmp))
				*tl++ = 0;
			else
				*tl++ = dp->nfsdl_stateid.seqid;
			*tl++ = dp->nfsdl_stateid.other[0];
			*tl++ = dp->nfsdl_stateid.other[1];
			*tl = dp->nfsdl_stateid.other[2];
		} else {
			*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
		}
		(void) nfsm_strtom(nd, name, namelen);
	}
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OP_GETATTR);
	NFSZERO_ATTRBIT(&attrbits);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
	(void) nfsrv_putattrbit(nd, &attrbits);
	if (syscred)
		nd->nd_flag |= ND_USEGSSNAME;
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	if (error)
		return (error);
	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
	if (!nd->nd_repstat) {
		/*
		 * The open stateid is followed by 6 more words; only the
		 * last of them (the result flags) is used here.
		 */
		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
		    6 * NFSX_UNSIGNED);
		op->nfso_stateid.seqid = *tl++;
		op->nfso_stateid.other[0] = *tl++;
		op->nfso_stateid.other[1] = *tl++;
		op->nfso_stateid.other[2] = *tl;
		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
		if (error)
			goto nfsmout;
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		deleg = fxdr_unsigned(u_int32_t, *tl);
		if (deleg == NFSV4OPEN_DELEGATEREAD ||
		    deleg == NFSV4OPEN_DELEGATEWRITE) {
			if (!(op->nfso_own->nfsow_clp->nfsc_flags &
			      NFSCLFLAGS_FIRSTDELEG))
				op->nfso_own->nfsow_clp->nfsc_flags |=
				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
			/* Sized to hold the file handle after the structure. */
			MALLOC(ndp, struct nfscldeleg *,
			    sizeof (struct nfscldeleg) + newfhlen,
			    M_NFSCLDELEG, M_WAITOK);
			LIST_INIT(&ndp->nfsdl_owner);
			LIST_INIT(&ndp->nfsdl_lock);
			ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
			ndp->nfsdl_fhlen = newfhlen;
			NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
			newnfs_copyincred(cred, &ndp->nfsdl_cred);
			nfscl_lockinit(&ndp->nfsdl_rwlock);
			/* Delegation stateid followed by the recall flag. */
			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
			    NFSX_UNSIGNED);
			ndp->nfsdl_stateid.seqid = *tl++;
			ndp->nfsdl_stateid.other[0] = *tl++;
			ndp->nfsdl_stateid.other[1] = *tl++;
			ndp->nfsdl_stateid.other[2] = *tl++;
			ret = fxdr_unsigned(int, *tl);
			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
				ndp->nfsdl_flags = NFSCLDL_WRITE;
				/*
				 * Indicates how much the file can grow.
				 */
				NFSM_DISSECT(tl, u_int32_t *,
				    3 * NFSX_UNSIGNED);
				limitby = fxdr_unsigned(int, *tl++);
				switch (limitby) {
				case NFSV4OPEN_LIMITSIZE:
					ndp->nfsdl_sizelimit = fxdr_hyper(tl);
					break;
				case NFSV4OPEN_LIMITBLOCKS:
					/* Product of the two words that follow. */
					ndp->nfsdl_sizelimit =
					    fxdr_unsigned(u_int64_t, *tl++);
					ndp->nfsdl_sizelimit *=
					    fxdr_unsigned(u_int64_t, *tl);
					break;
				default:
					error = NFSERR_BADXDR;
					goto nfsmout;
				}
			} else {
				ndp->nfsdl_flags = NFSCLDL_READ;
			}
			if (ret)
				ndp->nfsdl_flags |= NFSCLDL_RECALL;
			error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, &ret,
			    &acesize, p);
			if (error)
				goto nfsmout;
		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
			error = NFSERR_BADXDR;
			goto nfsmout;
		}
		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
		    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
		    NULL, NULL, NULL, p, cred);
		if (error)
			goto nfsmout;
		if (ndp != NULL) {
			ndp->nfsdl_change = nfsva.na_filerev;
			ndp->nfsdl_modtime = nfsva.na_mtime;
			ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
		}
		if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM)) {
		    do {
			ret = nfsrpc_openconfirm(vp, newfhp, newfhlen, op,
			    cred, p);
			if (ret == NFSERR_DELAY)
			    (void) nfs_catnap(PZERO, ret, "nfs_open");
		    } while (ret == NFSERR_DELAY);
		    error = ret;
		}
		if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) ||
		    nfscl_assumeposixlocks)
		    op->nfso_posixlock = 1;
		else
		    op->nfso_posixlock = 0;

		/*
		 * If the server is handing out delegations, but we didn't
		 * get one because an OpenConfirm was required, try the
		 * Open again, to get a delegation. This is a harmless no-op,
		 * from a server's point of view.
		 */
		if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM) &&
		    (op->nfso_own->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG)
		    && !error && dp == NULL && ndp == NULL && !recursed) {
		    do {
			ret = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp,
			    newfhlen, mode, op, name, namelen, &ndp, 0, 0x0,
			    cred, p, syscred, 1);
			if (ret == NFSERR_DELAY)
			    (void) nfs_catnap(PZERO, ret, "nfs_open2");
		    } while (ret == NFSERR_DELAY);
		    /*
		     * A failure of the second Open only fails this call
		     * for the three errors tested below.
		     */
		    if (ret) {
			if (ndp != NULL) {
				FREE((caddr_t)ndp, M_NFSCLDELEG);
				ndp = NULL;
			}
			if (ret == NFSERR_STALECLIENTID ||
			    ret == NFSERR_STALEDONTRECOVER ||
			    ret == NFSERR_BADSESSION)
				error = ret;
		    }
		}
	}
	if (nd->nd_repstat != 0 && error == 0)
		error = nd->nd_repstat;
	if (error == NFSERR_STALECLIENTID)
		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
nfsmout:
	/* Hand the new delegation back on success, free it otherwise. */
	if (!error)
		*dpp = ndp;
	else if (ndp != NULL)
		FREE((caddr_t)ndp, M_NFSCLDELEG);
	mbuf_freem(nd->nd_mrep);
	return (error);
}

/*
 * open downgrade rpc
 * Sends the open stateid (with a seqid of 0 when NFSHASNFSV4N() is true for
 * the mount), the open owner seqid and the access/deny bits taken from
 * "mode".  When the reply status is 0 the open stateid in "op" is replaced
 * by the one in the reply.  NFSERR_STALESTATEID initiates recovery for the
 * client.  Returns the error from nfscl_request(), a parse error or the
 * reply status.
 */
APPLESTATIC int
nfsrpc_opendowngrade(vnode_t vp, u_int32_t mode, struct nfsclopen *op,
    struct ucred *cred, NFSPROC_T *p)
{
	u_int32_t *tl;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	int error;

	NFSCL_REQSTART(nd, NFSPROC_OPENDOWNGRADE, vp);
	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED);
	if (NFSHASNFSV4N(VFSTONFS(vnode_mount(vp))))
		*tl++ = 0;
	else
		*tl++ = op->nfso_stateid.seqid;
	*tl++ = op->nfso_stateid.other[0];
	*tl++ = op->nfso_stateid.other[1];
	*tl++ = op->nfso_stateid.other[2];
	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
	*tl = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
	error = nfscl_request(nd, vp, p, cred, NULL);
	if (error)
		return (error);
	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
	if (!nd->nd_repstat) {
		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
		op->nfso_stateid.seqid = *tl++;
		op->nfso_stateid.other[0] = *tl++;
		op->nfso_stateid.other[1] = *tl++;
		op->nfso_stateid.other[2] = *tl;
	}
	if (nd->nd_repstat && error == 0)
		error = nd->nd_repstat;
	if (error == NFSERR_STALESTATEID)
		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
nfsmout:
	mbuf_freem(nd->nd_mrep);
	return (error);
}

/*
 * V4 Close operation.
 * Returns 0 right away for anything but a regular file.  Otherwise calls
 * nfscl_doclose() when "doclose" is non-zero and nfscl_getclose() when it
 * is zero, returns their error if any and else releases the client
 * structure they returned and returns 0.
 */
APPLESTATIC int
nfsrpc_close(vnode_t vp, int doclose, NFSPROC_T *p)
{
	struct nfsclclient *clp;
	int error;

	if (vnode_vtype(vp) != VREG)
		return (0);
	if (doclose)
		error = nfscl_doclose(vp, &clp, p);
	else
		error = nfscl_getclose(vp, &clp);
	if (error)
		return (error);

	nfscl_clientrelease(clp);
	return (0);
}

/*
 * Close the open.
 * Using a temporary copy of the open's credentials this function
 * - unlocks the byte ranges of every lock owner of "op" and frees the lock
 *   structures (for POSIX type locking a single unlock of the whole file is
 *   done per lock owner),
 * - does a ReleaseLockOwner for each lock owner,
 * - closes the open via nfscl_tryclose(), retrying while it returns
 *   NFSERR_GRACE, with the open owner exclusively locked, and
 * - frees the lock owners and "op" itself.
 * Nothing is returned; errors from the individual RPCs are not reported.
 */
APPLESTATIC void
nfsrpc_doclose(struct nfsmount *nmp, struct nfsclopen *op, NFSPROC_T *p)
{
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfscllockowner *lp, *nlp;
	struct nfscllock *lop, *nlop;
	struct ucred *tcred;
	u_int64_t off = 0, len = 0;
	u_int32_t type = NFSV4LOCKT_READ;
	int error, do_unlock, trycnt;

	tcred = newnfs_getcred();
	newnfs_copycred(&op->nfso_cred, tcred);
	/*
	 * (Theoretically this could be done in the same
	 *  compound as the close, but having multiple
	 *  sequenced Ops in the same compound might be
	 *  too scary for some servers.)
	 */
	if (op->nfso_posixlock) {
		off = 0;
		len = NFS64BITSSET;
		type = NFSV4LOCKT_READ;
	}

	/*
	 * Since this function is only called from VOP_INACTIVE(), no
	 * other thread will be manipulating this Open. As such, the
	 * lock lists are not being changed by other threads, so it should
	 * be safe to do this without locking.
	 */
	LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
		do_unlock = 1;
		LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) {
			if (op->nfso_posixlock == 0) {
				off = lop->nfslo_first;
				len = lop->nfslo_end - lop->nfslo_first;
				if (lop->nfslo_type == F_WRLCK)
					type = NFSV4LOCKT_WRITE;
				else
					type = NFSV4LOCKT_READ;
			}
			if (do_unlock) {
				/*
				 * Retry the unlock a bounded number of times
				 * while the server replies GRACE or DELAY.
				 */
				trycnt = 0;
				do {
					error = nfsrpc_locku(nd, nmp, lp, off,
					    len, type, tcred, p, 0);
					if ((nd->nd_repstat == NFSERR_GRACE ||
					    nd->nd_repstat == NFSERR_DELAY) &&
					    error == 0)
						(void) nfs_catnap(PZERO,
						    (int)nd->nd_repstat,
						    "nfs_close");
				} while ((nd->nd_repstat == NFSERR_GRACE ||
				    nd->nd_repstat == NFSERR_DELAY) &&
				    error == 0 && trycnt++ < 5);
				/* One whole file unlock is enough for POSIX. */
				if (op->nfso_posixlock)
					do_unlock = 0;
			}
			nfscl_freelock(lop, 0);
		}
		/*
		 * Do a ReleaseLockOwner.
		 * The lock owner name nfsl_owner may be used by other opens for
		 * other files but the lock_owner4 name that nfsrpc_rellockown()
		 * puts on the wire has the file handle for this file appended
		 * to it, so it can be done now.
		 */
		(void)nfsrpc_rellockown(nmp, lp, lp->nfsl_open->nfso_fh,
		    lp->nfsl_open->nfso_fhlen, tcred, p);
	}

	/*
	 * There could be other Opens for different files on the same
	 * OpenOwner, so locking is required.
	 */
	NFSLOCKCLSTATE();
	nfscl_lockexcl(&op->nfso_own->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
	NFSUNLOCKCLSTATE();
	do {
		error = nfscl_tryclose(op, tcred, nmp, p);
		if (error == NFSERR_GRACE)
			(void) nfs_catnap(PZERO, error, "nfs_close");
	} while (error == NFSERR_GRACE);
	NFSLOCKCLSTATE();
	nfscl_lockunlock(&op->nfso_own->nfsow_rwlock);

	LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp)
		nfscl_freelockowner(lp, 0);
	nfscl_freeopen(op, 0);
	NFSUNLOCKCLSTATE();
	NFSFREECRED(tcred);
}

/*
 * The actual Close RPC.
*/ APPLESTATIC int nfsrpc_closerpc(struct nfsrv_descript *nd, struct nfsmount *nmp, struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p, int syscred) { u_int32_t *tl; int error; nfscl_reqstart(nd, NFSPROC_CLOSE, nmp, op->nfso_fh, op->nfso_fhlen, NULL, NULL, 0, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID); *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = op->nfso_stateid.seqid; *tl++ = op->nfso_stateid.other[0]; *tl++ = op->nfso_stateid.other[1]; *tl = op->nfso_stateid.other[2]; if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd); if (nd->nd_repstat == 0) NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); error = nd->nd_repstat; if (error == NFSERR_STALESTATEID) nfscl_initiate_recovery(op->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * V4 Open Confirm RPC. */ APPLESTATIC int nfsrpc_openconfirm(vnode_t vp, u_int8_t *nfhp, int fhlen, struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp; int error; nmp = VFSTONFS(vnode_mount(vp)); if (NFSHASNFSV4N(nmp)) return (0); /* No confirmation for NFSv4.1. 
*/ nfscl_reqstart(nd, NFSPROC_OPENCONFIRM, nmp, nfhp, fhlen, NULL, NULL, 0, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID); *tl++ = op->nfso_stateid.seqid; *tl++ = op->nfso_stateid.other[0]; *tl++ = op->nfso_stateid.other[1]; *tl++ = op->nfso_stateid.other[2]; *tl = txdr_unsigned(op->nfso_own->nfsow_seqid); error = nfscl_request(nd, vp, p, cred, NULL); if (error) return (error); NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd); if (!nd->nd_repstat) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); op->nfso_stateid.seqid = *tl++; op->nfso_stateid.other[0] = *tl++; op->nfso_stateid.other[1] = *tl++; op->nfso_stateid.other[2] = *tl; } error = nd->nd_repstat; if (error == NFSERR_STALESTATEID) nfscl_initiate_recovery(op->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Do the setclientid and setclientid confirm RPCs. Called from nfs_statfs() * when a mount has just occurred and when the server replies NFSERR_EXPIRED. */ APPLESTATIC int nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, int reclaim, struct ucred *cred, NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; nfsattrbit_t attrbits; u_int8_t *cp = NULL, *cp2, addr[INET6_ADDRSTRLEN + 9]; u_short port; int error, isinet6 = 0, callblen; nfsquad_t confirm; u_int32_t lease; static u_int32_t rev = 0; struct nfsclds *dsp; struct in6_addr a6; struct nfsclsession *tsep; if (nfsboottime.tv_sec == 0) NFSSETBOOTTIME(nfsboottime); clp->nfsc_rev = rev++; if (NFSHASNFSV4N(nmp)) { /* * Either there was no previous session or the * previous session has failed, so... * do an ExchangeID followed by the CreateSession. 
*/ error = nfsrpc_exchangeid(nmp, clp, &nmp->nm_sockreq, NFSV4EXCH_USEPNFSMDS | NFSV4EXCH_USENONPNFS, &dsp, cred, p); NFSCL_DEBUG(1, "aft exch=%d\n", error); if (error == 0) error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess, &nmp->nm_sockreq, dsp->nfsclds_sess.nfsess_sequenceid, 1, cred, p); if (error == 0) { NFSLOCKMNT(nmp); /* * The old sessions cannot be safely free'd * here, since they may still be used by * in-progress RPCs. */ tsep = NULL; if (TAILQ_FIRST(&nmp->nm_sess) != NULL) tsep = NFSMNT_MDSSESSION(nmp); TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, nfsclds_list); /* * Wake up RPCs waiting for a slot on the * old session. These will then fail with * NFSERR_BADSESSION and be retried with the * new session by nfsv4_setsequence(). * Also wakeup() processes waiting for the * new session. */ if (tsep != NULL) wakeup(&tsep->nfsess_slots); wakeup(&nmp->nm_sess); NFSUNLOCKMNT(nmp); } else nfscl_freenfsclds(dsp); NFSCL_DEBUG(1, "aft createsess=%d\n", error); if (error == 0 && reclaim == 0) { error = nfsrpc_reclaimcomplete(nmp, cred, p); NFSCL_DEBUG(1, "aft reclaimcomp=%d\n", error); if (error == NFSERR_COMPLETEALREADY || error == NFSERR_NOTSUPP) /* Ignore this error. */ error = 0; } return (error); } /* * Allocate a single session structure for NFSv4.0, because some of * the fields are used by NFSv4.0 although it doesn't do a session. 
*/ dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS, M_WAITOK | M_ZERO); mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF); mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF); NFSLOCKMNT(nmp); TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, nfsclds_list); tsep = NFSMNT_MDSSESSION(nmp); NFSUNLOCKMNT(nmp); nfscl_reqstart(nd, NFSPROC_SETCLIENTID, nmp, NULL, 0, NULL, NULL, 0, 0); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(nfsboottime.tv_sec); *tl = txdr_unsigned(clp->nfsc_rev); (void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen); /* * set up the callback address */ NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFS_CALLBCKPROG); callblen = strlen(nfsv4_callbackaddr); if (callblen == 0) cp = nfscl_getmyip(nmp, &a6, &isinet6); if (nfscl_enablecallb && nfs_numnfscbd > 0 && (callblen > 0 || cp != NULL)) { port = htons(nfsv4_cbport); cp2 = (u_int8_t *)&port; #ifdef INET6 if ((callblen > 0 && strchr(nfsv4_callbackaddr, ':')) || isinet6) { char ip6buf[INET6_ADDRSTRLEN], *ip6add; (void) nfsm_strtom(nd, "tcp6", 4); if (callblen == 0) { ip6_sprintf(ip6buf, (struct in6_addr *)cp); ip6add = ip6buf; } else { ip6add = nfsv4_callbackaddr; } snprintf(addr, INET6_ADDRSTRLEN + 9, "%s.%d.%d", ip6add, cp2[0], cp2[1]); } else #endif { (void) nfsm_strtom(nd, "tcp", 3); if (callblen == 0) snprintf(addr, INET6_ADDRSTRLEN + 9, "%d.%d.%d.%d.%d.%d", cp[0], cp[1], cp[2], cp[3], cp2[0], cp2[1]); else snprintf(addr, INET6_ADDRSTRLEN + 9, "%s.%d.%d", nfsv4_callbackaddr, cp2[0], cp2[1]); } (void) nfsm_strtom(nd, addr, strlen(addr)); } else { (void) nfsm_strtom(nd, "tcp", 3); (void) nfsm_strtom(nd, "0.0.0.0.0.0", 11); } NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(clp->nfsc_cbident); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); 
tsep->nfsess_clientid.lval[0] = *tl++; tsep->nfsess_clientid.lval[1] = *tl++; confirm.lval[0] = *tl++; confirm.lval[1] = *tl; mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; /* * and confirm it. */ nfscl_reqstart(nd, NFSPROC_SETCLIENTIDCFRM, nmp, NULL, 0, NULL, NULL, 0, 0); NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); *tl++ = tsep->nfsess_clientid.lval[0]; *tl++ = tsep->nfsess_clientid.lval[1]; *tl++ = confirm.lval[0]; *tl = confirm.lval[1]; nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; if (nd->nd_repstat == 0) { nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, nmp->nm_fh, nmp->nm_fhsize, NULL, NULL, 0, 0); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME); (void) nfsrv_putattrbit(nd, &attrbits); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); if (nd->nd_repstat == 0) { error = nfsv4_loadattr(nd, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, &lease, NULL, p, cred); if (error) goto nfsmout; clp->nfsc_renew = NFSCL_RENEW(lease); clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew; clp->nfsc_clientidrev++; if (clp->nfsc_clientidrev == 0) clp->nfsc_clientidrev++; } } } error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs getattr call. 
*/ APPLESTATIC int nfsrpc_getattr(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, void *stuff) { struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfsattrbit_t attrbits; NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp); if (nd->nd_flag & ND_NFSV4) { NFSGETATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (!nd->nd_repstat) error = nfsm_loadattr(nd, nap); else error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * nfs getattr call with non-vnode arguemnts. */ APPLESTATIC int nfsrpc_getattrnovp(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, int syscred, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, u_int64_t *xidp, uint32_t *leasep) { struct nfsrv_descript nfsd, *nd = &nfsd; int error, vers = NFS_VER2; nfsattrbit_t attrbits; nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, fhp, fhlen, NULL, NULL, 0, 0); if (nd->nd_flag & ND_NFSV4) { vers = NFS_VER4; NFSGETATTR_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME); (void) nfsrv_putattrbit(nd, &attrbits); } else if (nd->nd_flag & ND_NFSV3) { vers = NFS_VER3; } if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, vers, NULL, 1, xidp, NULL); if (error) return (error); if (nd->nd_repstat == 0) { if ((nd->nd_flag & ND_NFSV4) != 0) error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, leasep, NULL, NULL, NULL); else error = nfsm_loadattr(nd, nap); } else error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Do an nfs setattr operation. 
*/
/*
 * Set attributes (vap != NULL) or an ACL (vap == NULL, via aclp) on vp.
 * For NFSv4 mounts a stateid is looked up first; if none is found for a
 * regular file, the file is opened temporarily and closed again after
 * the RPC.  The RPC is retried for the recoverable NFSv4 errors listed in
 * the loop condition; an error left after 4 or more tries becomes EIO.
 */
APPLESTATIC int
nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp,
    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *rnap, int *attrflagp,
    void *stuff)
{
	int error, expireret = 0, openerr, retrycnt;
	u_int32_t clidrev = 0, mode;
	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
	struct nfsfh *nfhp;
	nfsv4stateid_t stateid;
	void *lckp;

	if (nmp->nm_clp != NULL)
		clidrev = nmp->nm_clp->nfsc_clientidrev;
	/* A size change needs a write-mode stateid, anything else read. */
	if (vap != NULL && NFSATTRISSET(u_quad_t, vap, va_size))
		mode = NFSV4OPEN_ACCESSWRITE;
	else
		mode = NFSV4OPEN_ACCESSREAD;
	retrycnt = 0;
	do {
		lckp = NULL;
		openerr = 1;	/* non-zero: no temporary open to close */
		if (NFSHASNFSV4(nmp)) {
			nfhp = VTONFS(vp)->n_fhp;
			error = nfscl_getstateid(vp, nfhp->nfh_fh,
			    nfhp->nfh_len, mode, 0, cred, p, &stateid, &lckp);
			if (error && vnode_vtype(vp) == VREG &&
			    (mode == NFSV4OPEN_ACCESSWRITE ||
			     nfstest_openallsetattr)) {
				/*
				 * No Open stateid, so try and open the file
				 * now.
				 */
				if (mode == NFSV4OPEN_ACCESSWRITE)
					openerr = nfsrpc_open(vp, FWRITE, cred,
					    p);
				else
					openerr = nfsrpc_open(vp, FREAD, cred,
					    p);
				if (!openerr)
					(void) nfscl_getstateid(vp,
					    nfhp->nfh_fh, nfhp->nfh_len,
					    mode, 0, cred, p, &stateid, &lckp);
			}
		}
		if (vap != NULL)
			error = nfsrpc_setattrrpc(vp, vap, &stateid, cred, p,
			    rnap, attrflagp, stuff);
		else
			error = nfsrpc_setaclrpc(vp, cred, p, aclp, &stateid,
			    stuff);
		/* Record on the mount that a read-mode stateid was refused. */
		if (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD) {
			NFSLOCKMNT(nmp);
			nmp->nm_state |= NFSSTA_OPENMODE;
			NFSUNLOCKMNT(nmp);
		}
		if (error == NFSERR_STALESTATEID)
			nfscl_initiate_recovery(nmp->nm_clp);
		if (lckp != NULL)
			nfscl_lockderef(lckp);
		/* Undo the temporary open done above, if any. */
		if (!openerr)
			(void) nfsrpc_close(vp, 0, p);
		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
			(void) nfs_catnap(PZERO, error, "nfs_setattr");
		} else if ((error == NFSERR_EXPIRED ||
		    error == NFSERR_BADSTATEID) && clidrev != 0) {
			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
		}
		retrycnt++;
	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
	    error == NFSERR_BADSESSION ||
	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
	     expireret == 0 && clidrev != 0 && retrycnt < 4) ||
	    (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD &&
	     retrycnt < 4));
	if (error && retrycnt >= 4)
		error = EIO;
	return (error);
}

/*
 * The actual setattr RPC.  Sets *attrflagp to 0 up front; the reply's
 * attributes are parsed into rnap/attrflagp by nfscl_wcc_data() and
 * nfscl_postop_attr().  For NFSv4 the stateid is put in the request and a
 * Getattr operation is appended.  Returns the first parse/transport error,
 * otherwise nd->nd_repstat.
 */
static int
nfsrpc_setattrrpc(vnode_t vp, struct vattr *vap, nfsv4stateid_t *stateidp,
    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *rnap, int *attrflagp,
    void *stuff)
{
	u_int32_t *tl;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	int error;
	nfsattrbit_t attrbits;

	*attrflagp = 0;
	NFSCL_REQSTART(nd, NFSPROC_SETATTR, vp);
	if (nd->nd_flag & ND_NFSV4)
		nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
	vap->va_type = vnode_vtype(vp);
	nfscl_fillsattr(nd, vap, vp, NFSSATTR_FULL, 0);
	if (nd->nd_flag & ND_NFSV3) {
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = newnfs_false;
	} else if (nd->nd_flag & ND_NFSV4) {
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(NFSV4OP_GETATTR);
		NFSGETATTR_ATTRBIT(&attrbits);
		(void) nfsrv_putattrbit(nd, &attrbits);
	}
	error = nfscl_request(nd, vp, p, cred, stuff);
	if (error)
		return (error);
	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
		error = nfscl_wcc_data(nd, vp, rnap, attrflagp, NULL, stuff);
	/* NFSv4 with more reply data: step over the returned attribute bits. */
	if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 && !error)
		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
	if (!(nd->nd_flag & ND_NFSV3) && !nd->nd_repstat && !error)
		error = nfscl_postop_attr(nd, rnap, attrflagp, stuff);
	mbuf_freem(nd->nd_mrep);
	if (nd->nd_repstat && !error)
		error = nd->nd_repstat;
	return (error);
}

/*
 * nfs lookup rpc
 * Looks "name" (length "len") up in directory dvp.  On success a newly
 * allocated file handle is returned via *nfhpp.  *attrflagp / *dattrflagp
 * are zeroed on entry and set by the attribute parsing routines when
 * attributes for the file (nap) / directory (dnap) were loaded.
 * For NFSv4, "." is answered locally and ".." is done as a Lookupp.
 */
APPLESTATIC int
nfsrpc_lookup(vnode_t dvp, char *name, int len, struct ucred *cred,
    NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nap,
    struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *stuff)
{
	u_int32_t *tl;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfsmount *nmp;
	struct nfsnode *np;
	struct nfsfh *nfhp;
	nfsattrbit_t attrbits;
	int error = 0, lookupp = 0;

	*attrflagp = 0;
	*dattrflagp = 0;
	if (vnode_vtype(dvp) != VDIR)
		return (ENOTDIR);
	nmp = VFSTONFS(vnode_mount(dvp));
	if (len > NFS_MAXNAMLEN)
		return (ENAMETOOLONG);
	if (NFSHASNFSV4(nmp) && len == 1 &&
		name[0] == '.') {
		/*
		 * Just return the current dir's fh.
		 */
		np = VTONFS(dvp);
		MALLOC(nfhp, struct nfsfh *, sizeof (struct nfsfh) +
			np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
		nfhp->nfh_len = np->n_fhp->nfh_len;
		NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
		*nfhpp = nfhp;
		return (0);
	}
	if (NFSHASNFSV4(nmp) && len == 2 &&
		name[0] == '.' && name[1] == '.') {
		lookupp = 1;
		NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, dvp);
	} else {
		NFSCL_REQSTART(nd, NFSPROC_LOOKUP, dvp);
		(void) nfsm_strtom(nd, name, len);
	}
	if (nd->nd_flag & ND_NFSV4) {
		/* Append GetFH and Getattr operations to the request. */
		NFSGETATTR_ATTRBIT(&attrbits);
		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
		*tl = txdr_unsigned(NFSV4OP_GETATTR);
		(void) nfsrv_putattrbit(nd, &attrbits);
	}
	error = nfscl_request(nd, dvp, p, cred, stuff);
	if (error)
		return (error);
	if (nd->nd_repstat) {
		/*
		 * When an NFSv4 Lookupp returns ENOENT, it means that
		 * the lookup is at the root of an fs, so return this dir.
		 */
		if (nd->nd_repstat == NFSERR_NOENT && lookupp) {
		    np = VTONFS(dvp);
		    MALLOC(nfhp, struct nfsfh *, sizeof (struct nfsfh) +
			np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
		    nfhp->nfh_len = np->n_fhp->nfh_len;
		    NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
		    *nfhpp = nfhp;
		    mbuf_freem(nd->nd_mrep);
		    return (0);
		}
		if (nd->nd_flag & ND_NFSV3)
		    error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff);
		else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
		    ND_NFSV4) {
			/* Load the directory attributes. */
			error = nfsm_loadattr(nd, dnap);
			if (error == 0)
				*dattrflagp = 1;
		}
		goto nfsmout;
	}
	if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
		/* Load the directory attributes. */
		error = nfsm_loadattr(nd, dnap);
		if (error != 0)
			goto nfsmout;
		*dattrflagp = 1;
		/* Skip over the Lookup and GetFH operation status values. */
		NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
	}
	error = nfsm_getfh(nd, nfhpp);
	if (error)
		goto nfsmout;

	error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
	if ((nd->nd_flag & ND_NFSV3) && !error)
		error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff);
nfsmout:
	mbuf_freem(nd->nd_mrep);
	if (!error && nd->nd_repstat)
		error = nd->nd_repstat;
	return (error);
}

/*
 * Do a readlink rpc.
 * The link data from the reply is copied into uiop.  *attrflagp is zeroed
 * on entry and set by nfscl_postop_attr() when attributes were loaded
 * into nap.
 */
APPLESTATIC int
nfsrpc_readlink(vnode_t vp, struct uio *uiop, struct ucred *cred,
    NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
{
	u_int32_t *tl;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfsnode *np = VTONFS(vp);
	nfsattrbit_t attrbits;
	int error, len, cangetattr = 1;

	*attrflagp = 0;
	NFSCL_REQSTART(nd, NFSPROC_READLINK, vp);
	if (nd->nd_flag & ND_NFSV4) {
		/*
		 * And do a Getattr op.
		 */
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(NFSV4OP_GETATTR);
		NFSGETATTR_ATTRBIT(&attrbits);
		(void) nfsrv_putattrbit(nd, &attrbits);
	}
	error = nfscl_request(nd, vp, p, cred, stuff);
	if (error)
		return (error);
	if (nd->nd_flag & ND_NFSV3)
		error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
	if (!nd->nd_repstat && !error) {
		NFSM_STRSIZ(len, NFS_MAXPATHLEN);
		/*
		 * This seems weird to me, but must have been added to
		 * FreeBSD for some reason. The only thing I can think of
		 * is that there was/is some server that replies with
		 * more link data than it should?
		 */
		if (len == NFS_MAXPATHLEN) {
			NFSLOCKNODE(np);
			if (np->n_size > 0 && np->n_size < NFS_MAXPATHLEN) {
				len = np->n_size;
				/* Length was truncated: skip the Getattr reply. */
				cangetattr = 0;
			}
			NFSUNLOCKNODE(np);
		}
		error = nfsm_mbufuio(nd, uiop, len);
		if ((nd->nd_flag & ND_NFSV4) && !error && cangetattr)
			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
	}
	if (nd->nd_repstat && !error)
		error = nd->nd_repstat;
nfsmout:
	mbuf_freem(nd->nd_mrep);
	return (error);
}

/*
 * Read operation.
*/
/*
 * Front end for the read RPC.  For NFSv4 mounts a private copy of the
 * credentials and a stateid are obtained before each attempt.  The RPC
 * is repeated while the server returns one of the recoverable errors
 * tested below; an error that remains after 4 or more attempts is
 * mapped to EIO.
 */
APPLESTATIC int
nfsrpc_read(vnode_t vp, struct uio *uiop, struct ucred *cred,
    NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
{
	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
	struct nfsnode *np = VTONFS(vp);
	struct nfsfh *fh = NULL;
	struct ucred *rpccred;
	nfsv4stateid_t stateid;
	void *lockref;
	u_int32_t clidrev = 0;
	int again, error, expireret = 0, retrycnt = 0;

	if (nmp->nm_clp != NULL)
		clidrev = nmp->nm_clp->nfsc_clientidrev;
	rpccred = cred;
	if (NFSHASNFSV4(nmp)) {
		fh = np->n_fhp;
		rpccred = NFSNEWCRED(cred);
	}

	do {
		lockref = NULL;
		if (NFSHASNFSV4(nmp))
			(void)nfscl_getstateid(vp, fh->nfh_fh, fh->nfh_len,
			    NFSV4OPEN_ACCESSREAD, 0, rpccred, p, &stateid,
			    &lockref);
		error = nfsrpc_readrpc(vp, uiop, rpccred, &stateid, p, nap,
		    attrflagp, stuff);

		/* Per-attempt bookkeeping that depends on the result. */
		if (error == NFSERR_OPENMODE) {
			NFSLOCKMNT(nmp);
			nmp->nm_state |= NFSSTA_OPENMODE;
			NFSUNLOCKMNT(nmp);
		}
		if (error == NFSERR_STALESTATEID)
			nfscl_initiate_recovery(nmp->nm_clp);
		if (lockref != NULL)
			nfscl_lockderef(lockref);

		/* Either pause before retrying or check for lease expiry. */
		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
		    error == NFSERR_STALEDONTRECOVER ||
		    error == NFSERR_DELAY || error == NFSERR_OLDSTATEID ||
		    error == NFSERR_BADSESSION) {
			(void) nfs_catnap(PZERO, error, "nfs_read");
		} else if (clidrev != 0 && (error == NFSERR_EXPIRED ||
		    error == NFSERR_BADSTATEID)) {
			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
		}
		retrycnt++;

		/* Decide whether another attempt is warranted. */
		again = (error == NFSERR_GRACE ||
		    error == NFSERR_STALESTATEID ||
		    error == NFSERR_STALEDONTRECOVER ||
		    error == NFSERR_DELAY || error == NFSERR_BADSESSION);
		if (!again && error == NFSERR_OLDSTATEID)
			again = (retrycnt < 20);
		if (!again && (error == NFSERR_EXPIRED ||
		    error == NFSERR_BADSTATEID))
			again = (expireret == 0 && clidrev != 0 &&
			    retrycnt < 4);
		if (!again && error == NFSERR_OPENMODE)
			again = (retrycnt < 4);
	} while (again);

	if (error != 0 && retrycnt >= 4)
		error = EIO;
	if (NFSHASNFSV4(nmp))
		NFSFREECRED(rpccred);
	return (error);
}

/*
 * The actual read RPC.
*/
/*
 * Reads uio_resid bytes starting at uio_offset into uiop, issuing one
 * Read RPC per chunk of at most nm_rsize bytes.  Returns EFBIG if the end
 * offset exceeds nm_maxfilesize or wraps.  The loop stops early on EOF or
 * a zero-length reply (NFSv3/4), or on a short reply (NFSv2).
 * *attrflagp is set when attributes were loaded into nap.
 */
static int
nfsrpc_readrpc(vnode_t vp, struct uio *uiop, struct ucred *cred,
    nfsv4stateid_t *stateidp, NFSPROC_T *p, struct nfsvattr *nap,
    int *attrflagp, void *stuff)
{
	u_int32_t *tl;
	int error = 0, len, retlen, tsiz, eof = 0;
	struct nfsrv_descript nfsd;
	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
	struct nfsrv_descript *nd = &nfsd;
	int rsize;
	off_t tmp_off;

	*attrflagp = 0;
	tsiz = uio_uio_resid(uiop);
	tmp_off = uiop->uio_offset + tsiz;
	NFSLOCKMNT(nmp);
	/* Reject ranges past the maximum file size or that wrap around. */
	if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
		NFSUNLOCKMNT(nmp);
		return (EFBIG);
	}
	rsize = nmp->nm_rsize;
	NFSUNLOCKMNT(nmp);
	nd->nd_mrep = NULL;
	while (tsiz > 0) {
		*attrflagp = 0;
		len = (tsiz > rsize) ? rsize : tsiz;
		NFSCL_REQSTART(nd, NFSPROC_READ, vp);
		if (nd->nd_flag & ND_NFSV4)
			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED * 3);
		if (nd->nd_flag & ND_NFSV2) {
			/* NFSv2: 32-bit offset, count and a zero third word. */
			*tl++ = txdr_unsigned(uiop->uio_offset);
			*tl++ = txdr_unsigned(len);
			*tl = 0;
		} else {
			/* NFSv3/4: 64-bit offset followed by the count. */
			txdr_hyper(uiop->uio_offset, tl);
			*(tl + 2) = txdr_unsigned(len);
		}
		/*
		 * Since I can't do a Getattr for NFSv4 for Write, there
		 * doesn't seem any point in doing one here, either.
		 * (See the comment in nfsrpc_writerpc() for more info.)
		 */
		error = nfscl_request(nd, vp, p, cred, stuff);
		if (error)
			return (error);
		if (nd->nd_flag & ND_NFSV3) {
			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
		} else if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV2)) {
			error = nfsm_loadattr(nd, nap);
			if (!error)
				*attrflagp = 1;
		}
		if (nd->nd_repstat || error) {
			if (!error)
				error = nd->nd_repstat;
			goto nfsmout;
		}
		if (nd->nd_flag & ND_NFSV3) {
			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			eof = fxdr_unsigned(int, *(tl + 1));
		} else if (nd->nd_flag & ND_NFSV4) {
			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
			eof = fxdr_unsigned(int, *tl);
		}
		NFSM_STRSIZ(retlen, len);
		error = nfsm_mbufuio(nd, uiop, retlen);
		if (error)
			goto nfsmout;
		mbuf_freem(nd->nd_mrep);
		nd->nd_mrep = NULL;
		tsiz -= retlen;
		if (!(nd->nd_flag & ND_NFSV2)) {
			if (eof || retlen == 0)
				tsiz = 0;
		} else if (retlen < len)
			tsiz = 0;
	}
	return (0);
nfsmout:
	if (nd->nd_mrep != NULL)
		mbuf_freem(nd->nd_mrep);
	return (error);
}

/*
 * nfs write operation
 * When called_from_strategy != 0, it should return EIO for an error that
 * indicates recovery is in progress, so that the buffer will be left
 * dirty and be written back to the server later. If it loops around,
 * the recovery thread could get stuck waiting for the buffer and recovery
 * will then deadlock.
*/
/*
 * Front end for the write RPC.  *must_commit is cleared on entry.  For
 * NFSv4 mounts a private copy of the credentials and a write stateid are
 * obtained before each attempt; an all-zero stateid means the write is
 * skipped and 0 is returned for that attempt.
 */
APPLESTATIC int
nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
    void *stuff, int called_from_strategy)
{
	int error, expireret = 0, retrycnt, nostateid;
	u_int32_t clidrev = 0;
	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
	struct nfsnode *np = VTONFS(vp);
	struct ucred *newcred;
	struct nfsfh *nfhp = NULL;
	nfsv4stateid_t stateid;
	void *lckp;

	*must_commit = 0;
	if (nmp->nm_clp != NULL)
		clidrev = nmp->nm_clp->nfsc_clientidrev;
	newcred = cred;
	if (NFSHASNFSV4(nmp)) {
		newcred = NFSNEWCRED(cred);
		nfhp = np->n_fhp;
	}
	retrycnt = 0;
	do {
		lckp = NULL;
		nostateid = 0;
		if (NFSHASNFSV4(nmp)) {
			(void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
			    NFSV4OPEN_ACCESSWRITE, 0, newcred, p, &stateid,
			    &lckp);
			if (stateid.other[0] == 0 && stateid.other[1] == 0 &&
			    stateid.other[2] == 0) {
				nostateid = 1;
				NFSCL_DEBUG(1, "stateid0 in write\n");
			}
		}

		/*
		 * If there is no stateid for NFSv4, it means this is an
		 * extraneous write after close. Basically a poorly
		 * implemented buffer cache. Just don't do the write.
		 */
		if (nostateid)
			error = 0;
		else
			error = nfsrpc_writerpc(vp, uiop, iomode, must_commit,
			    newcred, &stateid, p, nap, attrflagp, stuff);
		if (error == NFSERR_STALESTATEID)
			nfscl_initiate_recovery(nmp->nm_clp);
		if (lckp != NULL)
			nfscl_lockderef(lckp);
		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
			(void) nfs_catnap(PZERO, error, "nfs_write");
		} else if ((error == NFSERR_EXPIRED ||
		    error == NFSERR_BADSTATEID) && clidrev != 0) {
			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
		}
		retrycnt++;
	/*
	 * The recovery-related errors only loop when not called from the
	 * strategy routine; see the comment at the top of this function.
	 */
	} while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
	    ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
	      error == NFSERR_STALEDONTRECOVER) && called_from_strategy == 0) ||
	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
	     expireret == 0 && clidrev != 0 && retrycnt < 4));
	/* Map exhausted retries and strategy-time recovery errors to EIO. */
	if (error != 0 && (retrycnt >= 4 ||
	    ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
	      error == NFSERR_STALEDONTRECOVER) && called_from_strategy != 0)))
		error = EIO;
	if (NFSHASNFSV4(nmp))
		NFSFREECRED(newcred);
	return (error);
}

/*
 * The actual write RPC.
*/
/*
 * Writes uio_resid bytes from uiop starting at uio_offset, issuing one
 * Write RPC per chunk of at most nm_wsize bytes.  Returns EFBIG if the
 * end offset exceeds nm_maxfilesize or wraps.  On return through nfsmout
 * *iomode holds the lowest commitment level seen, and *must_commit is set
 * to 1 if the write verifier differs from the one saved in the mount.
 */
static int
nfsrpc_writerpc(vnode_t vp, struct uio *uiop, int *iomode,
    int *must_commit, struct ucred *cred, nfsv4stateid_t *stateidp,
    NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
{
	u_int32_t *tl;
	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
	struct nfsnode *np = VTONFS(vp);
	int error = 0, len, tsiz, rlen, commit, committed = NFSWRITE_FILESYNC;
	int wccflag = 0, wsize;
	int32_t backup;
	struct nfsrv_descript nfsd;
	struct nfsrv_descript *nd = &nfsd;
	nfsattrbit_t attrbits;
	off_t tmp_off;

	KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
	*attrflagp = 0;
	tsiz = uio_uio_resid(uiop);
	tmp_off = uiop->uio_offset + tsiz;
	NFSLOCKMNT(nmp);
	/* Reject ranges past the maximum file size or that wrap around. */
	if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
		NFSUNLOCKMNT(nmp);
		return (EFBIG);
	}
	wsize = nmp->nm_wsize;
	NFSUNLOCKMNT(nmp);
	nd->nd_mrep = NULL;	/* NFSv2 sometimes does a write with */
	nd->nd_repstat = 0;	/* uio_resid == 0, so the while is not done */
	while (tsiz > 0) {
		*attrflagp = 0;
		len = (tsiz > wsize) ? wsize : tsiz;
		NFSCL_REQSTART(nd, NFSPROC_WRITE, vp);
		if (nd->nd_flag & ND_NFSV4) {
			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
			NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+2*NFSX_UNSIGNED);
			txdr_hyper(uiop->uio_offset, tl);
			tl += 2;
			*tl++ = txdr_unsigned(*iomode);
			*tl = txdr_unsigned(len);
		} else if (nd->nd_flag & ND_NFSV3) {
			NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+3*NFSX_UNSIGNED);
			txdr_hyper(uiop->uio_offset, tl);
			tl += 2;
			*tl++ = txdr_unsigned(len);
			*tl++ = txdr_unsigned(*iomode);
			*tl = txdr_unsigned(len);
		} else {
			u_int32_t x;

			NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
			/*
			 * Not sure why someone changed this, since the
			 * RFC clearly states that "beginoffset" and
			 * "totalcount" are ignored, but it wouldn't
			 * surprise me if there's a busted server out there.
			 */
			/* Set both "begin" and "current" to non-garbage. */
			x = txdr_unsigned((u_int32_t)uiop->uio_offset);
			*tl++ = x;	/* "begin offset" */
			*tl++ = x;	/* "current offset" */
			x = txdr_unsigned(len);
			*tl++ = x;	/* total to this offset */
			*tl = x;	/* size of this write */

		}
		nfsm_uiombuf(nd, uiop, len);
		/*
		 * Although it is tempting to do a normal Getattr Op in the
		 * NFSv4 compound, the result can be a nearly hung client
		 * system if the Getattr asks for Owner and/or OwnerGroup.
		 * It occurs when the client can't map either the Owner or
		 * Owner_group name in the Getattr reply to a uid/gid. When
		 * there is a cache miss, the kernel does an upcall to the
		 * nfsuserd. Then, it can try and read the local /etc/passwd
		 * or /etc/group file. It can then block in getnewbuf(),
		 * waiting for dirty writes to be pushed to the NFS server.
		 * The only reason this doesn't result in a complete
		 * deadlock, is that the upcall times out and allows
		 * the write to complete. However, progress is so slow
		 * that it might just as well be deadlocked.
		 * As such, we get the rest of the attributes, but not
		 * Owner or Owner_group.
		 * nb: nfscl_loadattrcache() needs to be told that these
		 *     partial attributes from a write rpc are being
		 *     passed in, via an argument flag.
		 */
		if (nd->nd_flag & ND_NFSV4) {
			NFSWRITEGETATTR_ATTRBIT(&attrbits);
			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
			*tl = txdr_unsigned(NFSV4OP_GETATTR);
			(void) nfsrv_putattrbit(nd, &attrbits);
		}
		error = nfscl_request(nd, vp, p, cred, stuff);
		if (error)
			return (error);
		if (nd->nd_repstat) {
			/*
			 * In case the rpc gets retried, roll
			 * the uio fields changed by nfsm_uiombuf()
			 * back.
			 */
			uiop->uio_offset -= len;
			uio_uio_resid_add(uiop, len);
			uio_iov_base_add(uiop, -len);
			uio_iov_len_add(uiop, len);
		}
		if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
			error = nfscl_wcc_data(nd, vp, nap, attrflagp,
			    &wccflag, stuff);
			if (error)
				goto nfsmout;
		}
		if (!nd->nd_repstat) {
			if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
				NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED
					+ NFSX_VERF);
				rlen = fxdr_unsigned(int, *tl++);
				if (rlen == 0) {
					error = NFSERR_IO;
					goto nfsmout;
				} else if (rlen < len) {
					/*
					 * Short write: back the uio up by the
					 * bytes the server did not take.
					 */
					backup = len - rlen;
					uio_iov_base_add(uiop, -(backup));
					uio_iov_len_add(uiop, backup);
					uiop->uio_offset -= backup;
					uio_uio_resid_add(uiop, backup);
					len = rlen;
				}
				commit = fxdr_unsigned(int, *tl++);

				/*
				 * Return the lowest commitment level
				 * obtained by any of the RPCs.
				 */
				if (committed == NFSWRITE_FILESYNC)
					committed = commit;
				else if (committed == NFSWRITE_DATASYNC &&
					commit == NFSWRITE_UNSTABLE)
					committed = commit;
				NFSLOCKMNT(nmp);
				/*
				 * Save the first write verifier seen; a later
				 * mismatch sets *must_commit.
				 */
				if (!NFSHASWRITEVERF(nmp)) {
					NFSBCOPY((caddr_t)tl,
					    (caddr_t)&nmp->nm_verf[0],
					    NFSX_VERF);
					NFSSETWRITEVERF(nmp);
	    			} else if (NFSBCMP(tl, nmp->nm_verf,
				    NFSX_VERF)) {
					*must_commit = 1;
					NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
				}
				NFSUNLOCKMNT(nmp);
			}
			if (nd->nd_flag & ND_NFSV4)
				NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			if (nd->nd_flag & (ND_NFSV2 | ND_NFSV4)) {
				error = nfsm_loadattr(nd, nap);
				if (!error)
					*attrflagp = NFS_LATTR_NOSHRINK;
			}
		} else {
			error = nd->nd_repstat;
		}
		if (error)
			goto nfsmout;
		NFSWRITERPC_SETTIME(wccflag, np, nap, (nd->nd_flag & ND_NFSV4));
		mbuf_freem(nd->nd_mrep);
		nd->nd_mrep = NULL;
		tsiz -= len;
	}
nfsmout:
	if (nd->nd_mrep != NULL)
		mbuf_freem(nd->nd_mrep);
	*iomode = committed;
	if (nd->nd_repstat && !error)
		error = nd->nd_repstat;
	return (error);
}

/*
 * nfs mknod rpc
 * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
 * mode set to specify the file type and the size field for rdev.
*/
/*
 * Creates the node "name" of type vtyp (device number rdev for VCHR/VBLK)
 * in directory dvp.  *nfhpp, *attrflagp and *dattrflagp are cleared on
 * entry; on success the new file handle and attributes come back via
 * nfhpp/nnap and the directory attributes via dnap.
 */
APPLESTATIC int
nfsrpc_mknod(vnode_t dvp, char *name, int namelen, struct vattr *vap,
    u_int32_t rdev, enum vtype vtyp, struct ucred *cred, NFSPROC_T *p,
    struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
    int *attrflagp, int *dattrflagp, void *dstuff)
{
	u_int32_t *tl;
	int error = 0;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	nfsattrbit_t attrbits;

	*nfhpp = NULL;
	*attrflagp = 0;
	*dattrflagp = 0;
	if (namelen > NFS_MAXNAMLEN)
		return (ENAMETOOLONG);
	NFSCL_REQSTART(nd, NFSPROC_MKNOD, dvp);
	if (nd->nd_flag & ND_NFSV4) {
		/* NFSv4: the type (and device numbers) precede the name. */
		if (vtyp == VBLK || vtyp == VCHR) {
			NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
			*tl++ = vtonfsv34_type(vtyp);
			*tl++ = txdr_unsigned(NFSMAJOR(rdev));
			*tl = txdr_unsigned(NFSMINOR(rdev));
		} else {
			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
			*tl = vtonfsv34_type(vtyp);
		}
	}
	(void) nfsm_strtom(nd, name, namelen);
	if (nd->nd_flag & ND_NFSV3) {
		/* NFSv3: the type follows the name. */
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = vtonfsv34_type(vtyp);
	}
	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
		nfscl_fillsattr(nd, vap, dvp, 0, 0);
	if ((nd->nd_flag & ND_NFSV3) &&
	    (vtyp == VCHR || vtyp == VBLK)) {
		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		*tl++ = txdr_unsigned(NFSMAJOR(rdev));
		*tl = txdr_unsigned(NFSMINOR(rdev));
	}
	if (nd->nd_flag & ND_NFSV4) {
		/* Append GetFH and Getattr operations for the new node. */
		NFSGETATTR_ATTRBIT(&attrbits);
		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
		*tl = txdr_unsigned(NFSV4OP_GETATTR);
		(void) nfsrv_putattrbit(nd, &attrbits);
	}
	if (nd->nd_flag & ND_NFSV2)
		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZERDEV, rdev);
	error = nfscl_request(nd, dvp, p, cred, dstuff);
	if (error)
		return (error);
	if (nd->nd_flag & ND_NFSV4)
		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
	if (!nd->nd_repstat) {
		if (nd->nd_flag & ND_NFSV4) {
			NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
			error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
			if (error)
				goto nfsmout;
		}
		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
		if (error)
			goto nfsmout;
	}
	if (nd->nd_flag & ND_NFSV3)
		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
	if (!error && nd->nd_repstat)
		error = nd->nd_repstat;
nfsmout:
	mbuf_freem(nd->nd_mrep);
	return (error);
}

/*
 * nfs file create call
 * Mostly just call the appropriate routine. (I separated out v4, so that
 * error recovery wouldn't be as difficult.)
 * For NFSv4 mounts an open owner is obtained via nfscl_open() on every
 * attempt and released again via nfscl_ownerrelease(); the create is
 * retried for the recoverable errors listed in the loop condition.
 */
APPLESTATIC int
nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap,
    nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
    struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
    int *attrflagp, int *dattrflagp, void *dstuff)
{
	int error = 0, newone, expireret = 0, retrycnt, unlocked;
	struct nfsclowner *owp;
	struct nfscldeleg *dp;
	struct nfsmount *nmp = VFSTONFS(vnode_mount(dvp));
	u_int32_t clidrev;

	if (NFSHASNFSV4(nmp)) {
	    retrycnt = 0;
	    do {
		dp = NULL;
		error = nfscl_open(dvp, NULL, 0, (NFSV4OPEN_ACCESSWRITE |
		    NFSV4OPEN_ACCESSREAD), 0, cred, p, &owp, NULL, &newone,
		    NULL, 1);
		if (error)
			return (error);
		if (nmp->nm_clp != NULL)
			clidrev = nmp->nm_clp->nfsc_clientidrev;
		else
			clidrev = 0;
		/*
		 * The layout-getting variant is only used on the first
		 * attempt, for pNFS mounts with callbacks enabled and a
		 * callback daemon running.
		 */
		if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
		    nfs_numnfscbd == 0 || retrycnt > 0)
			error = nfsrpc_createv4(dvp, name, namelen, vap, cverf,
			  fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
			  attrflagp, dattrflagp, dstuff, &unlocked);
		else
			error = nfsrpc_getcreatelayout(dvp, name, namelen, vap,
			    cverf, fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
			    attrflagp, dattrflagp, dstuff, &unlocked);
		/*
		 * There is no need to invalidate cached attributes here,
		 * since new post-delegation issue attributes are always
		 * returned by nfsrpc_createv4() and these will update the
		 * attribute cache.
		 */
		if (dp != NULL)
			(void) nfscl_deleg(nmp->nm_mountp, owp->nfsow_clp,
			    (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len, cred, p, &dp);
		nfscl_ownerrelease(nmp, owp, error, newone, unlocked);
		if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
		    error == NFSERR_BADSESSION) {
			(void) nfs_catnap(PZERO, error, "nfs_open");
		} else if ((error == NFSERR_EXPIRED ||
		    error == NFSERR_BADSTATEID) && clidrev != 0) {
			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
			/* Only expiry-type errors count against the limit. */
			retrycnt++;
		}
	    } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
		error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
		error == NFSERR_BADSESSION ||
		((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
		 expireret == 0 && clidrev != 0 && retrycnt < 4));
	    if (error && retrycnt >= 4)
		    error = EIO;
	} else {
		error = nfsrpc_createv23(dvp, name, namelen, vap, cverf,
		    fmode, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp,
		    dstuff);
	}
	return (error);
}

/*
 * The create rpc for v2 and 3.
*/
/*
 * Creates the file "name" in directory dvp.  *nfhpp, *attrflagp and
 * *dattrflagp are cleared on entry.  For NFSv3 an O_EXCL create sends the
 * create verifier "cverf" instead of attributes.  On success the new file
 * handle and attributes are returned via nfhpp/nnap; for NFSv3 the
 * directory attributes are parsed into dnap.
 */
static int
nfsrpc_createv23(vnode_t dvp, char *name, int namelen, struct vattr *vap,
    nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
    struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
    int *attrflagp, int *dattrflagp, void *dstuff)
{
	u_int32_t *tl;
	int error = 0;
	struct nfsrv_descript nfsd, *nd = &nfsd;

	*nfhpp = NULL;
	*attrflagp = 0;
	*dattrflagp = 0;
	if (namelen > NFS_MAXNAMLEN)
		return (ENAMETOOLONG);
	NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp);
	(void) nfsm_strtom(nd, name, namelen);
	if (nd->nd_flag & ND_NFSV3) {
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		if (fmode & O_EXCL) {
			/* Exclusive create: verifier, no attributes. */
			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
			*tl++ = cverf.lval[0];
			*tl = cverf.lval[1];
		} else {
			*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
			nfscl_fillsattr(nd, vap, dvp, 0, 0);
		}
	} else {
		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZE0, 0);
	}
	error = nfscl_request(nd, dvp, p, cred, dstuff);
	if (error)
		return (error);
	if (nd->nd_repstat == 0) {
		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
		if (error)
			goto nfsmout;
	}
	if (nd->nd_flag & ND_NFSV3)
		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
	if (nd->nd_repstat != 0 && error == 0)
		error = nd->nd_repstat;
nfsmout:
	mbuf_freem(nd->nd_mrep);
	return (error);
}

static int
nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap,
    nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
    struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
    int *dattrflagp, void *dstuff, int *unlockedp)
{
	u_int32_t *tl;
	int error = 0, deleg, newone, ret, acesize, limitby;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfsclopen *op;
	struct nfscldeleg *dp = NULL;
	struct nfsnode *np;
	struct nfsfh *nfhp;
	nfsattrbit_t attrbits;
	nfsv4stateid_t stateid;
	u_int32_t rflags;
	struct nfsmount *nmp;
	struct nfsclsession *tsep;

	nmp = VFSTONFS(dvp->v_mount);
	np = VTONFS(dvp);
	*unlockedp = 0;
	*nfhpp = NULL;
*dpp = NULL;
	*attrflagp = 0;
	*dattrflagp = 0;
	if (namelen > NFS_MAXNAMLEN)
		return (ENAMETOOLONG);
	NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp);
	/*
	 * For V4, this is actually an Open op.
	 */
	/* Five words: owner seqid, share access, share deny, clientid[2]. */
	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(owp->nfsow_seqid);
	*tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
	    NFSV4OPEN_ACCESSREAD);
	*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
	tsep = nfsmnt_mdssession(nmp);
	*tl++ = tsep->nfsess_clientid.lval[0];
	*tl = tsep->nfsess_clientid.lval[1];
	(void) nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
	/* Two words: open type (create), then the create mode set below. */
	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
	if (fmode & O_EXCL) {
		if (NFSHASNFSV4N(nmp)) {
			if (NFSHASSESSPERSIST(nmp)) {
				/* Use GUARDED for persistent sessions. */
				*tl = txdr_unsigned(NFSCREATE_GUARDED);
				nfscl_fillsattr(nd, vap, dvp, 0, 0);
			} else {
				/* Otherwise, use EXCLUSIVE4_1. */
				*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
				NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
				*tl++ = cverf.lval[0];
				*tl = cverf.lval[1];
				nfscl_fillsattr(nd, vap, dvp, 0, 0);
			}
		} else {
			/* NFSv4.0 */
			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
			*tl++ = cverf.lval[0];
			*tl = cverf.lval[1];
		}
	} else {
		*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
		nfscl_fillsattr(nd, vap, dvp, 0, 0);
	}
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
	(void) nfsm_strtom(nd, name, namelen);
	/* Get the new file's handle and attributes. */
	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(NFSV4OP_GETFH);
	*tl = txdr_unsigned(NFSV4OP_GETATTR);
	NFSGETATTR_ATTRBIT(&attrbits);
	(void) nfsrv_putattrbit(nd, &attrbits);
	/* Get the directory's post-op attributes. */
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OP_PUTFH);
	(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
	    np->n_fhp->nfh_len, 0);
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OP_GETATTR);
	(void) nfsrv_putattrbit(nd, &attrbits);
	error = nfscl_request(nd, dvp, p, cred, dstuff);
	if (error)
		return (error);
	NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
	if (nd->nd_repstat == 0) {
		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
		    6 * NFSX_UNSIGNED);
		stateid.seqid = *tl++;
		stateid.other[0] = *tl++;
		stateid.other[1] = *tl++;
		stateid.other[2] = *tl;
		/* tl still points at other[2]; the flags are 6 words on. */
		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
		(void) nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		deleg = fxdr_unsigned(int, *tl);
		if (deleg == NFSV4OPEN_DELEGATEREAD ||
		    deleg == NFSV4OPEN_DELEGATEWRITE) {
			/* Note on the client that delegations are issued. */
			if (!(owp->nfsow_clp->nfsc_flags &
			      NFSCLFLAGS_FIRSTDELEG))
				owp->nfsow_clp->nfsc_flags |=
				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
			/* Room for the structure plus a maximum size handle. */
			MALLOC(dp, struct nfscldeleg *,
			    sizeof (struct nfscldeleg) + NFSX_V4FHMAX,
			    M_NFSCLDELEG, M_WAITOK);
			LIST_INIT(&dp->nfsdl_owner);
			LIST_INIT(&dp->nfsdl_lock);
			dp->nfsdl_clp = owp->nfsow_clp;
			newnfs_copyincred(cred, &dp->nfsdl_cred);
			nfscl_lockinit(&dp->nfsdl_rwlock);
			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
			    NFSX_UNSIGNED);
			dp->nfsdl_stateid.seqid = *tl++;
			dp->nfsdl_stateid.other[0] = *tl++;
			dp->nfsdl_stateid.other[1] = *tl++;
			dp->nfsdl_stateid.other[2] = *tl++;
			/* Non-zero ret sets NFSCLDL_RECALL further down. */
			ret = fxdr_unsigned(int, *tl);
			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
				dp->nfsdl_flags = NFSCLDL_WRITE;
				/*
				 * Indicates how much the file can grow.
				 */
				NFSM_DISSECT(tl, u_int32_t *,
				    3 * NFSX_UNSIGNED);
				limitby = fxdr_unsigned(int, *tl++);
				switch (limitby) {
				case NFSV4OPEN_LIMITSIZE:
					dp->nfsdl_sizelimit = fxdr_hyper(tl);
					break;
				case NFSV4OPEN_LIMITBLOCKS:
					/* Limit is the product of two words. */
					dp->nfsdl_sizelimit =
					    fxdr_unsigned(u_int64_t, *tl++);
					dp->nfsdl_sizelimit *=
					    fxdr_unsigned(u_int64_t, *tl);
					break;
				default:
					error = NFSERR_BADXDR;
					goto nfsmout;
				}
			} else {
				dp->nfsdl_flags = NFSCLDL_READ;
			}
			if (ret)
				dp->nfsdl_flags |= NFSCLDL_RECALL;
			error = nfsrv_dissectace(nd, &dp->nfsdl_ace, &ret,
			    &acesize, p);
			if (error)
				goto nfsmout;
		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
			/* Any other delegation type is a malformed reply. */
			error = NFSERR_BADXDR;
			goto nfsmout;
		}
		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
		if (error)
			goto nfsmout;
		/* Get rid of the PutFH and Getattr status values. */
		NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
		/* Load the directory attributes. */
		error = nfsm_loadattr(nd, dnap);
		if (error)
			goto nfsmout;
		*dattrflagp = 1;
		/* Seed the delegation from the new file's attributes. */
		if (dp != NULL && *attrflagp) {
			dp->nfsdl_change = nnap->na_filerev;
			dp->nfsdl_modtime = nnap->na_mtime;
			dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
		}
		/*
		 * We can now complete the Open state.
		 */
		nfhp = *nfhpp;
		if (dp != NULL) {
			dp->nfsdl_fhlen = nfhp->nfh_len;
			NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh, nfhp->nfh_len);
		}
		/*
		 * Get an Open structure that will be
		 * attached to the OpenOwner, acquired already.
		 */
		error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len,
		    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
		    cred, p, NULL, &op, &newone, NULL, 0);
		if (error)
			goto nfsmout;
		op->nfso_stateid = stateid;
		newnfs_copyincred(cred, &op->nfso_cred);
		if ((rflags & NFSV4OPEN_RESULTCONFIRM)) {
			/* Retry the OpenConfirm for as long as it is delayed. */
			do {
				ret = nfsrpc_openconfirm(dvp, nfhp->nfh_fh,
				    nfhp->nfh_len, op, cred, p);
				if (ret == NFSERR_DELAY)
					(void) nfs_catnap(PZERO, ret, "nfs_create");
			} while (ret == NFSERR_DELAY);
			error = ret;
		}

		/*
		 * If the server is handing out delegations, but we didn't
		 * get one because an OpenConfirm was required, try the
		 * Open again, to get a delegation.
This is a harmless no-op,
		 * from a server's point of view.
		 */
		if ((rflags & NFSV4OPEN_RESULTCONFIRM) &&
		    (owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG)
		    && !error && dp == NULL) {
			do {
				ret = nfsrpc_openrpc(VFSTONFS(vnode_mount(dvp)), dvp,
				    np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
				    nfhp->nfh_fh, nfhp->nfh_len,
				    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), op,
				    name, namelen, &dp, 0, 0x0, cred, p, 0, 1);
				if (ret == NFSERR_DELAY)
					(void) nfs_catnap(PZERO, ret, "nfs_crt2");
			} while (ret == NFSERR_DELAY);
			if (ret) {
				if (dp != NULL) {
					FREE((caddr_t)dp, M_NFSCLDELEG);
					dp = NULL;
				}
				/* Only these three failures are passed back. */
				if (ret == NFSERR_STALECLIENTID ||
				    ret == NFSERR_STALEDONTRECOVER ||
				    ret == NFSERR_BADSESSION)
					error = ret;
			}
		}
		nfscl_openrelease(nmp, op, error, newone);
		*unlockedp = 1;
	}
	if (nd->nd_repstat != 0 && error == 0)
		error = nd->nd_repstat;
	if (error == NFSERR_STALECLIENTID)
		nfscl_initiate_recovery(owp->nfsow_clp);
nfsmout:
	/* Hand the delegation back only on success; otherwise free it. */
	if (!error)
		*dpp = dp;
	else if (dp != NULL)
		FREE((caddr_t)dp, M_NFSCLDELEG);
	mbuf_freem(nd->nd_mrep);
	return (error);
}

/*
 * Nfs remove rpc
 * For an NFSv4 mount, nfscl_removedeleg() is consulted first and, when it
 * returns 1, a DelegReturn is put in front of the Remove in one request.
 * If that compound fails before reaching the Remove, the request is
 * rebuilt and sent again without the DelegReturn.
 */
APPLESTATIC int
nfsrpc_remove(vnode_t dvp, char *name, int namelen, vnode_t vp,
    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp,
    void *dstuff)
{
	u_int32_t *tl;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfsnode *np;
	struct nfsmount *nmp;
	nfsv4stateid_t dstateid;
	int error, ret = 0, i;

	*dattrflagp = 0;
	if (namelen > NFS_MAXNAMLEN)
		return (ENAMETOOLONG);
	nmp = VFSTONFS(vnode_mount(dvp));
tryagain:
	/* ret != 0 here means this is the retry: skip the DelegReturn. */
	if (NFSHASNFSV4(nmp) && ret == 0) {
		ret = nfscl_removedeleg(vp, p, &dstateid);
		if (ret == 1) {
			NFSCL_REQSTART(nd, NFSPROC_RETDELEGREMOVE, vp);
			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
			    NFSX_UNSIGNED);
			/* A zero seqid is sent when NFSHASNFSV4N() is true. */
			if (NFSHASNFSV4N(nmp))
				*tl++ = 0;
			else
				*tl++ = dstateid.seqid;
			*tl++ = dstateid.other[0];
			*tl++ = dstateid.other[1];
			*tl++ = dstateid.other[2];
			*tl = txdr_unsigned(NFSV4OP_PUTFH);
			np = VTONFS(dvp);
			(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
			    np->n_fhp->nfh_len, 0);
			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
			*tl = txdr_unsigned(NFSV4OP_REMOVE);
		}
	} else {
		ret = 0;
	}
	if (ret == 0)
		NFSCL_REQSTART(nd, NFSPROC_REMOVE, dvp);
	(void) nfsm_strtom(nd, name, namelen);
	error = nfscl_request(nd, dvp, p, cred, dstuff);
	if (error)
		return (error);
	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
		/* For NFSv4, parse out any Delegreturn replies. */
		if (ret > 0 && nd->nd_repstat != 0 &&
		    (nd->nd_flag & ND_NOMOREDATA)) {
			/*
			 * If the Delegreturn failed, try again without
			 * it. The server will Recall, as required.
			 */
			mbuf_freem(nd->nd_mrep);
			goto tryagain;
		}
		/* Two words are consumed per iteration, two per unit of ret. */
		for (i = 0; i < (ret * 2); i++) {
			if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
			    ND_NFSV4) {
			    NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			    /* A non-zero second word stops further parsing. */
			    if (*(tl + 1))
				nd->nd_flag |= ND_NOMOREDATA;
			}
		}
		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
	}
	if (nd->nd_repstat && !error)
		error = nd->nd_repstat;
nfsmout:
	mbuf_freem(nd->nd_mrep);
	return (error);
}

/*
 * Do an nfs rename rpc.
 * For an NFSv4 mount, nfscl_renamedeleg() reports (via gotfd/gottd) which
 * of the "from" and "to" files need a DelegReturn put ahead of the Rename.
 */
APPLESTATIC int
nfsrpc_rename(vnode_t fdvp, vnode_t fvp, char *fnameptr, int fnamelen,
    vnode_t tdvp, vnode_t tvp, char *tnameptr, int tnamelen, struct ucred *cred,
    NFSPROC_T *p, struct nfsvattr *fnap, struct nfsvattr *tnap,
    int *fattrflagp, int *tattrflagp, void *fstuff, void *tstuff)
{
	u_int32_t *tl;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfsmount *nmp;
	struct nfsnode *np;
	nfsattrbit_t attrbits;
	nfsv4stateid_t fdstateid, tdstateid;
	int error = 0, ret = 0, gottd = 0, gotfd = 0, i;

	*fattrflagp = 0;
	*tattrflagp = 0;
	nmp = VFSTONFS(vnode_mount(fdvp));
	if (fnamelen > NFS_MAXNAMLEN || tnamelen > NFS_MAXNAMLEN)
		return (ENAMETOOLONG);
tryagain:
	/* ret != 0 here means this is the retry: skip the DelegReturns. */
	if (NFSHASNFSV4(nmp) && ret == 0) {
		ret = nfscl_renamedeleg(fvp, &fdstateid, &gotfd, tvp,
		    &tdstateid, &gottd, p);
		/* The procedure and starting vnode depend on what was found. */
		if (gotfd && gottd) {
			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME2, fvp);
		} else if (gotfd) {
			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, fvp);
		} else if (gottd) {
			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, tvp);
		}
		if (gotfd) {
			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
			if (NFSHASNFSV4N(nmp))
				*tl++ = 0;
else
				*tl++ = fdstateid.seqid;
			*tl++ = fdstateid.other[0];
			*tl++ = fdstateid.other[1];
			*tl = fdstateid.other[2];
			/* Both found: switch to tvp for the second DelegReturn. */
			if (gottd) {
				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
				*tl = txdr_unsigned(NFSV4OP_PUTFH);
				np = VTONFS(tvp);
				(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
				    np->n_fhp->nfh_len, 0);
				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
				*tl = txdr_unsigned(NFSV4OP_DELEGRETURN);
			}
		}
		if (gottd) {
			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
			if (NFSHASNFSV4N(nmp))
				*tl++ = 0;
			else
				*tl++ = tdstateid.seqid;
			*tl++ = tdstateid.other[0];
			*tl++ = tdstateid.other[1];
			*tl = tdstateid.other[2];
		}
		/* After any DelegReturn, select the "from" directory. */
		if (ret > 0) {
			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
			*tl = txdr_unsigned(NFSV4OP_PUTFH);
			np = VTONFS(fdvp);
			(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
			    np->n_fhp->nfh_len, 0);
			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
			*tl = txdr_unsigned(NFSV4OP_SAVEFH);
		}
	} else {
		ret = 0;
	}
	if (ret == 0)
		NFSCL_REQSTART(nd, NFSPROC_RENAME, fdvp);
	if (nd->nd_flag & ND_NFSV4) {
		/* Getattr, PutFH(tdvp), Getattr, then the Rename op itself. */
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(NFSV4OP_GETATTR);
		NFSWCCATTR_ATTRBIT(&attrbits);
		(void) nfsrv_putattrbit(nd, &attrbits);
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(NFSV4OP_PUTFH);
		(void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh,
		    VTONFS(tdvp)->n_fhp->nfh_len, 0);
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(NFSV4OP_GETATTR);
		(void) nfsrv_putattrbit(nd, &attrbits);
		nd->nd_flag |= ND_V4WCCATTR;
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(NFSV4OP_RENAME);
	}
	(void) nfsm_strtom(nd, fnameptr, fnamelen);
	/* Only non-NFSv4 puts tdvp's file handle between the two names. */
	if (!(nd->nd_flag & ND_NFSV4))
		(void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh,
			VTONFS(tdvp)->n_fhp->nfh_len, 0);
	(void) nfsm_strtom(nd, tnameptr, tnamelen);
	error = nfscl_request(nd, fdvp, p, cred, fstuff);
	if (error)
		return (error);
	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
		/* For NFSv4, parse out any Delegreturn replies. */
		if (ret > 0 && nd->nd_repstat != 0 &&
		    (nd->nd_flag & ND_NOMOREDATA)) {
			/*
			 * If the Delegreturn failed, try again without
			 * it. The server will Recall, as required.
			 */
			mbuf_freem(nd->nd_mrep);
			goto tryagain;
		}
		for (i = 0; i < (ret * 2); i++) {
			if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
			    ND_NFSV4) {
			    NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			    if (*(tl + 1)) {
				if (i == 0 && ret > 1) {
				    /*
				     * If the Delegreturn failed, try again
				     * without it. The server will Recall, as
				     * required.
				     * If ret > 1, the first iteration of this
				     * loop is the second DelegReturn result.
				     */
				    mbuf_freem(nd->nd_mrep);
				    goto tryagain;
				} else {
				    nd->nd_flag |= ND_NOMOREDATA;
				}
			    }
			}
		}
		/* Now, the first wcc attribute reply. */
		if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			if (*(tl + 1))
				nd->nd_flag |= ND_NOMOREDATA;
		}
		error = nfscl_wcc_data(nd, fdvp, fnap, fattrflagp, NULL,
		    fstuff);
		/* and the second wcc attribute reply. */
		if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 &&
		    !error) {
			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			if (*(tl + 1))
				nd->nd_flag |= ND_NOMOREDATA;
		}
		if (!error)
			error = nfscl_wcc_data(nd, tdvp, tnap, tattrflagp,
			    NULL, tstuff);
	}
	if (nd->nd_repstat && !error)
		error = nd->nd_repstat;
nfsmout:
	mbuf_freem(nd->nd_mrep);
	return (error);
}

/*
 * nfs hard link create rpc
 * The request is started on vp (the existing file); dvp's file handle and
 * the new name follow.  *attrflagp and *dattrflagp are cleared on entry.
 */
APPLESTATIC int
nfsrpc_link(vnode_t dvp, vnode_t vp, char *name, int namelen,
    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
    struct nfsvattr *nap, int *attrflagp, int *dattrflagp, void *dstuff)
{
	u_int32_t *tl;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	nfsattrbit_t attrbits;
	int error = 0;

	*attrflagp = 0;
	*dattrflagp = 0;
	if (namelen > NFS_MAXNAMLEN)
		return (ENAMETOOLONG);
	NFSCL_REQSTART(nd, NFSPROC_LINK, vp);
	if (nd->nd_flag & ND_NFSV4) {
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(NFSV4OP_PUTFH);
	}
	(void) nfsm_fhtom(nd, VTONFS(dvp)->n_fhp->nfh_fh,
		VTONFS(dvp)->n_fhp->nfh_len, 0);
	if (nd->nd_flag
& ND_NFSV4) {
		/* Getattr on the directory, then the Link op itself. */
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(NFSV4OP_GETATTR);
		NFSWCCATTR_ATTRBIT(&attrbits);
		(void) nfsrv_putattrbit(nd, &attrbits);
		nd->nd_flag |= ND_V4WCCATTR;
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(NFSV4OP_LINK);
	}
	(void) nfsm_strtom(nd, name, namelen);
	error = nfscl_request(nd, vp, p, cred, dstuff);
	if (error)
		return (error);
	if (nd->nd_flag & ND_NFSV3) {
		/* NFSv3: attributes of the file, then the directory's wcc. */
		error = nfscl_postop_attr(nd, nap, attrflagp, dstuff);
		if (!error)
			error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
			    NULL, dstuff);
	} else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
		/*
		 * First, parse out the PutFH and Getattr result.
		 */
		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		/* Only read the second pair when the first shows no error. */
		if (!(*(tl + 1)))
			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		if (*(tl + 1))
			nd->nd_flag |= ND_NOMOREDATA;
		/*
		 * Get the pre-op attributes.
		 */
		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
	}
	if (nd->nd_repstat && !error)
		error = nd->nd_repstat;
nfsmout:
	mbuf_freem(nd->nd_mrep);
	return (error);
}

/*
 * nfs symbolic link create rpc
 * The order of name, attributes and target in the request differs for
 * each protocol version; see the flag tests in the body.
 * Returns ENAMETOOLONG if the target exceeds NFS_MAXPATHLEN or the name
 * exceeds NFS_MAXNAMLEN.
 */
APPLESTATIC int
nfsrpc_symlink(vnode_t dvp, char *name, int namelen, char *target,
    struct vattr *vap, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
    struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
    int *dattrflagp, void *dstuff)
{
	u_int32_t *tl;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfsmount *nmp;
	int slen, error = 0;

	*nfhpp = NULL;
	*attrflagp = 0;
	*dattrflagp = 0;
	nmp = VFSTONFS(vnode_mount(dvp));
	slen = strlen(target);
	if (slen > NFS_MAXPATHLEN || namelen > NFS_MAXNAMLEN)
		return (ENAMETOOLONG);
	NFSCL_REQSTART(nd, NFSPROC_SYMLINK, dvp);
	if (nd->nd_flag & ND_NFSV4) {
		/* NFSv4: object type and link target precede the name. */
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(NFLNK);
		(void) nfsm_strtom(nd, target, slen);
	}
	(void) nfsm_strtom(nd, name, namelen);
	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
		nfscl_fillsattr(nd, vap, dvp, 0, 0);
	/* Non-NFSv4 puts the link target after the name. */
	if (!(nd->nd_flag & ND_NFSV4))
		(void) nfsm_strtom(nd, target, slen);
	if
(nd->nd_flag & ND_NFSV2)
		/* NFSv2 sends the attributes last, with NFSSATTR_SIZENEG1. */
		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
	error = nfscl_request(nd, dvp, p, cred, dstuff);
	if (error)
		return (error);
	if (nd->nd_flag & ND_NFSV4)
		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
	if ((nd->nd_flag & ND_NFSV3) && !error) {
		/* The new handle/attributes are only present on success. */
		if (!nd->nd_repstat)
			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
		if (!error)
			error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
			    NULL, dstuff);
	}
	if (nd->nd_repstat && !error)
		error = nd->nd_repstat;
	mbuf_freem(nd->nd_mrep);
	/*
	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
	 * Only do this if vfs.nfs.ignore_eexist is set.
	 * Never do this for NFSv4.1 or later minor versions, since sessions
	 * should guarantee "exactly once" RPC semantics.
	 */
	if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
	    nmp->nm_minorvers == 0))
		error = 0;
	return (error);
}

/*
 * nfs make dir rpc
 * *nfhpp is set to NULL and both flag outputs to 0 on entry.  For NFSv4
 * the request also asks for the new directory's handle and attributes and
 * then for the parent directory's attributes.
 */
APPLESTATIC int
nfsrpc_mkdir(vnode_t dvp, char *name, int namelen, struct vattr *vap,
    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
    struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
    int *dattrflagp, void *dstuff)
{
	u_int32_t *tl;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	nfsattrbit_t attrbits;
	int error = 0;
	struct nfsfh *fhp;
	struct nfsmount *nmp;

	*nfhpp = NULL;
	*attrflagp = 0;
	*dattrflagp = 0;
	nmp = VFSTONFS(vnode_mount(dvp));
	fhp = VTONFS(dvp)->n_fhp;
	if (namelen > NFS_MAXNAMLEN)
		return (ENAMETOOLONG);
	NFSCL_REQSTART(nd, NFSPROC_MKDIR, dvp);
	if (nd->nd_flag & ND_NFSV4) {
		/* For NFSv4 the object type (directory) comes first. */
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(NFDIR);
	}
	(void) nfsm_strtom(nd, name, namelen);
	nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
	if (nd->nd_flag & ND_NFSV4) {
		/* Getfh + Getattr for the new dir, then the parent's attrs. */
		NFSGETATTR_ATTRBIT(&attrbits);
		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
		*tl = txdr_unsigned(NFSV4OP_GETATTR);
		(void) nfsrv_putattrbit(nd, &attrbits);
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(NFSV4OP_PUTFH);
		(void) nfsm_fhtom(nd,
fhp->nfh_fh, fhp->nfh_len, 0);
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(NFSV4OP_GETATTR);
		(void) nfsrv_putattrbit(nd, &attrbits);
	}
	error = nfscl_request(nd, dvp, p, cred, dstuff);
	if (error)
		return (error);
	if (nd->nd_flag & ND_NFSV4)
		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
	if (!nd->nd_repstat && !error) {
		if (nd->nd_flag & ND_NFSV4) {
			/* Skip five words, then the attribute bitmap. */
			NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
			error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
		}
		if (!error)
			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
		if (error == 0 && (nd->nd_flag & ND_NFSV4) != 0) {
			/* Get rid of the PutFH and Getattr status values. */
			NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
			/* Load the directory attributes. */
			error = nfsm_loadattr(nd, dnap);
			if (error == 0)
				*dattrflagp = 1;
		}
	}
	if ((nd->nd_flag & ND_NFSV3) && !error)
		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
	if (nd->nd_repstat && !error)
		error = nd->nd_repstat;
nfsmout:
	mbuf_freem(nd->nd_mrep);
	/*
	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
	 * Only do this if vfs.nfs.ignore_eexist is set.
	 * Never do this for NFSv4.1 or later minor versions, since sessions
	 * should guarantee "exactly once" RPC semantics.
	 */
	if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
	    nmp->nm_minorvers == 0))
		error = 0;
	return (error);
}

/*
 * nfs remove directory call
 * *dattrflagp is cleared on entry.  An ENOENT result is reported to the
 * caller as success (see the kludge at the end of the function).
 */
APPLESTATIC int
nfsrpc_rmdir(vnode_t dvp, char *name, int namelen, struct ucred *cred,
    NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp, void *dstuff)
{
	struct nfsrv_descript nfsd, *nd = &nfsd;
	int error = 0;

	*dattrflagp = 0;
	if (namelen > NFS_MAXNAMLEN)
		return (ENAMETOOLONG);
	NFSCL_REQSTART(nd, NFSPROC_RMDIR, dvp);
	(void) nfsm_strtom(nd, name, namelen);
	error = nfscl_request(nd, dvp, p, cred, dstuff);
	if (error)
		return (error);
	/* Directory wcc data is only parsed for NFSv3 and NFSv4. */
	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
	if (nd->nd_repstat && !error)
		error = nd->nd_repstat;
	mbuf_freem(nd->nd_mrep);
	/*
	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
	 */
	if (error == ENOENT)
		error = 0;
	return (error);
}

/*
 * Readdir rpc.
 * Always returns with either uio_resid unchanged, if you are at the
 * end of the directory, or uio_resid == 0, with all DIRBLKSIZ chunks
 * filled in.
 * I felt this would allow caching of directory blocks more easily
 * than returning a partially filled block.
 * Directory offset cookies:
 * Oh my, what to do with them...
 * I can think of three ways to deal with them:
 * 1 - have the layer above these RPCs maintain a map between logical
 *     directory byte offsets and the NFS directory offset cookies
 * 2 - pass the opaque directory offset cookies up into userland
 *     and let the libc functions deal with them, via the system call
 * 3 - return them to userland in the "struct dirent", so future versions
 *     of libc can use them and do whatever is necessary to make things work
 *     above these rpc calls, in the meantime
 * For now, I do #3 by "hiding" the directory offset cookies after the
 * d_name field in struct dirent. This is space inside d_reclen that
 * will be ignored by anything that doesn't know about them.
* The directory offset cookies are filled in as the last 8 bytes of
 * each directory entry, after d_name. Someday, the userland libc
 * functions may be able to use these. In the meantime, it satisfies
 * OpenBSD's requirements for cookies being returned.
 * It expects the directory offset cookie for the read to be in uio_offset
 * and returns the one for the next entry after this directory block in
 * there, as well.
 */
APPLESTATIC int
nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
    int *eofp, void *stuff)
{
	int len, left;
	struct dirent *dp = NULL;
	u_int32_t *tl;
	nfsquad_t cookie, ncookie;
	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
	struct nfsnode *dnp = VTONFS(vp);
	struct nfsvattr nfsva;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
	int reqsize, tryformoredirs = 1, readsize, eof = 0, gotmnton = 0;
	long dotfileid, dotdotfileid = 0;
	u_int32_t fakefileno = 0xffffffff, rderr;
	char *cp;
	nfsattrbit_t attrbits, dattrbits;
	u_int32_t *tl2 = NULL;
	size_t tresid;

	KASSERT(uiop->uio_iovcnt == 1 &&
	    (uio_uio_resid(uiop) & (DIRBLKSIZ - 1)) == 0,
	    ("nfs readdirrpc bad uio"));

	/*
	 * There is no point in reading a lot more than uio_resid, however
	 * adding one additional DIRBLKSIZ makes sense. Since uio_resid
	 * and nm_readdirsize are both exact multiples of DIRBLKSIZ, this
	 * will never make readsize > nm_readdirsize.
	 */
	readsize = nmp->nm_readdirsize;
	if (readsize > uio_uio_resid(uiop))
		readsize = uio_uio_resid(uiop) + DIRBLKSIZ;

	*attrflagp = 0;
	if (eofp)
		*eofp = 0;
	/* Remember the starting resid to tell later if anything was added. */
	tresid = uio_uio_resid(uiop);
	cookie.lval[0] = cookiep->nfsuquad[0];
	cookie.lval[1] = cookiep->nfsuquad[1];
	/* nfsmout only frees nd_mrep when it is non-NULL. */
	nd->nd_mrep = NULL;

	/*
	 * For NFSv4, first create the "." and ".." entries.
	 */
	if (NFSHASNFSV4(nmp)) {
		reqsize = 6 * NFSX_UNSIGNED;
		NFSGETATTR_ATTRBIT(&dattrbits);
		NFSZERO_ATTRBIT(&attrbits);
		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TYPE);
		if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
		    NFSATTRBIT_MOUNTEDONFILEID)) {
			NFSSETBIT_ATTRBIT(&attrbits,
			    NFSATTRBIT_MOUNTEDONFILEID);
			gotmnton = 1;
		} else {
			/*
			 * Must fake it. Use the fileno, except when the
			 * fsid is != to that of the directory. For that
			 * case, generate a fake fileno that is not the same.
			 */
			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
			gotmnton = 0;
		}

		/*
		 * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
		 */
		if (uiop->uio_offset == 0) {
			NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp);
			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			*tl++ = txdr_unsigned(NFSV4OP_GETFH);
			*tl = txdr_unsigned(NFSV4OP_GETATTR);
			(void) nfsrv_putattrbit(nd, &attrbits);
			error = nfscl_request(nd, vp, p, cred, stuff);
			if (error)
			    return (error);
			dotfileid = 0;	/* Fake out the compiler. */
			/* The first attributes in the reply give "."'s fileid. */
			if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
			    error = nfsm_loadattr(nd, &nfsva);
			    if (error != 0)
				goto nfsmout;
			    dotfileid = nfsva.na_fileid;
			}
			if (nd->nd_repstat == 0) {
			    /* The fifth word is the length of the file handle. */
			    NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
			    len = fxdr_unsigned(int, *(tl + 4));
			    if (len > 0 && len <= NFSX_V4FHMAX)
				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
			    else
				error = EPERM;
			    if (!error) {
				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
				/* 0xffffffff marks "not filled in" for the test below. */
				nfsva.na_mntonfileno = 0xffffffff;
				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
				    NULL, NULL, NULL, p, cred);
				if (error) {
				    dotdotfileid = dotfileid;
				} else if (gotmnton) {
				    if (nfsva.na_mntonfileno != 0xffffffff)
					dotdotfileid = nfsva.na_mntonfileno;
				    else
					dotdotfileid = nfsva.na_fileid;
				} else if (nfsva.na_filesid[0] ==
				    dnp->n_vattr.na_filesid[0] &&
				    nfsva.na_filesid[1] ==
				    dnp->n_vattr.na_filesid[1]) {
				    dotdotfileid = nfsva.na_fileid;
				} else {
				    /* Different fsid: pick a fake, distinct fileno. */
				    do {
					fakefileno--;
				    } while (fakefileno ==
					nfsva.na_fileid);
				    dotdotfileid = fakefileno;
				}
			    }
			} else if (nd->nd_repstat == NFSERR_NOENT) {
			    /*
			     * Lookupp returns NFSERR_NOENT when we are
			     * at the root, so just use the current dir.
			     */
			    nd->nd_repstat = 0;
			    dotdotfileid = dotfileid;
			} else {
			    error = nd->nd_repstat;
			}
			mbuf_freem(nd->nd_mrep);
			if (error)
			    return (error);
			nd->nd_mrep = NULL;
			/* Hand build the "." entry at the start of the buffer. */
			dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop));
			dp->d_type = DT_DIR;
			dp->d_fileno = dotfileid;
			dp->d_namlen = 1;
			dp->d_name[0] = '.';
			dp->d_name[1] = '\0';
			dp->d_reclen = DIRENT_SIZE(dp) + NFSX_HYPER;
			/*
			 * Just make these offset cookie 0.
			 */
			tl = (u_int32_t *)&dp->d_name[4];
			*tl++ = 0;
			*tl = 0;
			blksiz += dp->d_reclen;
			uio_uio_resid_add(uiop, -(dp->d_reclen));
			uiop->uio_offset += dp->d_reclen;
			uio_iov_base_add(uiop, dp->d_reclen);
			uio_iov_len_add(uiop, -(dp->d_reclen));
			/* ... followed by the ".." entry. */
			dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop));
			dp->d_type = DT_DIR;
			dp->d_fileno = dotdotfileid;
			dp->d_namlen = 2;
			dp->d_name[0] = '.';
			dp->d_name[1] = '.';
			dp->d_name[2] = '\0';
			dp->d_reclen = DIRENT_SIZE(dp) + NFSX_HYPER;
			/*
			 * Just make these offset cookie 0.
			 */
			tl = (u_int32_t *)&dp->d_name[4];
			*tl++ = 0;
			*tl = 0;
			blksiz += dp->d_reclen;
			uio_uio_resid_add(uiop, -(dp->d_reclen));
			uiop->uio_offset += dp->d_reclen;
			uio_iov_base_add(uiop, dp->d_reclen);
			uio_iov_len_add(uiop, -(dp->d_reclen));
		}
		/* The Readdir requests below also ask for RDATTRERROR. */
		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_RDATTRERROR);
	} else {
		reqsize = 5 * NFSX_UNSIGNED;
	}


	/*
	 * Loop around doing readdir rpc's of size readsize.
	 * The stopping criteria is EOF or buffer full.
	 */
	while (more_dirs && bigenough) {
		*attrflagp = 0;
		NFSCL_REQSTART(nd, NFSPROC_READDIR, vp);
		if (nd->nd_flag & ND_NFSV2) {
			/* NFSv2: one cookie word and the count. */
			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			*tl++ = cookie.lval[1];
			*tl = txdr_unsigned(readsize);
		} else {
			NFSM_BUILD(tl, u_int32_t *, reqsize);
			*tl++ = cookie.lval[0];
			*tl++ = cookie.lval[1];
			/* A zero cookie is sent with a zero cookie verifier. */
			if (cookie.qval == 0) {
				*tl++ = 0;
				*tl++ = 0;
			} else {
				NFSLOCKNODE(dnp);
				*tl++ = dnp->n_cookieverf.nfsuquad[0];
				*tl++ = dnp->n_cookieverf.nfsuquad[1];
				NFSUNLOCKNODE(dnp);
			}
			if (nd->nd_flag & ND_NFSV4) {
				/* NFSv4 sends readsize twice, then the bitmaps. */
				*tl++ = txdr_unsigned(readsize);
				*tl = txdr_unsigned(readsize);
				(void) nfsrv_putattrbit(nd, &attrbits);
				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
				*tl = txdr_unsigned(NFSV4OP_GETATTR);
				(void) nfsrv_putattrbit(nd, &dattrbits);
			} else {
				*tl = txdr_unsigned(readsize);
			}
		}
		error = nfscl_request(nd, vp, p, cred, stuff);
		if (error)
			return (error);
		if (!(nd->nd_flag & ND_NFSV2)) {
			if (nd->nd_flag & ND_NFSV3)
				error = nfscl_postop_attr(nd, nap, attrflagp,
				    stuff);
			if (!nd->nd_repstat && !error) {
				/* Save the cookie verifier from the reply. */
				NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
				NFSLOCKNODE(dnp);
				dnp->n_cookieverf.nfsuquad[0] = *tl++;
				dnp->n_cookieverf.nfsuquad[1] = *tl;
				NFSUNLOCKNODE(dnp);
			}
		}
		if (nd->nd_repstat || error) {
			if (!error)
				error = nd->nd_repstat;
			goto nfsmout;
		}
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		more_dirs = fxdr_unsigned(int, *tl);
		/* An empty reply: do not let the eof flag restart the loop. */
		if (!more_dirs)
			tryformoredirs = 0;

		/* loop through the dir entries, doctoring them to 4bsd form */
		while (more_dirs && bigenough) {
			if (nd->nd_flag & ND_NFSV4) {
				/* NFSv4: cookie (2 words), then name length. */
				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
				ncookie.lval[0] = *tl++;
				ncookie.lval[1] = *tl++;
				len = fxdr_unsigned(int, *tl);
			} else if (nd->nd_flag & ND_NFSV3) {
				/* NFSv3: 64 bit fileid, then name length. */
				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
				nfsva.na_fileid = fxdr_hyper(tl);
				tl += 2;
				len = fxdr_unsigned(int, *tl);
			} else {
				/* NFSv2: 32 bit fileid, then name length. */
				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
				nfsva.na_fileid =
				    fxdr_unsigned(long, *tl++);
				len = fxdr_unsigned(int, *tl);
			}
			if (len <= 0 || len > NFS_MAXNAMLEN) {
				error = EBADRPC;
				goto nfsmout;
			}
			tlen = NFSM_RNDUP(len);
			if (tlen == len)
				tlen += 4;  /* To ensure null termination */
			left = DIRBLKSIZ - blksiz;
			/*
			 * If the entry will not fit in the rest of this
			 * DIRBLKSIZ chunk, grow the previous record to fill
			 * the chunk and start a new one.
			 */
			if ((int)(tlen + DIRHDSIZ + NFSX_HYPER) > left) {
				dp->d_reclen += left;
				uio_iov_base_add(uiop, left);
				uio_iov_len_add(uiop, -(left));
				uio_uio_resid_add(uiop, -(left));
				uiop->uio_offset += left;
				blksiz = 0;
			}
			if ((int)(tlen + DIRHDSIZ + NFSX_HYPER) > uio_uio_resid(uiop))
				bigenough = 0;
			if (bigenough) {
				dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop));
				dp->d_namlen = len;
				dp->d_reclen = tlen + DIRHDSIZ + NFSX_HYPER;
				dp->d_type = DT_UNKNOWN;
				blksiz += dp->d_reclen;
				if (blksiz == DIRBLKSIZ)
					blksiz = 0;
				uio_uio_resid_add(uiop, -(DIRHDSIZ));
				uiop->uio_offset += DIRHDSIZ;
				uio_iov_base_add(uiop, DIRHDSIZ);
				uio_iov_len_add(uiop, -(DIRHDSIZ));
				/* Copy the name from the reply into the buffer. */
				error = nfsm_mbufuio(nd, uiop, len);
				if (error)
					goto nfsmout;
				cp = CAST_DOWN(caddr_t, uio_iov_base(uiop));
				tlen -= len;
				*cp = '\0';	/* null terminate */
				cp += tlen;	/* points to cookie storage */
				tl2 = (u_int32_t *)cp;
				uio_iov_base_add(uiop, (tlen + NFSX_HYPER));
				uio_iov_len_add(uiop, -(tlen + NFSX_HYPER));
				uio_uio_resid_add(uiop, -(tlen + NFSX_HYPER));
				uiop->uio_offset += (tlen + NFSX_HYPER);
			} else {
				/* No room: skip the name, keep parsing the entry. */
				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
				if (error)
					goto nfsmout;
			}
			if (nd->nd_flag & ND_NFSV4) {
				rderr = 0;
				nfsva.na_mntonfileno = 0xffffffff;
				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
				    NULL, NULL, &rderr, p, cred);
				if (error)
					goto nfsmout;
				NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
			} else if (nd->nd_flag & ND_NFSV3) {
				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
				ncookie.lval[0] = *tl++;
				ncookie.lval[1] = *tl++;
			} else {
				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
				ncookie.lval[0] = 0;
				ncookie.lval[1] = *tl++;
			}
			if (bigenough) {
			    if (nd->nd_flag & ND_NFSV4) {
				if (rderr) {
				    /* Attribute error for this entry: fileno 0. */
				    dp->d_fileno = 0;
				} else {
				    if (gotmnton) {
					if (nfsva.na_mntonfileno != 0xffffffff)
					    dp->d_fileno = nfsva.na_mntonfileno;
					else
					    dp->d_fileno = nfsva.na_fileid;
				    } else if (nfsva.na_filesid[0] ==
					dnp->n_vattr.na_filesid[0] &&
					nfsva.na_filesid[1] ==
					dnp->n_vattr.na_filesid[1]) {
					dp->d_fileno = nfsva.na_fileid;
				    } else {
					do {
					    fakefileno--;
					} while (fakefileno ==
					    nfsva.na_fileid);
					dp->d_fileno = fakefileno;
				    }
				    dp->d_type = vtonfs_dtype(nfsva.na_type);
				}
			    } else {
				dp->d_fileno = nfsva.na_fileid;
			    }
			    /*
			     * Store the entry's cookie after its name and make
			     * it the cookie for the next request and the caller.
			     */
			    *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
				ncookie.lval[0];
			    *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
				ncookie.lval[1];
			}
			more_dirs = fxdr_unsigned(int, *tl);
		}
		/*
		 * If at end of rpc data, get the eof boolean
		 */
		if (!more_dirs) {
			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
			eof = fxdr_unsigned(int, *tl);
			if (tryformoredirs)
				more_dirs = !eof;
			if (nd->nd_flag & ND_NFSV4) {
				error = nfscl_postop_attr(nd, nap, attrflagp,
				    stuff);
				if (error)
					goto nfsmout;
			}
		}
		mbuf_freem(nd->nd_mrep);
		nd->nd_mrep = NULL;
	}
	/*
	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
	 * by increasing d_reclen for the last record.
	 */
	if (blksiz > 0) {
		left = DIRBLKSIZ - blksiz;
		dp->d_reclen += left;
		uio_iov_base_add(uiop, left);
		uio_iov_len_add(uiop, -(left));
		uio_uio_resid_add(uiop, -(left));
		uiop->uio_offset += left;
	}

	/*
	 * If returning no data, assume end of file.
	 * If not bigenough, return not end of file, since you aren't
	 *    returning all the data
	 * Otherwise, return the eof flag from the server.
	 */
	if (eofp) {
		if (tresid == ((size_t)(uio_uio_resid(uiop))))
			*eofp = 1;
		else if (!bigenough)
			*eofp = 0;
		else
			*eofp = eof;
	}

	/*
	 * Add extra empty records to any remaining DIRBLKSIZ chunks.
	 */
	while (uio_uio_resid(uiop) > 0 && ((size_t)(uio_uio_resid(uiop))) != tresid) {
		dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop));
		dp->d_type = DT_UNKNOWN;
		dp->d_fileno = 0;
		dp->d_namlen = 0;
		dp->d_name[0] = '\0';
		/* The empty records carry the last cookie seen. */
		tl = (u_int32_t *)&dp->d_name[4];
		*tl++ = cookie.lval[0];
		*tl = cookie.lval[1];
		dp->d_reclen = DIRBLKSIZ;
		uio_iov_base_add(uiop, DIRBLKSIZ);
		uio_iov_len_add(uiop, -(DIRBLKSIZ));
		uio_uio_resid_add(uiop, -(DIRBLKSIZ));
		uiop->uio_offset += DIRBLKSIZ;
	}

nfsmout:
	if (nd->nd_mrep != NULL)
		mbuf_freem(nd->nd_mrep);
	return (error);
}

#ifndef APPLE
/*
 * NFS V3 readdir plus RPC. Used in place of nfsrpc_readdir().
 * (Also used for NFS V4 when mount flag set.)
 * (ditto above w.r.t. multiple of DIRBLKSIZ, etc.)
*/
/*
 * nfsrpc_readdirplus() fills the single iovec described by uiop with
 * struct dirent records, each followed by an NFSX_HYPER sized directory
 * offset cookie, and pads them out to DIRBLKSIZ boundaries.
 *
 * vp		directory vnode being read
 * uiop		destination; must have exactly one iovec and a residual
 *		that is a multiple of DIRBLKSIZ (asserted below)
 * cookiep	in: cookie to start from; out: cookie of the last entry copied
 * nap/attrflagp	post-op directory attributes and "attributes valid" flag
 * eofp		optional; set per the rules documented near the end
 *
 * Returns 0 or an error number.  Any reply mbuf chain still held in
 * nd->nd_mrep is released at nfsmout.
 *
 * NOTE(review): several paths reach nfsmout without an explicit goto on
 * that line, so NFSM_DISSECT() presumably sets error and jumps to nfsmout
 * on a short reply - confirm against the macro definition.
 */
APPLESTATIC int
nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
    int *eofp, void *stuff)
{
	int len, left;
	struct dirent *dp = NULL;
	u_int32_t *tl;
	vnode_t newvp = NULLVP;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nameidata nami, *ndp = &nami;
	struct componentname *cnp = &ndp->ni_cnd;
	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
	struct nfsnode *dnp = VTONFS(vp), *np;
	struct nfsvattr nfsva;
	struct nfsfh *nfhp;
	nfsquad_t cookie, ncookie;
	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
	int attrflag, tryformoredirs = 1, eof = 0, gotmnton = 0;
	int isdotdot = 0, unlocknewvp = 0;
	long dotfileid, dotdotfileid = 0, fileno = 0;
	char *cp;
	nfsattrbit_t attrbits, dattrbits;
	size_t tresid;
	u_int32_t *tl2 = NULL, fakefileno = 0xffffffff, rderr;
	struct timespec dctime;

	KASSERT(uiop->uio_iovcnt == 1 &&
	    (uio_uio_resid(uiop) & (DIRBLKSIZ - 1)) == 0,
	    ("nfs readdirplusrpc bad uio"));
	timespecclear(&dctime);
	*attrflagp = 0;
	if (eofp != NULL)
		*eofp = 0;
	ndp->ni_dvp = vp;
	nd->nd_mrep = NULL;
	cookie.lval[0] = cookiep->nfsuquad[0];
	cookie.lval[1] = cookiep->nfsuquad[1];
	/* Remember the starting residual so "nothing copied" can be detected. */
	tresid = uio_uio_resid(uiop);

	/*
	 * For NFSv4, first create the "." and ".." entries.
	 */
	if (NFSHASNFSV4(nmp)) {
		NFSGETATTR_ATTRBIT(&dattrbits);
		NFSZERO_ATTRBIT(&attrbits);
		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
		if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
		    NFSATTRBIT_MOUNTEDONFILEID)) {
			NFSSETBIT_ATTRBIT(&attrbits,
			    NFSATTRBIT_MOUNTEDONFILEID);
			gotmnton = 1;
		} else {
			/*
			 * Must fake it. Use the fileno, except when the
			 * fsid is != to that of the directory. For that
			 * case, generate a fake fileno that is not the same.
			 */
			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
			gotmnton = 0;
		}

		/*
		 * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
		 */
		if (uiop->uio_offset == 0) {
			/* Lookupp + Getfh + Getattr to learn the fileid of "..". */
			NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp);
			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			*tl++ = txdr_unsigned(NFSV4OP_GETFH);
			*tl = txdr_unsigned(NFSV4OP_GETATTR);
			(void) nfsrv_putattrbit(nd, &attrbits);
			error = nfscl_request(nd, vp, p, cred, stuff);
			if (error)
				return (error);
			dotfileid = 0;	/* Fake out the compiler. */
			if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
				error = nfsm_loadattr(nd, &nfsva);
				if (error != 0)
					goto nfsmout;
				dctime = nfsva.na_ctime;
				dotfileid = nfsva.na_fileid;
			}
			if (nd->nd_repstat == 0) {
				NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
				/* Skip over the returned file handle; only its length is used. */
				len = fxdr_unsigned(int, *(tl + 4));
				if (len > 0 && len <= NFSX_V4FHMAX)
					error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
				else
					error = EPERM;
				if (!error) {
					NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
					/* 0xffffffff marks "mounted-on fileno not returned". */
					nfsva.na_mntonfileno = 0xffffffff;
					error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
					    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
					    NULL, NULL, NULL, p, cred);
					if (error) {
						dotdotfileid = dotfileid;
					} else if (gotmnton) {
						if (nfsva.na_mntonfileno != 0xffffffff)
							dotdotfileid = nfsva.na_mntonfileno;
						else
							dotdotfileid = nfsva.na_fileid;
					} else if (nfsva.na_filesid[0] ==
					    dnp->n_vattr.na_filesid[0] &&
					    nfsva.na_filesid[1] ==
					    dnp->n_vattr.na_filesid[1]) {
						dotdotfileid = nfsva.na_fileid;
					} else {
						/* Different fsid: pick a fake fileno that differs. */
						do {
							fakefileno--;
						} while (fakefileno ==
						    nfsva.na_fileid);
						dotdotfileid = fakefileno;
					}
				}
			} else if (nd->nd_repstat == NFSERR_NOENT) {
				/*
				 * Lookupp returns NFSERR_NOENT when we are
				 * at the root, so just use the current dir.
				 */
				nd->nd_repstat = 0;
				dotdotfileid = dotfileid;
			} else {
				error = nd->nd_repstat;
			}
			mbuf_freem(nd->nd_mrep);
			if (error)
				return (error);
			nd->nd_mrep = NULL;
			/* Hand craft the "." record. */
			dp = (struct dirent *)uio_iov_base(uiop);
			dp->d_type = DT_DIR;
			dp->d_fileno = dotfileid;
			dp->d_namlen = 1;
			dp->d_name[0] = '.';
			dp->d_name[1] = '\0';
			dp->d_reclen = DIRENT_SIZE(dp) + NFSX_HYPER;
			/*
			 * Just make these offset cookie 0.
			 */
			tl = (u_int32_t *)&dp->d_name[4];
			*tl++ = 0;
			*tl = 0;
			blksiz += dp->d_reclen;
			uio_uio_resid_add(uiop, -(dp->d_reclen));
			uiop->uio_offset += dp->d_reclen;
			uio_iov_base_add(uiop, dp->d_reclen);
			uio_iov_len_add(uiop, -(dp->d_reclen));
			/* Hand craft the ".." record. */
			dp = (struct dirent *)uio_iov_base(uiop);
			dp->d_type = DT_DIR;
			dp->d_fileno = dotdotfileid;
			dp->d_namlen = 2;
			dp->d_name[0] = '.';
			dp->d_name[1] = '.';
			dp->d_name[2] = '\0';
			dp->d_reclen = DIRENT_SIZE(dp) + NFSX_HYPER;
			/*
			 * Just make these offset cookie 0.
			 */
			tl = (u_int32_t *)&dp->d_name[4];
			*tl++ = 0;
			*tl = 0;
			blksiz += dp->d_reclen;
			uio_uio_resid_add(uiop, -(dp->d_reclen));
			uiop->uio_offset += dp->d_reclen;
			uio_iov_base_add(uiop, dp->d_reclen);
			uio_iov_len_add(uiop, -(dp->d_reclen));
		}
		NFSREADDIRPLUS_ATTRBIT(&attrbits);
		if (gotmnton)
			NFSSETBIT_ATTRBIT(&attrbits,
			    NFSATTRBIT_MOUNTEDONFILEID);
	}

	/*
	 * Loop around doing readdir rpc's of size nm_readdirsize.
	 * The stopping criteria is EOF or buffer full.
	 */
	while (more_dirs && bigenough) {
		*attrflagp = 0;
		NFSCL_REQSTART(nd, NFSPROC_READDIRPLUS, vp);
		NFSM_BUILD(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
		*tl++ = cookie.lval[0];
		*tl++ = cookie.lval[1];
		/* A zero cookie is sent with a zero verifier. */
		if (cookie.qval == 0) {
			*tl++ = 0;
			*tl++ = 0;
		} else {
			NFSLOCKNODE(dnp);
			*tl++ = dnp->n_cookieverf.nfsuquad[0];
			*tl++ = dnp->n_cookieverf.nfsuquad[1];
			NFSUNLOCKNODE(dnp);
		}
		*tl++ = txdr_unsigned(nmp->nm_readdirsize);
		*tl = txdr_unsigned(nmp->nm_readdirsize);
		if (nd->nd_flag & ND_NFSV4) {
			(void) nfsrv_putattrbit(nd, &attrbits);
			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
			*tl = txdr_unsigned(NFSV4OP_GETATTR);
			(void) nfsrv_putattrbit(nd, &dattrbits);
		}
		error = nfscl_request(nd, vp, p, cred, stuff);
		if (error)
			return (error);
		if (nd->nd_flag & ND_NFSV3)
			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
		if (nd->nd_repstat || error) {
			if (!error)
				error = nd->nd_repstat;
			goto nfsmout;
		}
		if ((nd->nd_flag & ND_NFSV3) != 0 && *attrflagp != 0)
			dctime = nap->na_ctime;
		NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
		/* Save the cookie verifier from the reply in the nfsnode. */
		NFSLOCKNODE(dnp);
		dnp->n_cookieverf.nfsuquad[0] = *tl++;
		dnp->n_cookieverf.nfsuquad[1] = *tl++;
		NFSUNLOCKNODE(dnp);
		more_dirs = fxdr_unsigned(int, *tl);
		if (!more_dirs)
			tryformoredirs = 0;

		/* loop through the dir entries, doctoring them to 4bsd form */
		while (more_dirs && bigenough) {
			NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
			if (nd->nd_flag & ND_NFSV4) {
				ncookie.lval[0] = *tl++;
				ncookie.lval[1] = *tl++;
			} else {
				/* Only the second word of the fileid is used. */
				fileno = fxdr_unsigned(long, *++tl);
				tl++;
			}
			len = fxdr_unsigned(int, *tl);
			if (len <= 0 || len > NFS_MAXNAMLEN) {
				error = EBADRPC;
				goto nfsmout;
			}
			tlen = NFSM_RNDUP(len);
			if (tlen == len)
				tlen += 4;	/* To ensure null termination */
			left = DIRBLKSIZ - blksiz;
			/* Record will not fit in this block: pad the previous one. */
			if ((tlen + DIRHDSIZ + NFSX_HYPER) > left) {
				dp->d_reclen += left;
				uio_iov_base_add(uiop, left);
				uio_iov_len_add(uiop, -(left));
				uio_uio_resid_add(uiop, -(left));
				uiop->uio_offset += left;
				blksiz = 0;
			}
			if ((tlen + DIRHDSIZ + NFSX_HYPER) > uio_uio_resid(uiop))
				bigenough = 0;
			if (bigenough) {
				dp = (struct dirent *)uio_iov_base(uiop);
				dp->d_namlen = len;
				dp->d_reclen = tlen + DIRHDSIZ + NFSX_HYPER;
				dp->d_type = DT_UNKNOWN;
				blksiz += dp->d_reclen;
				if (blksiz == DIRBLKSIZ)
					blksiz = 0;
				uio_uio_resid_add(uiop, -(DIRHDSIZ));
				uiop->uio_offset += DIRHDSIZ;
				uio_iov_base_add(uiop, DIRHDSIZ);
				uio_iov_len_add(uiop, -(DIRHDSIZ));
				cnp->cn_nameptr = uio_iov_base(uiop);
				cnp->cn_namelen = len;
				NFSCNHASHZERO(cnp);
				error = nfsm_mbufuio(nd, uiop, len);
				if (error)
					goto nfsmout;
				cp = uio_iov_base(uiop);
				tlen -= len;
				*cp = '\0';
				cp += tlen;	/* points to cookie storage */
				tl2 = (u_int32_t *)cp;
				if (len == 2 && cnp->cn_nameptr[0] == '.' &&
				    cnp->cn_nameptr[1] == '.')
					isdotdot = 1;
				else
					isdotdot = 0;
				uio_iov_base_add(uiop, (tlen + NFSX_HYPER));
				uio_iov_len_add(uiop, -(tlen + NFSX_HYPER));
				uio_uio_resid_add(uiop, -(tlen + NFSX_HYPER));
				uiop->uio_offset += (tlen + NFSX_HYPER);
			} else {
				/* No room: skip the name but keep parsing the reply. */
				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
				if (error)
					goto nfsmout;
			}
			nfhp = NULL;
			if (nd->nd_flag & ND_NFSV3) {
				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
				ncookie.lval[0] = *tl++;
				ncookie.lval[1] = *tl++;
				attrflag = fxdr_unsigned(int, *tl);
				if (attrflag) {
					error = nfsm_loadattr(nd, &nfsva);
					if (error)
						goto nfsmout;
				}
				NFSM_DISSECT(tl,u_int32_t *,NFSX_UNSIGNED);
				if (*tl) {
					error = nfsm_getfh(nd, &nfhp);
					if (error)
						goto nfsmout;
				}
				/* A file handle without attributes is discarded. */
				if (!attrflag && nfhp != NULL) {
					FREE((caddr_t)nfhp, M_NFSFH);
					nfhp = NULL;
				}
			} else {
				rderr = 0;
				nfsva.na_mntonfileno = 0xffffffff;
				error = nfsv4_loadattr(nd, NULL, &nfsva, &nfhp,
				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
				    NULL, NULL, &rderr, p, cred);
				if (error)
					goto nfsmout;
			}

			if (bigenough) {
				if (nd->nd_flag & ND_NFSV4) {
					if (rderr) {
						dp->d_fileno = 0;
					} else if (gotmnton) {
						if (nfsva.na_mntonfileno != 0xffffffff)
							dp->d_fileno = nfsva.na_mntonfileno;
						else
							dp->d_fileno = nfsva.na_fileid;
					} else if (nfsva.na_filesid[0] ==
					    dnp->n_vattr.na_filesid[0] &&
					    nfsva.na_filesid[1] ==
					    dnp->n_vattr.na_filesid[1]) {
						dp->d_fileno = nfsva.na_fileid;
					} else {
						do {
							fakefileno--;
						} while (fakefileno ==
						    nfsva.na_fileid);
						dp->d_fileno = fakefileno;
					}
				} else {
					dp->d_fileno = fileno;
				}
				/* Store the cookie after the name and advance our position. */
				*tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
				    ncookie.lval[0];
				*tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
				    ncookie.lval[1];

				if (nfhp != NULL) {
					if (NFSRV_CMPFH(nfhp->nfh_fh, nfhp->nfh_len,
					    dnp->n_fhp->nfh_fh, dnp->n_fhp->nfh_len)) {
						/* Entry is the directory itself. */
						VREF(vp);
						newvp = vp;
						unlocknewvp = 0;
						FREE((caddr_t)nfhp, M_NFSFH);
						np = dnp;
					} else if (isdotdot != 0) {
						/*
						 * Skip doing a nfscl_nget() call for "..".
						 * There's a race between acquiring the nfs
						 * node here and lookups that look for the
						 * directory being read (in the parent).
						 * It would try to get a lock on ".." here,
						 * owning the lock on the directory being
						 * read. Lookup will hold the lock on ".."
						 * and try to acquire the lock on the
						 * directory being read.
						 * If the directory is unlocked/relocked,
						 * then there is a LOR with the buflock
						 * vp is relocked.
						 */
						free(nfhp, M_NFSFH);
					} else {
						error = nfscl_nget(vnode_mount(vp), vp,
						    nfhp, cnp, p, &np, NULL, LK_EXCLUSIVE);
						if (!error) {
							newvp = NFSTOV(np);
							unlocknewvp = 1;
						}
					}
					nfhp = NULL;
					if (newvp != NULLVP) {
						error = nfscl_loadattrcache(&newvp,
						    &nfsva, NULL, NULL, 0, 0);
						if (error) {
							if (unlocknewvp)
								vput(newvp);
							else
								vrele(newvp);
							goto nfsmout;
						}
						dp->d_type =
						    vtonfs_dtype(np->n_vattr.na_type);
						ndp->ni_vp = newvp;
						NFSCNHASH(cnp, HASHINIT);
						/*
						 * Directories are only entered in the
						 * name cache when a directory ctime
						 * was obtained above.
						 */
						if (cnp->cn_namelen <= NCHNAMLEN &&
						    (newvp->v_type != VDIR ||
						     dctime.tv_sec != 0)) {
							cache_enter_time(ndp->ni_dvp,
							    ndp->ni_vp, cnp,
							    &nfsva.na_ctime,
							    newvp->v_type != VDIR ? NULL :
							    &dctime);
						}
						if (unlocknewvp)
							vput(newvp);
						else
							vrele(newvp);
						newvp = NULLVP;
					}
				}
			} else if (nfhp != NULL) {
				FREE((caddr_t)nfhp, M_NFSFH);
			}
			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
			more_dirs = fxdr_unsigned(int, *tl);
		}
		/*
		 * If at end of rpc data, get the eof boolean
		 */
		if (!more_dirs) {
			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
			eof = fxdr_unsigned(int, *tl);
			if (tryformoredirs)
				more_dirs = !eof;
			if (nd->nd_flag & ND_NFSV4) {
				error = nfscl_postop_attr(nd, nap, attrflagp,
				    stuff);
				if (error)
					goto nfsmout;
			}
		}
		mbuf_freem(nd->nd_mrep);
		nd->nd_mrep = NULL;
	}
	/*
	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
	 * by increasing d_reclen for the last record.
	 */
	if (blksiz > 0) {
		left = DIRBLKSIZ - blksiz;
		dp->d_reclen += left;
		uio_iov_base_add(uiop, left);
		uio_iov_len_add(uiop, -(left));
		uio_uio_resid_add(uiop, -(left));
		uiop->uio_offset += left;
	}

	/*
	 * If returning no data, assume end of file.
	 * If not bigenough, return not end of file, since you aren't
	 * returning all the data
	 * Otherwise, return the eof flag from the server.
	 */
	if (eofp != NULL) {
		if (tresid == uio_uio_resid(uiop))
			*eofp = 1;
		else if (!bigenough)
			*eofp = 0;
		else
			*eofp = eof;
	}

	/*
	 * Add extra empty records to any remaining DIRBLKSIZ chunks.
	 */
	while (uio_uio_resid(uiop) > 0 && uio_uio_resid(uiop) != tresid) {
		dp = (struct dirent *)uio_iov_base(uiop);
		dp->d_type = DT_UNKNOWN;
		dp->d_fileno = 0;
		dp->d_namlen = 0;
		dp->d_name[0] = '\0';
		tl = (u_int32_t *)&dp->d_name[4];
		*tl++ = cookie.lval[0];
		*tl = cookie.lval[1];
		dp->d_reclen = DIRBLKSIZ;
		uio_iov_base_add(uiop, DIRBLKSIZ);
		uio_iov_len_add(uiop, -(DIRBLKSIZ));
		uio_uio_resid_add(uiop, -(DIRBLKSIZ));
		uiop->uio_offset += DIRBLKSIZ;
	}

nfsmout:
	if (nd->nd_mrep != NULL)
		mbuf_freem(nd->nd_mrep);
	return (error);
}
#endif	/* !APPLE */

/*
 * Nfs commit rpc
 *
 * Sends a Commit for cnt bytes at offset (plus a Getattr op for NFSv4).
 * The write verifier in the reply is compared with nmp->nm_verf while the
 * mount is locked; when it differs the new verifier is stored and the
 * call fails with NFSERR_STALEWRITEVERF.
 * *attrflagp is cleared on entry; nap/attrflagp are passed to
 * nfscl_wcc_data() and, for NFSv4, nfscl_postop_attr().
 */
APPLESTATIC int
nfsrpc_commit(vnode_t vp, u_quad_t offset, int cnt, struct ucred *cred,
    NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
{
	u_int32_t *tl;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	nfsattrbit_t attrbits;
	int error;
	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));

	*attrflagp = 0;
	NFSCL_REQSTART(nd, NFSPROC_COMMIT, vp);
	NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
	txdr_hyper(offset, tl);
	tl += 2;
	*tl = txdr_unsigned(cnt);
	if (nd->nd_flag & ND_NFSV4) {
		/*
		 * And do a Getattr op.
		 */
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(NFSV4OP_GETATTR);
		NFSGETATTR_ATTRBIT(&attrbits);
		(void) nfsrv_putattrbit(nd, &attrbits);
	}
	error = nfscl_request(nd, vp, p, cred, stuff);
	if (error)
		return (error);
	error = nfscl_wcc_data(nd, vp, nap, attrflagp, NULL, stuff);
	if (!error && !nd->nd_repstat) {
		NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
		NFSLOCKMNT(nmp);
		if (NFSBCMP(nmp->nm_verf, tl, NFSX_VERF)) {
			/* Verifier changed: remember it and report it as stale. */
			NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
			nd->nd_repstat = NFSERR_STALEWRITEVERF;
		}
		NFSUNLOCKMNT(nmp);
		if (nd->nd_flag & ND_NFSV4)
			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
	}
nfsmout:
	/* A local error takes precedence over the server's reply status. */
	if (!error && nd->nd_repstat)
		error = nd->nd_repstat;
	mbuf_freem(nd->nd_mrep);
	return (error);
}

/*
 * NFS byte range lock rpc.
 * (Mostly just calls one of the three lower level RPC routines.)
 *
 * op selects the action: F_GETLK tests for a conflicting lock, F_UNLCK
 * (with fl->l_type == F_UNLCK) releases locks and F_SETLK acquires one;
 * anything else yields EINVAL.  size is only used to resolve SEEK_END.
 * The whole operation is retried while the server reports a grace, delay,
 * stale or bad session status, and up to 4 times after an expired or bad
 * stateid, after which EIO is returned.
 */
APPLESTATIC int
nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
    int reclaim, struct ucred *cred, NFSPROC_T *p, void *id, int flags)
{
	struct nfscllockowner *lp;
	struct nfsclclient *clp;
	struct nfsfh *nfhp;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
	u_int64_t off, len;
	off_t start, end;
	u_int32_t clidrev = 0;
	int error = 0, newone = 0, expireret = 0, retrycnt, donelocally;
	int callcnt, dorpc;

	/*
	 * Convert the flock structure into a start and end and do POSIX
	 * bounds checking.
	 */
	switch (fl->l_whence) {
	case SEEK_SET:
	case SEEK_CUR:
		/*
		 * Caller is responsible for adding any necessary offset
		 * when SEEK_CUR is used.
		 */
		start = fl->l_start;
		off = fl->l_start;
		break;
	case SEEK_END:
		start = size + fl->l_start;
		off = size + fl->l_start;
		break;
	default:
		return (EINVAL);
	}
	if (start < 0)
		return (EINVAL);
	if (fl->l_len != 0) {
		end = start + fl->l_len - 1;
		if (end < start)
			return (EINVAL);
	}

	/* A zero l_len is sent as NFS64BITSSET. */
	len = fl->l_len;
	if (len == 0)
		len = NFS64BITSSET;
	retrycnt = 0;
	do {
		nd->nd_repstat = 0;
		if (op == F_GETLK) {
			error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp);
			if (error)
				return (error);
			error = nfscl_lockt(vp, clp, off, len, fl, p, id,
			    flags);
			if (!error) {
				clidrev = clp->nfsc_clientidrev;
				error = nfsrpc_lockt(nd, vp, clp, off, len, fl,
				    cred, p, id, flags);
			} else if (error == -1) {
				/* -1 from nfscl_lockt() is mapped to success, no RPC. */
				error = 0;
			}
			nfscl_clientrelease(clp);
		} else if (op == F_UNLCK && fl->l_type == F_UNLCK) {
			/*
			 * We must loop around for all lockowner cases.
			 */
			callcnt = 0;
			error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp);
			if (error)
				return (error);
			do {
				error = nfscl_relbytelock(vp, off, len, cred, p,
				    callcnt, clp, id, flags, &lp, &dorpc);
				/*
				 * If it returns a NULL lp, we're done.
				 */
				if (lp == NULL) {
					if (callcnt == 0)
						nfscl_clientrelease(clp);
					else
						nfscl_releasealllocks(clp, vp,
						    p, id, flags);
					return (error);
				}
				if (nmp->nm_clp != NULL)
					clidrev = nmp->nm_clp->nfsc_clientidrev;
				else
					clidrev = 0;
				/*
				 * If the server doesn't support Posix lock semantics,
				 * only allow locks on the entire file, since it won't
				 * handle overlapping byte ranges.
				 * There might still be a problem when a lock
				 * upgrade/downgrade (read<->write) occurs, since the
				 * server "might" expect an unlock first?
				 */
				if (dorpc && (lp->nfsl_open->nfso_posixlock ||
				    (off == 0 && len == NFS64BITSSET))) {
					/*
					 * Since the lock records will go away, we must
					 * wait for grace and delay here.
					 */
					do {
						error = nfsrpc_locku(nd, nmp, lp, off,
						    len, NFSV4LOCKT_READ, cred, p, 0);
						if ((nd->nd_repstat == NFSERR_GRACE ||
						    nd->nd_repstat == NFSERR_DELAY) &&
						    error == 0)
							(void) nfs_catnap(PZERO,
							    (int)nd->nd_repstat,
							    "nfs_advlock");
					} while ((nd->nd_repstat == NFSERR_GRACE ||
					    nd->nd_repstat == NFSERR_DELAY) &&
					    error == 0);
				}
				callcnt++;
			} while (error == 0 && nd->nd_repstat == 0);
			nfscl_releasealllocks(clp, vp, p, id, flags);
		} else if (op == F_SETLK) {
			error = nfscl_getbytelock(vp, off, len, fl->l_type, cred,
			    p, NULL, 0, id, flags, NULL, NULL, &lp, &newone,
			    &donelocally);
			if (error || donelocally) {
				return (error);
			}
			if (nmp->nm_clp != NULL)
				clidrev = nmp->nm_clp->nfsc_clientidrev;
			else
				clidrev = 0;
			nfhp = VTONFS(vp)->n_fhp;
			/* Without Posix lock semantics only whole file locks are allowed. */
			if (!lp->nfsl_open->nfso_posixlock &&
			    (off != 0 || len != NFS64BITSSET)) {
				error = EINVAL;
			} else {
				error = nfsrpc_lock(nd, nmp, vp, nfhp->nfh_fh,
				    nfhp->nfh_len, lp, newone, reclaim, off,
				    len, fl->l_type, cred, p, 0);
			}
			if (!error)
				error = nd->nd_repstat;
			nfscl_lockrelease(lp, error, newone);
		} else {
			error = EINVAL;
		}
		if (!error)
			error = nd->nd_repstat;
		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
		    error == NFSERR_STALEDONTRECOVER ||
		    error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
		    error == NFSERR_BADSESSION) {
			(void) nfs_catnap(PZERO, error, "nfs_advlock");
		} else if ((error == NFSERR_EXPIRED ||
		    error == NFSERR_BADSTATEID) && clidrev != 0) {
			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
			retrycnt++;
		}
	} while (error == NFSERR_GRACE ||
	    error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_STALESTATEID ||
	    error == NFSERR_BADSESSION ||
	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
	     expireret == 0 && clidrev != 0 && retrycnt < 4));
	if (error && retrycnt >= 4)
		error = EIO;
	return (error);
}

/*
 * The lower level routine for the LockT case.
*/
/*
 * Builds and sends a LockT request for the byte range off/len on vp and
 * translates the reply into *fl:
 *  - no conflict: fl->l_type becomes F_UNLCK;
 *  - NFSERR_DENIED: fl describes the conflicting lock (range, type,
 *    l_pid forced to 0) and the reply status is cleared;
 *  - NFSERR_STALECLIENTID: recovery is initiated for clp.
 * Returns a local error number; the server status is left in
 * nd->nd_repstat for the caller.
 */
APPLESTATIC int
nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp,
    struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl,
    struct ucred *cred, NFSPROC_T *p, void *id, int flags)
{
	struct nfsclsession *tsep;
	struct nfsmount *nmp;
	struct nfsfh *fhp;
	uint8_t lockowner[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
	u_int32_t *tl;
	int error, locktype, ownerlen;

	nmp = VFSTONFS(vp->v_mount);
	NFSCL_REQSTART(nd, NFSPROC_LOCKT, vp);

	/* Lock type, offset, length and client id: 7 XDR words. */
	NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
	*tl++ = (fl->l_type == F_RDLCK) ?
	    txdr_unsigned(NFSV4LOCKT_READ) : txdr_unsigned(NFSV4LOCKT_WRITE);
	txdr_hyper(off, tl);
	tl += 2;
	txdr_hyper(len, tl);
	tl += 2;
	tsep = nfsmnt_mdssession(nmp);
	*tl++ = tsep->nfsess_clientid.lval[0];
	*tl = tsep->nfsess_clientid.lval[1];

	/* The owner string is the lock owner name followed by the file handle. */
	nfscl_filllockowner(id, lockowner, flags);
	fhp = VTONFS(vp)->n_fhp;
	NFSBCOPY(fhp->nfh_fh, &lockowner[NFSV4CL_LOCKNAMELEN], fhp->nfh_len);
	(void)nfsm_strtom(nd, lockowner, NFSV4CL_LOCKNAMELEN + fhp->nfh_len);

	error = nfscl_request(nd, vp, p, cred, NULL);
	if (error != 0)
		return (error);

	switch (nd->nd_repstat) {
	case 0:
		/* Nothing conflicts with the requested range. */
		fl->l_type = F_UNLCK;
		break;
	case NFSERR_DENIED:
		nd->nd_repstat = 0;
		fl->l_whence = SEEK_SET;
		NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
		fl->l_start = fxdr_hyper(tl);
		tl += 2;
		len = fxdr_hyper(tl);
		tl += 2;
		fl->l_len = (len == NFS64BITSSET) ? 0 : len;
		locktype = fxdr_unsigned(int, *tl++);
		fl->l_type = (locktype == NFSV4LOCKT_WRITE) ? F_WRLCK : F_RDLCK;
		/*
		 * XXX The conflicting lock_owner is not interpreted: report
		 * pid 0 and step over the owner's opaque data.
		 */
		fl->l_pid = (pid_t)0;
		tl += 2;
		ownerlen = fxdr_unsigned(int, *tl);
		if (ownerlen < 0 || ownerlen > NFSV4_OPAQUELIMIT)
			error = EBADRPC;
		else
			error = nfsm_advance(nd, NFSM_RNDUP(ownerlen), -1);
		break;
	case NFSERR_STALECLIENTID:
		nfscl_initiate_recovery(clp);
		break;
	default:
		break;
	}
nfsmout:
	mbuf_freem(nd->nd_mrep);
	return (error);
}

/*
 * Lower level function that performs the LockU RPC.
*/
/*
 * Sends a LockU for the range off/len on the open file that lp belongs
 * to.  On success the lock stateid in lp is refreshed from the reply; a
 * NFSERR_STALESTATEID reply initiates recovery for the owning client.
 * When syscred is non-zero ND_USEGSSNAME is set on the request.
 * Returns a local error number; the server status stays in nd->nd_repstat.
 */
static int
nfsrpc_locku(struct nfsrv_descript *nd, struct nfsmount *nmp,
    struct nfscllockowner *lp, u_int64_t off, u_int64_t len,
    u_int32_t type, struct ucred *cred, NFSPROC_T *p, int syscred)
{
	u_int32_t *tl;
	int error, i;

	nfscl_reqstart(nd, NFSPROC_LOCKU, nmp, lp->nfsl_open->nfso_fh,
	    lp->nfsl_open->nfso_fhlen, NULL, NULL, 0, 0);
	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(type);

	/* Test hook: occasionally send a seqid that is one too large. */
	if (nfstest_outofseq != 0 && (arc4random() % nfstest_outofseq) == 0)
		*tl++ = txdr_unsigned(lp->nfsl_seqid + 1);
	else
		*tl++ = txdr_unsigned(lp->nfsl_seqid);

	/* Lock stateid; the seqid word is sent as 0 when NFSHASNFSV4N(). */
	*tl++ = (NFSHASNFSV4N(nmp)) ? 0 : lp->nfsl_stateid.seqid;
	for (i = 0; i < 3; i++)
		*tl++ = lp->nfsl_stateid.other[i];

	txdr_hyper(off, tl);
	tl += 2;
	txdr_hyper(len, tl);
	if (syscred != 0)
		nd->nd_flag |= ND_USEGSSNAME;
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	/* Done before the error check, i.e. also when the request failed. */
	NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
	if (error != 0)
		return (error);

	switch (nd->nd_repstat) {
	case 0:
		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
		lp->nfsl_stateid.seqid = tl[0];
		lp->nfsl_stateid.other[0] = tl[1];
		lp->nfsl_stateid.other[1] = tl[2];
		lp->nfsl_stateid.other[2] = tl[3];
		break;
	case NFSERR_STALESTATEID:
		nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
		break;
	default:
		break;
	}
nfsmout:
	mbuf_freem(nd->nd_mrep);
	return (error);
}

/*
 * The actual Lock RPC.
*/
/*
 * nfsrpc_lock() sends a Lock for the range off/len on file handle
 * nfhp/fhlen.  When newone is non-zero the request carries the open
 * stateid plus a new lock owner (owner name followed by the file handle);
 * otherwise it carries the existing lock stateid from lp.
 * On success the lock stateid in lp is updated from the reply.  For
 * NFSERR_DENIED the conflicting owner in the reply is skipped over and for
 * NFSERR_STALESTATEID recovery is initiated.
 * Returns a local error number; the server status is in nd->nd_repstat.
 *
 * NOTE(review): fhlen bytes are copied into own[] without a local bounds
 * check - presumably callers guarantee fhlen <= NFSX_V4FHMAX; confirm.
 */
APPLESTATIC int
nfsrpc_lock(struct nfsrv_descript *nd, struct nfsmount *nmp, vnode_t vp,
    u_int8_t *nfhp, int fhlen, struct nfscllockowner *lp, int newone,
    int reclaim, u_int64_t off, u_int64_t len, short type, struct ucred *cred,
    NFSPROC_T *p, int syscred)
{
	u_int32_t *tl;
	int error, size;
	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
	struct nfsclsession *tsep;

	nfscl_reqstart(nd, NFSPROC_LOCK, nmp, nfhp, fhlen, NULL, NULL, 0, 0);
	NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
	if (type == F_RDLCK)
		*tl++ = txdr_unsigned(NFSV4LOCKT_READ);
	else
		*tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
	*tl++ = txdr_unsigned(reclaim);
	txdr_hyper(off, tl);
	tl += 2;
	txdr_hyper(len, tl);
	tl += 2;
	if (newone) {
		/* New lock owner: open seqid, open stateid, lock seqid, owner. */
		*tl = newnfs_true;
		NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
		    2 * NFSX_UNSIGNED + NFSX_HYPER);
		*tl++ = txdr_unsigned(lp->nfsl_open->nfso_own->nfsow_seqid);
		if (NFSHASNFSV4N(nmp))
			*tl++ = 0;
		else
			*tl++ = lp->nfsl_open->nfso_stateid.seqid;
		*tl++ = lp->nfsl_open->nfso_stateid.other[0];
		*tl++ = lp->nfsl_open->nfso_stateid.other[1];
		*tl++ = lp->nfsl_open->nfso_stateid.other[2];
		*tl++ = txdr_unsigned(lp->nfsl_seqid);
		tsep = nfsmnt_mdssession(nmp);
		*tl++ = tsep->nfsess_clientid.lval[0];
		*tl = tsep->nfsess_clientid.lval[1];
		NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
		NFSBCOPY(nfhp, &own[NFSV4CL_LOCKNAMELEN], fhlen);
		(void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
	} else {
		/* Existing lock owner: lock stateid and lock seqid. */
		*tl = newnfs_false;
		NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED);
		if (NFSHASNFSV4N(nmp))
			*tl++ = 0;
		else
			*tl++ = lp->nfsl_stateid.seqid;
		*tl++ = lp->nfsl_stateid.other[0];
		*tl++ = lp->nfsl_stateid.other[1];
		*tl++ = lp->nfsl_stateid.other[2];
		*tl = txdr_unsigned(lp->nfsl_seqid);
		/* Test hook: occasionally send a seqid that is one too large. */
		if (nfstest_outofseq &&
		    (arc4random() % nfstest_outofseq) == 0)
			*tl = txdr_unsigned(lp->nfsl_seqid + 1);
	}
	if (syscred)
		nd->nd_flag |= ND_USEGSSNAME;
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	if (error)
		return (error);
	if (newone)
		NFSCL_INCRSEQID(lp->nfsl_open->nfso_own->nfsow_seqid, nd);
	NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
	if (nd->nd_repstat == 0) {
		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
		lp->nfsl_stateid.seqid = *tl++;
		lp->nfsl_stateid.other[0] = *tl++;
		lp->nfsl_stateid.other[1] = *tl++;
		lp->nfsl_stateid.other[2] = *tl;
	} else if (nd->nd_repstat == NFSERR_DENIED) {
		/* Only the owner length (last word) is used, to skip the owner. */
		NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
		size = fxdr_unsigned(int, *(tl + 7));
		if (size < 0 || size > NFSV4_OPAQUELIMIT)
			error = EBADRPC;
		if (!error)
			error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
	} else if (nd->nd_repstat == NFSERR_STALESTATEID)
		nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
nfsmout:
	mbuf_freem(nd->nd_mrep);
	return (error);
}

/*
 * nfs statfs rpc
 * (always called with the vp for the mount point)
 *
 * For NFSv4 a Getattr is done and nfsv4_loadattr() is handed sbp and fsp;
 * the file system id from the reply is also recorded in the mount
 * structure.  Otherwise an Fsstat is done and sbp is decoded here, using
 * the V3 layout when NFSHASNFSV3() and the older layout if not.
 */
APPLESTATIC int
nfsrpc_statfs(vnode_t vp, struct nfsstatfs *sbp, struct nfsfsinfo *fsp,
    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
    void *stuff)
{
	u_int32_t *tl = NULL;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfsmount *nmp;
	nfsattrbit_t attrbits;
	int error;

	*attrflagp = 0;
	nmp = VFSTONFS(vnode_mount(vp));
	if (NFSHASNFSV4(nmp)) {
		/*
		 * For V4, you actually do a getattr.
		 */
		NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
		NFSSTATFS_GETATTRBIT(&attrbits);
		(void) nfsrv_putattrbit(nd, &attrbits);
		nd->nd_flag |= ND_USEGSSNAME;
		error = nfscl_request(nd, vp, p, cred, stuff);
		if (error)
			return (error);
		if (nd->nd_repstat == 0) {
			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
			    NULL, NULL, sbp, fsp, NULL, 0, NULL, NULL, NULL, p,
			    cred);
			if (!error) {
				nmp->nm_fsid[0] = nap->na_filesid[0];
				nmp->nm_fsid[1] = nap->na_filesid[1];
				NFSSETHASSETFSID(nmp);
				*attrflagp = 1;
			}
		} else {
			error = nd->nd_repstat;
		}
		if (error)
			goto nfsmout;
	} else {
		NFSCL_REQSTART(nd, NFSPROC_FSSTAT, vp);
		error = nfscl_request(nd, vp, p, cred, stuff);
		if (error)
			return (error);
		if (nd->nd_flag & ND_NFSV3) {
			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
			if (error)
				goto nfsmout;
		}
		if (nd->nd_repstat) {
			error = nd->nd_repstat;
			goto nfsmout;
		}
		NFSM_DISSECT(tl, u_int32_t *,
		    NFSX_STATFS(nd->nd_flag & ND_NFSV3));
	}
	if (NFSHASNFSV3(nmp)) {
		sbp->sf_tbytes = fxdr_hyper(tl); tl += 2;
		sbp->sf_fbytes = fxdr_hyper(tl); tl += 2;
		sbp->sf_abytes = fxdr_hyper(tl); tl += 2;
		sbp->sf_tfiles = fxdr_hyper(tl); tl += 2;
		sbp->sf_ffiles = fxdr_hyper(tl); tl += 2;
		sbp->sf_afiles = fxdr_hyper(tl); tl += 2;
		sbp->sf_invarsec = fxdr_unsigned(u_int32_t, *tl);
	} else if (NFSHASNFSV4(nmp) == 0) {
		sbp->sf_tsize = fxdr_unsigned(u_int32_t, *tl++);
		sbp->sf_bsize = fxdr_unsigned(u_int32_t, *tl++);
		sbp->sf_blocks = fxdr_unsigned(u_int32_t, *tl++);
		sbp->sf_bfree = fxdr_unsigned(u_int32_t, *tl++);
		sbp->sf_bavail = fxdr_unsigned(u_int32_t, *tl);
	}
nfsmout:
	mbuf_freem(nd->nd_mrep);
	return (error);
}

/*
 * nfs pathconf rpc
 *
 * For NFSv4 a Getattr is done and nfsv4_loadattr() fills in pc; otherwise
 * a Pathconf RPC is done and the six reply words are decoded into pc.
 */
APPLESTATIC int
nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc,
    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
    void *stuff)
{
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfsmount *nmp;
	u_int32_t *tl;
	nfsattrbit_t attrbits;
	int error;

	*attrflagp = 0;
	nmp = VFSTONFS(vnode_mount(vp));
	if (NFSHASNFSV4(nmp)) {
		/*
		 * For V4, you actually do a getattr.
		 */
		NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
		NFSPATHCONF_GETATTRBIT(&attrbits);
		(void) nfsrv_putattrbit(nd, &attrbits);
		nd->nd_flag |= ND_USEGSSNAME;
		error = nfscl_request(nd, vp, p, cred, stuff);
		if (error)
			return (error);
		if (nd->nd_repstat == 0) {
			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
			    pc, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p,
			    cred);
			if (!error)
				*attrflagp = 1;
		} else {
			error = nd->nd_repstat;
		}
	} else {
		NFSCL_REQSTART(nd, NFSPROC_PATHCONF, vp);
		error = nfscl_request(nd, vp, p, cred, stuff);
		if (error)
			return (error);
		error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
		if (nd->nd_repstat && !error)
			error = nd->nd_repstat;
		if (!error) {
			NFSM_DISSECT(tl, u_int32_t *, NFSX_V3PATHCONF);
			pc->pc_linkmax = fxdr_unsigned(u_int32_t, *tl++);
			pc->pc_namemax = fxdr_unsigned(u_int32_t, *tl++);
			pc->pc_notrunc = fxdr_unsigned(u_int32_t, *tl++);
			pc->pc_chownrestricted =
			    fxdr_unsigned(u_int32_t, *tl++);
			pc->pc_caseinsensitive =
			    fxdr_unsigned(u_int32_t, *tl++);
			pc->pc_casepreserving = fxdr_unsigned(u_int32_t, *tl);
		}
	}
nfsmout:
	mbuf_freem(nd->nd_mrep);
	return (error);
}

/*
 * nfs version 3 fsinfo rpc call
 *
 * Decodes the Fsinfo reply into fsp after loading the post-op attributes.
 */
APPLESTATIC int
nfsrpc_fsinfo(vnode_t vp, struct nfsfsinfo *fsp, struct ucred *cred,
    NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
{
	u_int32_t *tl;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	int error;

	*attrflagp = 0;
	NFSCL_REQSTART(nd, NFSPROC_FSINFO, vp);
	error = nfscl_request(nd, vp, p, cred, stuff);
	if (error)
		return (error);
	error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
	if (nd->nd_repstat && !error)
		error = nd->nd_repstat;
	if (!error) {
		NFSM_DISSECT(tl, u_int32_t *, NFSX_V3FSINFO);
		fsp->fs_rtmax = fxdr_unsigned(u_int32_t, *tl++);
		fsp->fs_rtpref = fxdr_unsigned(u_int32_t, *tl++);
		fsp->fs_rtmult = fxdr_unsigned(u_int32_t, *tl++);
		fsp->fs_wtmax = fxdr_unsigned(u_int32_t, *tl++);
		fsp->fs_wtpref = fxdr_unsigned(u_int32_t, *tl++);
		fsp->fs_wtmult = fxdr_unsigned(u_int32_t, *tl++);
		fsp->fs_dtpref = fxdr_unsigned(u_int32_t, *tl++);
		fsp->fs_maxfilesize = fxdr_hyper(tl);
		tl += 2;
		fxdr_nfsv3time(tl, &fsp->fs_timedelta);
		tl += 2;
		fsp->fs_properties = fxdr_unsigned(u_int32_t, *tl);
	}
nfsmout:
	mbuf_freem(nd->nd_mrep);
	return (error);
}

/*
 * This function performs the Renew RPC.
 *
 * Returns 0 without doing anything when the client has no mount point.
 * When dsp is non-NULL its session is used and its socket is used when
 * set; otherwise the request goes over the MDS socket.
 * Returns the local error or, failing that, the server's reply status.
 */
APPLESTATIC int
nfsrpc_renew(struct nfsclclient *clp, struct nfsclds *dsp, struct ucred *cred,
    NFSPROC_T *p)
{
	u_int32_t *tl;
	struct nfsrv_descript nfsd;
	struct nfsrv_descript *nd = &nfsd;
	struct nfsmount *nmp;
	int error;
	struct nfssockreq *nrp;
	struct nfsclsession *tsep;

	nmp = clp->nfsc_nmp;
	if (nmp == NULL)
		return (0);
	if (dsp == NULL)
		nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL, NULL, 0,
		    0);
	else
		nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL,
		    &dsp->nfsclds_sess, 0, 0);
	if (!NFSHASNFSV4N(nmp)) {
		/* NFSv4.1 just uses a Sequence Op and not a Renew. */
		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		tsep = nfsmnt_mdssession(nmp);
		*tl++ = tsep->nfsess_clientid.lval[0];
		*tl = tsep->nfsess_clientid.lval[1];
	}
	nrp = NULL;
	if (dsp != NULL)
		nrp = dsp->nfsclds_sockp;
	if (nrp == NULL)
		/* If NULL, use the MDS socket. */
		nrp = &nmp->nm_sockreq;
	nd->nd_flag |= ND_USEGSSNAME;
	if (dsp == NULL)
		error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
		    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	else
		error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
		    NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
	if (error)
		return (error);
	error = nd->nd_repstat;
	mbuf_freem(nd->nd_mrep);
	return (error);
}

/*
 * This function performs the Releaselockowner RPC.
 *
 * Returns the local error or, failing that, the server's reply status.
 *
 * NOTE(review): fhlen bytes are copied into own[] without a local bounds
 * check - presumably callers guarantee fhlen <= NFSX_V4FHMAX; confirm.
 */
APPLESTATIC int
nfsrpc_rellockown(struct nfsmount *nmp, struct nfscllockowner *lp,
    uint8_t *fh, int fhlen, struct ucred *cred, NFSPROC_T *p)
{
	struct nfsrv_descript nfsd, *nd = &nfsd;
	u_int32_t *tl;
	int error;
	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
	struct nfsclsession *tsep;

	if (NFSHASNFSV4N(nmp)) {
		/* For NFSv4.1, do a FreeStateID. */
		nfscl_reqstart(nd, NFSPROC_FREESTATEID, nmp, NULL, 0, NULL,
		    NULL, 0, 0);
		nfsm_stateidtom(nd, &lp->nfsl_stateid, NFSSTATEID_PUTSTATEID);
	} else {
		nfscl_reqstart(nd, NFSPROC_RELEASELCKOWN, nmp, NULL, 0, NULL,
		    NULL, 0, 0);
		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		tsep = nfsmnt_mdssession(nmp);
		*tl++ = tsep->nfsess_clientid.lval[0];
		*tl = tsep->nfsess_clientid.lval[1];
		/* Owner string is the lock owner name followed by the file handle. */
		NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
		NFSBCOPY(fh, &own[NFSV4CL_LOCKNAMELEN], fhlen);
		(void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
	}
	nd->nd_flag |= ND_USEGSSNAME;
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	if (error)
		return (error);
	error = nd->nd_repstat;
	mbuf_freem(nd->nd_mrep);
	return (error);
}

/*
 * This function performs the Compound to get the mount pt FH.
 *
 * The compound is Putrootfh, one Lookup per non-empty component of
 * dirpath, then Getfh.  dirpath is temporarily modified in place (each
 * '/' separator is overwritten with '\0' and then restored).  On success
 * the file handle is stored in nmp->nm_fh and its length in
 * nmp->nm_fhsize.
 */
APPLESTATIC int
nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred,
    NFSPROC_T *p)
{
	u_int32_t *tl;
	struct nfsrv_descript nfsd;
	struct nfsrv_descript *nd = &nfsd;
	u_char *cp, *cp2;
	int error, cnt, len, setnil;
	u_int32_t *opcntp;

	nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp, NULL, 0,
	    0);
	cp = dirpath;
	cnt = 0;
	do {
		setnil = 0;
		/* Skip any run of separators, then find the component's end. */
		while (*cp == '/')
			cp++;
		cp2 = cp;
		while (*cp2 != '\0' && *cp2 != '/')
			cp2++;
		if (*cp2 == '/') {
			setnil = 1;
			*cp2 = '\0';
		}
		if (cp2 != cp) {
			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
			*tl = txdr_unsigned(NFSV4OP_LOOKUP);
			nfsm_strtom(nd, cp, strlen(cp));
			cnt++;
		}
		/* Put the separator back that was overwritten above. */
		if (setnil)
			*cp2++ = '/';
		cp = cp2;
	} while (*cp != '\0');
	if (NFSHASNFSV4N(nmp))
		/* Has a Sequence Op done by nfscl_reqstart(). */
		*opcntp = txdr_unsigned(3 + cnt);
	else
		*opcntp = txdr_unsigned(2 + cnt);
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OP_GETFH);
	nd->nd_flag |= ND_USEGSSNAME;
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	if (error)
		return (error);
	if (nd->nd_repstat == 0) {
		/* Step over the per-op reply words to the file handle length. */
		NFSM_DISSECT(tl, u_int32_t *, (3 + 2 * cnt) * NFSX_UNSIGNED);
		tl += (2 + 2 * cnt);
		if ((len = fxdr_unsigned(int, *tl)) <= 0 ||
		    len > NFSX_FHMAX) {
			nd->nd_repstat = NFSERR_BADXDR;
		} else {
			nd->nd_repstat = nfsrv_mtostr(nd, nmp->nm_fh, len);
			if (nd->nd_repstat == 0)
				nmp->nm_fhsize = len;
		}
	}
	error = nd->nd_repstat;
nfsmout:
	mbuf_freem(nd->nd_mrep);
	return (error);
}

/*
 * This function performs the Delegreturn RPC.
 *
 * Returns the delegation dp using its file handle and stateid.  When
 * syscred is non-zero ND_USEGSSNAME is set on the request.
 * Returns the local error or, failing that, the server's reply status.
 */
APPLESTATIC int
nfsrpc_delegreturn(struct nfscldeleg *dp, struct ucred *cred,
    struct nfsmount *nmp, NFSPROC_T *p, int syscred)
{
	u_int32_t *tl;
	struct nfsrv_descript nfsd;
	struct nfsrv_descript *nd = &nfsd;
	int error;

	nfscl_reqstart(nd, NFSPROC_DELEGRETURN, nmp, dp->nfsdl_fh,
	    dp->nfsdl_fhlen, NULL, NULL, 0, 0);
	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
	if (NFSHASNFSV4N(nmp))
		*tl++ = 0;
	else
		*tl++ = dp->nfsdl_stateid.seqid;
	*tl++ = dp->nfsdl_stateid.other[0];
	*tl++ = dp->nfsdl_stateid.other[1];
	*tl = dp->nfsdl_stateid.other[2];
	if (syscred)
		nd->nd_flag |= ND_USEGSSNAME;
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	if (error)
		return (error);
	error = nd->nd_repstat;
	mbuf_freem(nd->nd_mrep);
	return (error);
}

/*
 * nfs getacl call.
*/ APPLESTATIC int nfsrpc_getacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct acl *aclp, void *stuff) { struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfsattrbit_t attrbits; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp)) return (EOPNOTSUPP); NFSCL_REQSTART(nd, NFSPROC_GETACL, vp); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL); (void) nfsrv_putattrbit(nd, &attrbits); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (!nd->nd_repstat) error = nfsv4_loadattr(nd, vp, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, aclp, 0, NULL, NULL, NULL, p, cred); else error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * nfs setacl call. */ APPLESTATIC int nfsrpc_setacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct acl *aclp, void *stuff) { int error; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp)) return (EOPNOTSUPP); error = nfsrpc_setattr(vp, NULL, aclp, cred, p, NULL, NULL, stuff); return (error); } /* * nfs setacl call. */ static int nfsrpc_setaclrpc(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct acl *aclp, nfsv4stateid_t *stateidp, void *stuff) { struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfsattrbit_t attrbits; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); if (!NFSHASNFSV4(nmp)) return (EOPNOTSUPP); NFSCL_REQSTART(nd, NFSPROC_SETACL, vp); nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL); (void) nfsv4_fillattr(nd, vnode_mount(vp), vp, aclp, NULL, NULL, 0, &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); /* Don't care about the pre/postop attributes */ mbuf_freem(nd->nd_mrep); return (nd->nd_repstat); } /* * Do the NFSv4.1 Exchange ID. 
*/ int nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp, struct nfssockreq *nrp, uint32_t exchflags, struct nfsclds **dspp, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl, v41flags; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; struct nfsclds *dsp; struct timespec verstime; int error, len; *dspp = NULL; nfscl_reqstart(nd, NFSPROC_EXCHANGEID, nmp, NULL, 0, NULL, NULL, 0, 0); NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(nfsboottime.tv_sec); /* Client owner */ *tl = txdr_unsigned(clp->nfsc_rev); (void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen); NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(exchflags); *tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE); /* Set the implementation id4 */ *tl = txdr_unsigned(1); (void) nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org")); (void) nfsm_strtom(nd, version, strlen(version)); NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME); verstime.tv_sec = 1293840000; /* Jan 1, 2011 */ verstime.tv_nsec = 0; txdr_nfsv4time(&verstime, tl); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); NFSCL_DEBUG(1, "exchangeid err=%d reps=%d\n", error, (int)nd->nd_repstat); if (error != 0) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, 6 * NFSX_UNSIGNED + NFSX_HYPER); len = fxdr_unsigned(int, *(tl + 7)); if (len < 0 || len > NFSV4_OPAQUELIMIT) { error = NFSERR_BADXDR; goto nfsmout; } dsp = malloc(sizeof(struct nfsclds) + len + 1, M_NFSCLDS, M_WAITOK | M_ZERO); dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew; dsp->nfsclds_servownlen = len; dsp->nfsclds_sess.nfsess_clientid.lval[0] = *tl++; dsp->nfsclds_sess.nfsess_clientid.lval[1] = *tl++; dsp->nfsclds_sess.nfsess_sequenceid = fxdr_unsigned(uint32_t, *tl++); v41flags = fxdr_unsigned(uint32_t, *tl); if ((v41flags & NFSV4EXCH_USEPNFSMDS) != 0 && NFSHASPNFSOPT(nmp)) { NFSCL_DEBUG(1, "set PNFS\n"); NFSLOCKMNT(nmp); nmp->nm_state |= 
NFSSTA_PNFS; NFSUNLOCKMNT(nmp); dsp->nfsclds_flags |= NFSCLDS_MDS; } if ((v41flags & NFSV4EXCH_USEPNFSDS) != 0) dsp->nfsclds_flags |= NFSCLDS_DS; if (len > 0) nd->nd_repstat = nfsrv_mtostr(nd, dsp->nfsclds_serverown, len); if (nd->nd_repstat == 0) { mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF); mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF); nfscl_initsessionslots(&dsp->nfsclds_sess); *dspp = dsp; } else free(dsp, M_NFSCLDS); } error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 Create Session. */ int nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep, struct nfssockreq *nrp, uint32_t sequenceid, int mds, struct ucred *cred, NFSPROC_T *p) { uint32_t crflags, maxval, *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; int error, irdcnt; /* Make sure nm_rsize, nm_wsize is set. */ if (nmp->nm_rsize > NFS_MAXBSIZE || nmp->nm_rsize == 0) nmp->nm_rsize = NFS_MAXBSIZE; if (nmp->nm_wsize > NFS_MAXBSIZE || nmp->nm_wsize == 0) nmp->nm_wsize = NFS_MAXBSIZE; nfscl_reqstart(nd, NFSPROC_CREATESESSION, nmp, NULL, 0, NULL, NULL, 0, 0); NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED); *tl++ = sep->nfsess_clientid.lval[0]; *tl++ = sep->nfsess_clientid.lval[1]; *tl++ = txdr_unsigned(sequenceid); crflags = (NFSMNT_RDONLY(nmp->nm_mountp) ? 0 : NFSV4CRSESS_PERSIST); if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0 && mds != 0) crflags |= NFSV4CRSESS_CONNBACKCHAN; *tl = txdr_unsigned(crflags); /* Fill in fore channel attributes. */ NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED); *tl++ = 0; /* Header pad size */ *tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR);/* Max request size */ *tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR);/* Max reply size */ *tl++ = txdr_unsigned(4096); /* Max response size cached */ *tl++ = txdr_unsigned(20); /* Max operations */ *tl++ = txdr_unsigned(64); /* Max slots */ *tl = 0; /* No rdma ird */ /* Fill in back channel attributes. 
*/ NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED); *tl++ = 0; /* Header pad size */ *tl++ = txdr_unsigned(10000); /* Max request size */ *tl++ = txdr_unsigned(10000); /* Max response size */ *tl++ = txdr_unsigned(4096); /* Max response size cached */ *tl++ = txdr_unsigned(4); /* Max operations */ *tl++ = txdr_unsigned(NFSV4_CBSLOTS); /* Max slots */ *tl = 0; /* No rdma ird */ NFSM_BUILD(tl, uint32_t *, 8 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFS_CALLBCKPROG); /* Call back prog # */ /* Allow AUTH_SYS callbacks as uid, gid == 0. */ *tl++ = txdr_unsigned(1); /* Auth_sys only */ *tl++ = txdr_unsigned(AUTH_SYS); /* AUTH_SYS type */ *tl++ = txdr_unsigned(nfsboottime.tv_sec); /* time stamp */ *tl++ = 0; /* Null machine name */ *tl++ = 0; /* Uid == 0 */ *tl++ = 0; /* Gid == 0 */ *tl = 0; /* No additional gids */ nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID + 2 * NFSX_UNSIGNED); bcopy(tl, sep->nfsess_sessionid, NFSX_V4SESSIONID); tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; sep->nfsess_sequenceid = fxdr_unsigned(uint32_t, *tl++); crflags = fxdr_unsigned(uint32_t, *tl); if ((crflags & NFSV4CRSESS_PERSIST) != 0 && mds != 0) { NFSLOCKMNT(nmp); nmp->nm_state |= NFSSTA_SESSPERSIST; NFSUNLOCKMNT(nmp); } /* Get the fore channel slot count. */ NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED); tl++; /* Skip the header pad size. */ /* Make sure nm_wsize is small enough. */ maxval = fxdr_unsigned(uint32_t, *tl++); while (maxval < nmp->nm_wsize + NFS_MAXXDR) { if (nmp->nm_wsize > 8096) nmp->nm_wsize /= 2; else break; } /* Make sure nm_rsize is small enough. 
*/ maxval = fxdr_unsigned(uint32_t, *tl++); while (maxval < nmp->nm_rsize + NFS_MAXXDR) { if (nmp->nm_rsize > 8096) nmp->nm_rsize /= 2; else break; } sep->nfsess_maxcache = fxdr_unsigned(int, *tl++); tl++; sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++); NFSCL_DEBUG(4, "fore slots=%d\n", (int)sep->nfsess_foreslots); irdcnt = fxdr_unsigned(int, *tl); if (irdcnt > 0) NFSM_DISSECT(tl, uint32_t *, irdcnt * NFSX_UNSIGNED); /* and the back channel slot count. */ NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED); tl += 5; sep->nfsess_backslots = fxdr_unsigned(uint16_t, *tl); NFSCL_DEBUG(4, "back slots=%d\n", (int)sep->nfsess_backslots); } error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 Destroy Session. */ int nfsrpc_destroysession(struct nfsmount *nmp, struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; int error; struct nfsclsession *tsep; nfscl_reqstart(nd, NFSPROC_DESTROYSESSION, nmp, NULL, 0, NULL, NULL, 0, 0); NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID); tsep = nfsmnt_mdssession(nmp); bcopy(tsep->nfsess_sessionid, tl, NFSX_V4SESSIONID); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 Destroy Client. 
*/ int nfsrpc_destroyclient(struct nfsmount *nmp, struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; int error; struct nfsclsession *tsep; nfscl_reqstart(nd, NFSPROC_DESTROYCLIENT, nmp, NULL, 0, NULL, NULL, 0, 0); NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED); tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 LayoutGet. */ static int nfsrpc_layoutget(struct nfsmount *nmp, uint8_t *fhp, int fhlen, int iomode, uint64_t offset, uint64_t len, uint64_t minlen, int layouttype, int layoutlen, nfsv4stateid_t *stateidp, int *retonclosep, struct nfsclflayouthead *flhp, struct ucred *cred, NFSPROC_T *p, void *stuff) { struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfscl_reqstart(nd, NFSPROC_LAYOUTGET, nmp, fhp, fhlen, NULL, NULL, 0, 0); nfsrv_setuplayoutget(nd, iomode, offset, len, minlen, stateidp, layouttype, layoutlen, 0); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); NFSCL_DEBUG(4, "layget err=%d st=%d\n", error, nd->nd_repstat); if (error != 0) return (error); if (nd->nd_repstat == 0) error = nfsrv_parselayoutget(nd, stateidp, retonclosep, flhp); if (error == 0 && nd->nd_repstat != 0) error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 Get Device Info. 
*/ int nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype, uint32_t *notifybitsp, struct nfscldevinfo **ndip, struct ucred *cred, NFSPROC_T *p) { uint32_t cnt, *tl, vers, minorvers; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; struct sockaddr_in sin, ssin; struct sockaddr_in6 sin6, ssin6; struct nfsclds *dsp = NULL, **dspp, **gotdspp; struct nfscldevinfo *ndi; int addrcnt = 0, bitcnt, error, gotvers, i, isudp, j, stripecnt; uint8_t stripeindex; sa_family_t af, safilled; *ndip = NULL; ndi = NULL; gotdspp = NULL; nfscl_reqstart(nd, NFSPROC_GETDEVICEINFO, nmp, NULL, 0, NULL, NULL, 0, 0); NFSM_BUILD(tl, uint32_t *, NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED); NFSBCOPY(deviceid, tl, NFSX_V4DEVICEID); tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); *tl++ = txdr_unsigned(layouttype); *tl++ = txdr_unsigned(100000); if (notifybitsp != NULL && *notifybitsp != 0) { *tl = txdr_unsigned(1); /* One word of bits. */ NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(*notifybitsp); } else *tl = txdr_unsigned(0); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); if (layouttype != fxdr_unsigned(int, *tl)) printf("EEK! 
devinfo layout type not same!\n"); if (layouttype == NFSLAYOUT_NFSV4_1_FILES) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); stripecnt = fxdr_unsigned(int, *tl); NFSCL_DEBUG(4, "stripecnt=%d\n", stripecnt); if (stripecnt < 1 || stripecnt > 4096) { printf("pNFS File layout devinfo stripecnt %d:" " out of range\n", stripecnt); error = NFSERR_BADXDR; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, (stripecnt + 1) * NFSX_UNSIGNED); addrcnt = fxdr_unsigned(int, *(tl + stripecnt)); NFSCL_DEBUG(4, "addrcnt=%d\n", addrcnt); if (addrcnt < 1 || addrcnt > 128) { printf("NFS devinfo addrcnt %d: out of range\n", addrcnt); error = NFSERR_BADXDR; goto nfsmout; } /* * Now we know how many stripe indices and addresses, so * we can allocate the structure the correct size. */ i = (stripecnt * sizeof(uint8_t)) / sizeof(struct nfsclds *) + 1; NFSCL_DEBUG(4, "stripeindices=%d\n", i); ndi = malloc(sizeof(*ndi) + (addrcnt + i) * sizeof(struct nfsclds *), M_NFSDEVINFO, M_WAITOK | M_ZERO); NFSBCOPY(deviceid, ndi->nfsdi_deviceid, NFSX_V4DEVICEID); ndi->nfsdi_refcnt = 0; ndi->nfsdi_flags = NFSDI_FILELAYOUT; ndi->nfsdi_stripecnt = stripecnt; ndi->nfsdi_addrcnt = addrcnt; /* Fill in the stripe indices. */ for (i = 0; i < stripecnt; i++) { stripeindex = fxdr_unsigned(uint8_t, *tl++); NFSCL_DEBUG(4, "stripeind=%d\n", stripeindex); if (stripeindex >= addrcnt) { printf("pNFS File Layout devinfo" " stripeindex %d: too big\n", (int)stripeindex); error = NFSERR_BADXDR; goto nfsmout; } nfsfldi_setstripeindex(ndi, i, stripeindex); } } else if (layouttype == NFSLAYOUT_FLEXFILE) { /* For Flex File, we only get one address list. */ ndi = malloc(sizeof(*ndi) + sizeof(struct nfsclds *), M_NFSDEVINFO, M_WAITOK | M_ZERO); NFSBCOPY(deviceid, ndi->nfsdi_deviceid, NFSX_V4DEVICEID); ndi->nfsdi_refcnt = 0; ndi->nfsdi_flags = NFSDI_FLEXFILE; addrcnt = ndi->nfsdi_addrcnt = 1; } /* Now, dissect the server address(es). 
*/ safilled = AF_UNSPEC; for (i = 0; i < addrcnt; i++) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); cnt = fxdr_unsigned(uint32_t, *tl); if (cnt == 0) { printf("NFS devinfo 0 len addrlist\n"); error = NFSERR_BADXDR; goto nfsmout; } dspp = nfsfldi_addr(ndi, i); safilled = AF_UNSPEC; for (j = 0; j < cnt; j++) { error = nfsv4_getipaddr(nd, &sin, &sin6, &af, &isudp); if (error != 0 && error != EPERM) { error = NFSERR_BADXDR; goto nfsmout; } if (error == 0 && isudp == 0) { /* * The priority is: * - Same address family. * Save the address and dspp, so that * the connection can be done after * parsing is complete. */ if (safilled == AF_UNSPEC || (af == nmp->nm_nam->sa_family && safilled != nmp->nm_nam->sa_family) ) { if (af == AF_INET) ssin = sin; else ssin6 = sin6; safilled = af; gotdspp = dspp; } } } } gotvers = NFS_VER4; /* Always NFSv4 for File Layout. */ /* For Flex File, we will take one of the versions to use. */ if (layouttype == NFSLAYOUT_FLEXFILE) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); j = fxdr_unsigned(int, *tl); if (j < 1 || j > NFSDEV_MAXVERS) { printf("pNFS: too many versions\n"); error = NFSERR_BADXDR; goto nfsmout; } gotvers = 0; for (i = 0; i < j; i++) { NFSM_DISSECT(tl, uint32_t *, 5 * NFSX_UNSIGNED); vers = fxdr_unsigned(uint32_t, *tl++); minorvers = fxdr_unsigned(uint32_t, *tl++); if ((vers == NFS_VER4 && minorvers == NFSV41_MINORVERSION) || (vers == NFS_VER3 && gotvers == 0)) { gotvers = vers; /* We'll take this one. */ ndi->nfsdi_versindex = i; ndi->nfsdi_vers = vers; ndi->nfsdi_minorvers = minorvers; ndi->nfsdi_rsize = fxdr_unsigned( uint32_t, *tl++); ndi->nfsdi_wsize = fxdr_unsigned( uint32_t, *tl++); if (*tl == newnfs_true) ndi->nfsdi_flags |= NFSDI_TIGHTCOUPLED; else ndi->nfsdi_flags &= ~NFSDI_TIGHTCOUPLED; } } if (gotvers == 0) { printf("pNFS: no NFSv3 or NFSv4.1\n"); error = NFSERR_BADXDR; goto nfsmout; } } /* And the notify bits. 
*/ NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); bitcnt = fxdr_unsigned(int, *tl); if (bitcnt > 0) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); if (notifybitsp != NULL) *notifybitsp = fxdr_unsigned(uint32_t, *tl); } if (safilled != AF_UNSPEC) { KASSERT(ndi != NULL, ("ndi is NULL")); *ndip = ndi; } else error = EPERM; if (error == 0) { /* * Now we can do a TCP connection for the correct * NFS version and IP address. */ error = nfsrpc_fillsa(nmp, &ssin, &ssin6, safilled, gotvers, &dsp, p); } if (error == 0) { KASSERT(gotdspp != NULL, ("gotdspp is NULL")); *gotdspp = dsp; } } if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; nfsmout: if (error != 0 && ndi != NULL) nfscl_freedevinfo(ndi); mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 LayoutCommit. */ int nfsrpc_layoutcommit(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim, uint64_t off, uint64_t len, uint64_t lastbyte, nfsv4stateid_t *stateidp, int layouttype, struct ucred *cred, NFSPROC_T *p, void *stuff) { uint32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfscl_reqstart(nd, NFSPROC_LAYOUTCOMMIT, nmp, fh, fhlen, NULL, NULL, 0, 0); NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED + 3 * NFSX_HYPER + NFSX_STATEID); txdr_hyper(off, tl); tl += 2; txdr_hyper(len, tl); tl += 2; if (reclaim != 0) *tl++ = newnfs_true; else *tl++ = newnfs_false; *tl++ = txdr_unsigned(stateidp->seqid); *tl++ = stateidp->other[0]; *tl++ = stateidp->other[1]; *tl++ = stateidp->other[2]; *tl++ = newnfs_true; if (lastbyte < off) lastbyte = off; else if (lastbyte >= (off + len)) lastbyte = off + len - 1; txdr_hyper(lastbyte, tl); tl += 2; *tl++ = newnfs_false; *tl++ = txdr_unsigned(layouttype); /* All supported layouts appear to be 0 length for now. 
*/ if (layouttype == NFSLAYOUT_NFSV4_1_FILES) *tl = txdr_unsigned(0); else if (layouttype == NFSLAYOUT_FLEXFILE) *tl = txdr_unsigned(0); else *tl = txdr_unsigned(0); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 LayoutReturn. */ int nfsrpc_layoutreturn(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim, int layouttype, uint32_t iomode, int layoutreturn, uint64_t offset, uint64_t len, nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p, void *stuff) { uint32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfscl_reqstart(nd, NFSPROC_LAYOUTRETURN, nmp, fh, fhlen, NULL, NULL, 0, 0); NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED); if (reclaim != 0) *tl++ = newnfs_true; else *tl++ = newnfs_false; *tl++ = txdr_unsigned(layouttype); *tl++ = txdr_unsigned(iomode); *tl = txdr_unsigned(layoutreturn); if (layoutreturn == NFSLAYOUTRETURN_FILE) { NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID + NFSX_UNSIGNED); txdr_hyper(offset, tl); tl += 2; txdr_hyper(len, tl); tl += 2; NFSCL_DEBUG(4, "layoutret stseq=%d\n", (int)stateidp->seqid); *tl++ = txdr_unsigned(stateidp->seqid); *tl++ = stateidp->other[0]; *tl++ = stateidp->other[1]; *tl++ = stateidp->other[2]; if (layouttype == NFSLAYOUT_NFSV4_1_FILES) *tl = txdr_unsigned(0); else if (layouttype == NFSLAYOUT_FLEXFILE) { *tl = txdr_unsigned(2 * NFSX_UNSIGNED); NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED); /* No ioerrs or stats yet. 
*/ *tl++ = 0; *tl = 0; } } nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); if (*tl != 0) { NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID); stateidp->seqid = fxdr_unsigned(uint32_t, *tl++); stateidp->other[0] = *tl++; stateidp->other[1] = *tl++; stateidp->other[2] = *tl; } } else error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Acquire a layout and devinfo, if possible. The caller must have acquired * a reference count on the nfsclclient structure before calling this. * Return the layout in lypp with a reference count on it, if successful. */ static int nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, struct nfsfh *nfhp, int iomode, uint32_t *notifybitsp, nfsv4stateid_t *stateidp, uint64_t off, struct nfscllayout **lypp, struct ucred *cred, NFSPROC_T *p) { struct nfscllayout *lyp; struct nfsclflayout *flp; struct nfsclflayouthead flh; int error = 0, islocked, layoutlen, layouttype, recalled, retonclose; nfsv4stateid_t stateid; struct nfsclsession *tsep; *lypp = NULL; if (NFSHASFLEXFILE(nmp)) layouttype = NFSLAYOUT_FLEXFILE; else layouttype = NFSLAYOUT_NFSV4_1_FILES; /* * If lyp is returned non-NULL, there will be a refcnt (shared lock) * on it, iff flp != NULL or a lock (exclusive lock) on it iff * flp == NULL. 
*/ lyp = nfscl_getlayout(nmp->nm_clp, nfhp->nfh_fh, nfhp->nfh_len, off, &flp, &recalled); islocked = 0; if (lyp == NULL || flp == NULL) { if (recalled != 0) return (EIO); LIST_INIT(&flh); tsep = nfsmnt_mdssession(nmp); layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID + 3 * NFSX_UNSIGNED); if (lyp == NULL) { stateid.seqid = 0; stateid.other[0] = stateidp->other[0]; stateid.other[1] = stateidp->other[1]; stateid.other[2] = stateidp->other[2]; error = nfsrpc_layoutget(nmp, nfhp->nfh_fh, nfhp->nfh_len, iomode, (uint64_t)0, UINT64_MAX, (uint64_t)0, layouttype, layoutlen, &stateid, &retonclose, &flh, cred, p, NULL); } else { islocked = 1; stateid.seqid = lyp->nfsly_stateid.seqid; stateid.other[0] = lyp->nfsly_stateid.other[0]; stateid.other[1] = lyp->nfsly_stateid.other[1]; stateid.other[2] = lyp->nfsly_stateid.other[2]; error = nfsrpc_layoutget(nmp, nfhp->nfh_fh, nfhp->nfh_len, iomode, off, UINT64_MAX, (uint64_t)0, layouttype, layoutlen, &stateid, &retonclose, &flh, cred, p, NULL); } error = nfsrpc_layoutgetres(nmp, vp, nfhp->nfh_fh, nfhp->nfh_len, &stateid, retonclose, notifybitsp, &lyp, &flh, layouttype, error, NULL, cred, p); if (error == 0) *lypp = lyp; else if (islocked != 0) nfscl_rellayout(lyp, 1); } else *lypp = lyp; return (error); } /* * Do a TCP connection plus exchange id and create session. * If successful, a "struct nfsclds" is linked into the list for the * mount point and a pointer to it is returned. 
*/
static int
nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_in *sin,
    struct sockaddr_in6 *sin6, sa_family_t af, int vers, struct nfsclds **dspp,
    NFSPROC_T *p)
{
	struct sockaddr_in *msad, *sad;
	struct sockaddr_in6 *msad6, *sad6;
	struct nfsclclient *clp;
	struct nfssockreq *nrp;
	struct nfsclds *dsp, *tdsp;
	int error;
	enum nfsclds_state retv;
	uint32_t sequenceid;

	/*
	 * af selects which of sin/sin6 is used; any other family returns
	 * EPERM.  vers is stored as the RPC version for the new socket and
	 * decides whether an exchangeid and create session are done
	 * (NFS_VER4) or a bare nfsclds is allocated (anything else).
	 */
	KASSERT(nmp->nm_sockreq.nr_cred != NULL,
	    ("nfsrpc_fillsa: NULL nr_cred"));
	NFSLOCKCLSTATE();
	clp = nmp->nm_clp;
	NFSUNLOCKCLSTATE();
	if (clp == NULL)
		return (EPERM);
	if (af == AF_INET) {
		NFSLOCKMNT(nmp);
		/*
		 * Check to see if we already have a session for this
		 * address that is usable for a DS.
		 * Note that the MDS's address is in a different place
		 * than the sessions already acquired for DS's.
		 */
		msad = (struct sockaddr_in *)nmp->nm_sockreq.nr_nam;
		tdsp = TAILQ_FIRST(&nmp->nm_sess);
		while (tdsp != NULL) {
			if (msad != NULL && msad->sin_family == AF_INET &&
			    sin->sin_addr.s_addr == msad->sin_addr.s_addr &&
			    sin->sin_port == msad->sin_port &&
			    (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
			    tdsp->nfsclds_sess.nfsess_defunct == 0) {
				*dspp = tdsp;
				NFSUNLOCKMNT(nmp);
				NFSCL_DEBUG(4, "fnd same addr\n");
				return (0);
			}
			/* Step to the next entry and pick up its address. */
			tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
			if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
				msad = (struct sockaddr_in *)
				    tdsp->nfsclds_sockp->nr_nam;
			else
				msad = NULL;
		}
		NFSUNLOCKMNT(nmp);

		/* No IP address match, so look for new/trunked one. */
		sad = malloc(sizeof(*sad), M_SONAME, M_WAITOK | M_ZERO);
		sad->sin_len = sizeof(*sad);
		sad->sin_family = AF_INET;
		sad->sin_port = sin->sin_port;
		sad->sin_addr.s_addr = sin->sin_addr.s_addr;
		nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
		nrp->nr_nam = (struct sockaddr *)sad;
	} else if (af == AF_INET6) {
		NFSLOCKMNT(nmp);
		/*
		 * Check to see if we already have a session for this
		 * address that is usable for a DS.
		 * Note that the MDS's address is in a different place
		 * than the sessions already acquired for DS's.
		 */
		msad6 = (struct sockaddr_in6 *)nmp->nm_sockreq.nr_nam;
		tdsp = TAILQ_FIRST(&nmp->nm_sess);
		while (tdsp != NULL) {
			if (msad6 != NULL && msad6->sin6_family == AF_INET6 &&
			    IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
			    &msad6->sin6_addr) &&
			    sin6->sin6_port == msad6->sin6_port &&
			    (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
			    tdsp->nfsclds_sess.nfsess_defunct == 0) {
				*dspp = tdsp;
				NFSUNLOCKMNT(nmp);
				return (0);
			}
			/* Step to the next entry and pick up its address. */
			tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
			if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
				msad6 = (struct sockaddr_in6 *)
				    tdsp->nfsclds_sockp->nr_nam;
			else
				msad6 = NULL;
		}
		NFSUNLOCKMNT(nmp);

		/* No IP address match, so look for new/trunked one. */
		sad6 = malloc(sizeof(*sad6), M_SONAME, M_WAITOK | M_ZERO);
		sad6->sin6_len = sizeof(*sad6);
		sad6->sin6_family = AF_INET6;
		sad6->sin6_port = sin6->sin6_port;
		NFSBCOPY(&sin6->sin6_addr, &sad6->sin6_addr,
		    sizeof(struct in6_addr));
		nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
		nrp->nr_nam = (struct sockaddr *)sad6;
	} else
		return (EPERM);

	nrp->nr_sotype = SOCK_STREAM;
	mtx_init(&nrp->nr_mtx, "nfssock", NULL, MTX_DEF);
	nrp->nr_prog = NFS_PROG;
	nrp->nr_vers = vers;

	/*
	 * Use the credentials that were used for the mount, which are
	 * in nmp->nm_sockreq.nr_cred for newnfs_connect() etc.
	 * Ref. counting the credentials with crhold() is probably not
	 * necessary, since nm_sockreq.nr_cred won't be crfree()'d until
	 * unmount, but I did it anyhow.
	 */
	nrp->nr_cred = crhold(nmp->nm_sockreq.nr_cred);
	error = newnfs_connect(nmp, nrp, NULL, p, 0);
	NFSCL_DEBUG(3, "DS connect=%d\n", error);

	dsp = NULL;
	/* Now, do the exchangeid and create session. */
	if (error == 0) {
		if (vers == NFS_VER4) {
			error = nfsrpc_exchangeid(nmp, clp, nrp,
			    NFSV4EXCH_USEPNFSDS, &dsp, nrp->nr_cred, p);
			NFSCL_DEBUG(3, "DS exchangeid=%d\n", error);
			if (error != 0)
				newnfs_disconnect(nrp);
		} else {
			/* No exchangeid: set up an empty DS entry by hand. */
			dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS,
			    M_WAITOK | M_ZERO);
			dsp->nfsclds_flags |= NFSCLDS_DS;
			dsp->nfsclds_expire = INT32_MAX; /* No renews needed. */
			mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
			mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
			    NULL, MTX_DEF);
		}
	}
	if (error == 0) {
		dsp->nfsclds_sockp = nrp;
		if (vers == NFS_VER4) {
			NFSLOCKMNT(nmp);
			retv = nfscl_getsameserver(nmp, dsp, &tdsp);
			NFSCL_DEBUG(3, "getsame ret=%d\n", retv);
			if (retv == NFSDSP_USETHISSESSION) {
				NFSUNLOCKMNT(nmp);
				/*
				 * If there is already a session for this
				 * server, use it.
				 */
				(void)newnfs_disconnect(nrp);
				nfscl_freenfsclds(dsp);
				*dspp = tdsp;
				return (0);
			}
			/* Pick the sequenceid while the mount lock is held. */
			if (retv == NFSDSP_SEQTHISSESSION)
				sequenceid =
				    tdsp->nfsclds_sess.nfsess_sequenceid;
			else
				sequenceid =
				    dsp->nfsclds_sess.nfsess_sequenceid;
			NFSUNLOCKMNT(nmp);
			error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
			    nrp, sequenceid, 0, nrp->nr_cred, p);
			NFSCL_DEBUG(3, "DS createsess=%d\n", error);
		}
	} else {
		/*
		 * The connect or exchangeid failed, so nrp was never attached
		 * to a nfsclds and is torn down here.
		 */
		NFSFREECRED(nrp->nr_cred);
		NFSFREEMUTEX(&nrp->nr_mtx);
		free(nrp->nr_nam, M_SONAME);
		free(nrp, M_NFSSOCKREQ);
	}
	if (error == 0) {
		NFSCL_DEBUG(3, "add DS session\n");
		/*
		 * Put it at the end of the list. That way the list
		 * is ordered by when the entry was added. This matters
		 * since the one done first is the one that should be
		 * used for sequencid'ing any subsequent create sessions.
		 */
		NFSLOCKMNT(nmp);
		TAILQ_INSERT_TAIL(&nmp->nm_sess, dsp, nfsclds_list);
		NFSUNLOCKMNT(nmp);
		*dspp = dsp;
	} else if (dsp != NULL) {
		/*
		 * dsp is only non-NULL here when the create session failed.
		 * NOTE(review): nrp is attached to dsp at this point, so
		 * nfscl_freenfsclds() presumably releases it too -- confirm.
		 */
		newnfs_disconnect(nrp);
		nfscl_freenfsclds(dsp);
	}
	return (error);
}

/*
 * Do the NFSv4.1 Reclaim Complete.
*/ int nfsrpc_reclaimcomplete(struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; int error; nfscl_reqstart(nd, NFSPROC_RECLAIMCOMPL, nmp, NULL, 0, NULL, NULL, 0, 0); NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = newnfs_false; nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Initialize the slot tables for a session. */ static void nfscl_initsessionslots(struct nfsclsession *sep) { int i; for (i = 0; i < NFSV4_CBSLOTS; i++) { if (sep->nfsess_cbslots[i].nfssl_reply != NULL) m_freem(sep->nfsess_cbslots[i].nfssl_reply); NFSBZERO(&sep->nfsess_cbslots[i], sizeof(struct nfsslot)); } for (i = 0; i < 64; i++) sep->nfsess_slotseq[i] = 0; sep->nfsess_slots = 0; } /* * Called to try and do an I/O operation via an NFSv4.1 Data Server (DS). */ int nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, uint32_t rwaccess, int docommit, struct ucred *cred, NFSPROC_T *p) { struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfscllayout *layp; struct nfscldevinfo *dip; struct nfsclflayout *rflp; struct mbuf *m; + struct nfsclwritedsdorpc *drpc, *tdrpc; nfsv4stateid_t stateid; struct ucred *newcred; uint64_t lastbyte, len, off, oresid, xfer; - int eof, error, firstmirror, i, iolaymode, mirrorcnt, recalled; + int eof, error, firstmirror, i, iolaymode, mirrorcnt, recalled, timo; void *lckp; uint8_t *dev; void *iovbase; size_t iovlen; off_t offs; ssize_t resid; if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 || (np->n_flag & NNOLAYOUT) != 0) return (EIO); /* Now, get a reference cnt on the clientid for this mount. */ if (nfscl_getref(nmp) == 0) return (EIO); /* Find an appropriate stateid. 
*/ newcred = NFSNEWCRED(cred); error = nfscl_getstateid(vp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, rwaccess, 1, newcred, p, &stateid, &lckp); if (error != 0) { NFSFREECRED(newcred); nfscl_relref(nmp); return (error); } /* Search for a layout for this file. */ off = uiop->uio_offset; layp = nfscl_getlayout(nmp->nm_clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, off, &rflp, &recalled); if (layp == NULL || rflp == NULL) { if (recalled != 0) { NFSFREECRED(newcred); nfscl_relref(nmp); return (EIO); } if (layp != NULL) { nfscl_rellayout(layp, (rflp == NULL) ? 1 : 0); layp = NULL; } /* Try and get a Layout, if it is supported. */ if (rwaccess == NFSV4OPEN_ACCESSWRITE || (np->n_flag & NWRITEOPENED) != 0) iolaymode = NFSLAYOUTIOMODE_RW; else iolaymode = NFSLAYOUTIOMODE_READ; error = nfsrpc_getlayout(nmp, vp, np->n_fhp, iolaymode, NULL, &stateid, off, &layp, newcred, p); if (error != 0) { NFSLOCKNODE(np); np->n_flag |= NNOLAYOUT; NFSUNLOCKNODE(np); if (lckp != NULL) nfscl_lockderef(lckp); NFSFREECRED(newcred); if (layp != NULL) nfscl_rellayout(layp, 0); nfscl_relref(nmp); return (error); } } /* * Loop around finding a layout that works for the first part of * this I/O operation, and then call the function that actually * does the RPC. */ eof = 0; len = (uint64_t)uiop->uio_resid; while (len > 0 && error == 0 && eof == 0) { off = uiop->uio_offset; error = nfscl_findlayoutforio(layp, off, rwaccess, &rflp); if (error == 0) { oresid = xfer = (uint64_t)uiop->uio_resid; if (xfer > (rflp->nfsfl_end - rflp->nfsfl_off)) xfer = rflp->nfsfl_end - rflp->nfsfl_off; /* * For Flex File layout with mirrored DSs, select one * of them at random for reads. For writes and commits, * do all mirrors. 
*/ m = NULL; + drpc = NULL; firstmirror = 0; mirrorcnt = 1; if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0 && (mirrorcnt = rflp->nfsfl_mirrorcnt) > 1) { if (rwaccess == NFSV4OPEN_ACCESSREAD) { firstmirror = arc4random() % mirrorcnt; mirrorcnt = firstmirror + 1; - } else if (docommit == 0) { - /* - * Save values, so uiop can be rolled - * back upon a write error. - */ - offs = uiop->uio_offset; - resid = uiop->uio_resid; - iovbase = uiop->uio_iov->iov_base; - iovlen = uiop->uio_iov->iov_len; - m = nfsm_uiombuflist(uiop, len, NULL, - NULL); + } else { + if (docommit == 0) { + /* + * Save values, so uiop can be + * rolled back upon a write + * error. + */ + offs = uiop->uio_offset; + resid = uiop->uio_resid; + iovbase = + uiop->uio_iov->iov_base; + iovlen = uiop->uio_iov->iov_len; + m = nfsm_uiombuflist(uiop, len, + NULL, NULL); + } + tdrpc = drpc = malloc(sizeof(*drpc) * + (mirrorcnt - 1), M_TEMP, M_WAITOK | + M_ZERO); } } for (i = firstmirror; i < mirrorcnt && error == 0; i++){ if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0) dev = rflp->nfsfl_ffm[i].dev; else dev = rflp->nfsfl_dev; dip = nfscl_getdevinfo(nmp->nm_clp, dev, rflp->nfsfl_devp); if (dip != NULL) { if ((rflp->nfsfl_flags & NFSFL_FLEXFILE) != 0) error = nfscl_dofflayoutio(vp, uiop, iomode, must_commit, &eof, &stateid, rwaccess, dip, layp, rflp, off, xfer, - i, docommit, m, newcred, - p); + i, docommit, m, tdrpc, + newcred, p); else error = nfscl_doflayoutio(vp, uiop, iomode, must_commit, &eof, &stateid, rwaccess, dip, layp, rflp, off, xfer, docommit, newcred, p); nfscl_reldevinfo(dip); } else error = EIO; + tdrpc++; } if (m != NULL) m_freem(m); + tdrpc = drpc; + timo = hz / 50; /* Wait for 20msec. */ + if (timo < 1) + timo = 1; + for (i = firstmirror; i < mirrorcnt - 1 && + tdrpc != NULL; i++, tdrpc++) { + /* + * For the unused drpc entries, both inprog and + * err == 0, so this loop won't break. 
+ */ + while (tdrpc->inprog != 0 && tdrpc->done == 0) + tsleep(&tdrpc->tsk, PVFS, "clrpcio", + timo); + if (error == 0 && tdrpc->err != 0) + error = tdrpc->err; + } + free(drpc, M_TEMP); if (error == 0) { if (mirrorcnt > 1 && rwaccess == NFSV4OPEN_ACCESSWRITE && docommit == 0) { NFSLOCKCLSTATE(); layp->nfsly_flags |= NFSLY_WRITTEN; NFSUNLOCKCLSTATE(); } lastbyte = off + xfer - 1; NFSLOCKCLSTATE(); if (lastbyte > layp->nfsly_lastbyte) layp->nfsly_lastbyte = lastbyte; NFSUNLOCKCLSTATE(); } else if (error == NFSERR_OPENMODE && rwaccess == NFSV4OPEN_ACCESSREAD) { NFSLOCKMNT(nmp); nmp->nm_state |= NFSSTA_OPENMODE; NFSUNLOCKMNT(nmp); } else error = EIO; if (error == 0) len -= (oresid - (uint64_t)uiop->uio_resid); else if (mirrorcnt > 1 && rwaccess == NFSV4OPEN_ACCESSWRITE && docommit == 0) { /* * In case the rpc gets retried, roll the * uio fields changed by nfsm_uiombuflist() * back. */ uiop->uio_offset = offs; uiop->uio_resid = resid; uiop->uio_iov->iov_base = iovbase; uiop->uio_iov->iov_len = iovlen; } } } if (lckp != NULL) nfscl_lockderef(lckp); NFSFREECRED(newcred); nfscl_rellayout(layp, 0); nfscl_relref(nmp); return (error); } /* * Make a copy of the mbuf chain and add an mbuf for null padding, as required. */ static struct mbuf * nfsm_copym(struct mbuf *m, int off, int xfer) { struct mbuf *m2, *m3, *m4; uint32_t *tl; int rem; m2 = m_copym(m, off, xfer, M_WAITOK); rem = NFSM_RNDUP(xfer) - xfer; if (rem > 0) { /* * The zero padding to a multiple of 4 bytes is required by * the XDR. So that the mbufs copied by reference aren't * modified, add an mbuf with the zero'd bytes to the list. * rem will be a maximum of 3, so one zero'd uint32_t is * sufficient. */ m3 = m2; while (m3->m_next != NULL) m3 = m3->m_next; NFSMGET(m4); tl = NFSMTOD(m4, uint32_t *); *tl = 0; mbuf_setlen(m4, rem); mbuf_setnext(m3, m4); } return (m2); } /* * Find a file layout that will handle the first bytes of the requested * range and return the information from it needed to to the I/O operation. 
*/ int nfscl_findlayoutforio(struct nfscllayout *lyp, uint64_t off, uint32_t rwaccess, struct nfsclflayout **retflpp) { struct nfsclflayout *flp, *nflp, *rflp; uint32_t rw; rflp = NULL; rw = rwaccess; /* For reading, do the Read list first and then the Write list. */ do { if (rw == NFSV4OPEN_ACCESSREAD) flp = LIST_FIRST(&lyp->nfsly_flayread); else flp = LIST_FIRST(&lyp->nfsly_flayrw); while (flp != NULL) { nflp = LIST_NEXT(flp, nfsfl_list); if (flp->nfsfl_off > off) break; if (flp->nfsfl_end > off && (rflp == NULL || rflp->nfsfl_end < flp->nfsfl_end)) rflp = flp; flp = nflp; } if (rw == NFSV4OPEN_ACCESSREAD) rw = NFSV4OPEN_ACCESSWRITE; else rw = 0; } while (rw != 0); if (rflp != NULL) { /* This one covers the most bytes starting at off. */ *retflpp = rflp; return (0); } return (EIO); } /* * Do I/O using an NFSv4.1 file layout. */ static int nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp, struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off, uint64_t len, int docommit, struct ucred *cred, NFSPROC_T *p) { uint64_t io_off, rel_off, stripe_unit_size, transfer, xfer; int commit_thru_mds, error, stripe_index, stripe_pos; struct nfsnode *np; struct nfsfh *fhp; struct nfsclds **dspp; np = VTONFS(vp); rel_off = off - flp->nfsfl_patoff; stripe_unit_size = (flp->nfsfl_util >> 6) & 0x3ffffff; stripe_pos = (rel_off / stripe_unit_size + flp->nfsfl_stripe1) % dp->nfsdi_stripecnt; transfer = stripe_unit_size - (rel_off % stripe_unit_size); error = 0; /* Loop around, doing I/O for each stripe unit. */ while (len > 0 && error == 0) { stripe_index = nfsfldi_stripeindex(dp, stripe_pos); dspp = nfsfldi_addr(dp, stripe_index); if (len > transfer && docommit == 0) xfer = transfer; else xfer = len; if ((flp->nfsfl_util & NFSFLAYUTIL_DENSE) != 0) { /* Dense layout. 
*/ if (stripe_pos >= flp->nfsfl_fhcnt) return (EIO); fhp = flp->nfsfl_fh[stripe_pos]; io_off = (rel_off / (stripe_unit_size * dp->nfsdi_stripecnt)) * stripe_unit_size + rel_off % stripe_unit_size; } else { /* Sparse layout. */ if (flp->nfsfl_fhcnt > 1) { if (stripe_index >= flp->nfsfl_fhcnt) return (EIO); fhp = flp->nfsfl_fh[stripe_index]; } else if (flp->nfsfl_fhcnt == 1) fhp = flp->nfsfl_fh[0]; else fhp = np->n_fhp; io_off = off; } if ((flp->nfsfl_util & NFSFLAYUTIL_COMMIT_THRU_MDS) != 0) { commit_thru_mds = 1; if (docommit != 0) error = EIO; } else { commit_thru_mds = 0; mtx_lock(&np->n_mtx); np->n_flag |= NDSCOMMIT; mtx_unlock(&np->n_mtx); } if (docommit != 0) { if (error == 0) error = nfsrpc_commitds(vp, io_off, xfer, *dspp, fhp, 0, 0, cred, p); if (error == 0) { /* * Set both eof and uio_resid = 0 to end any * loops. */ *eofp = 1; uiop->uio_resid = 0; } else { mtx_lock(&np->n_mtx); np->n_flag &= ~NDSCOMMIT; mtx_unlock(&np->n_mtx); } } else if (rwflag == NFSV4OPEN_ACCESSREAD) error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp, io_off, xfer, fhp, 0, 0, 0, cred, p); else { error = nfsrpc_writeds(vp, uiop, iomode, must_commit, stateidp, *dspp, io_off, xfer, fhp, commit_thru_mds, 0, 0, 0, cred, p); if (error == 0) { NFSLOCKCLSTATE(); lyp->nfsly_flags |= NFSLY_WRITTEN; NFSUNLOCKCLSTATE(); } } if (error == 0) { transfer = stripe_unit_size; stripe_pos = (stripe_pos + 1) % dp->nfsdi_stripecnt; len -= xfer; off += xfer; } } return (error); } /* * Do I/O using an NFSv4.1 flex file layout. 
*/ static int nfscl_dofflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp, struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off, - uint64_t len, int mirror, int docommit, struct mbuf *mp, struct ucred *cred, - NFSPROC_T *p) + uint64_t len, int mirror, int docommit, struct mbuf *mp, + struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p) { uint64_t transfer, xfer; int error, rel_off; struct nfsnode *np; struct nfsfh *fhp; struct nfsclds **dspp; struct ucred *tcred; struct mbuf *m; np = VTONFS(vp); error = 0; rel_off = 0; NFSCL_DEBUG(4, "nfscl_dofflayoutio: off=%ju len=%ju\n", (uintmax_t)off, (uintmax_t)len); /* Loop around, doing I/O for each stripe unit. */ while (len > 0 && error == 0) { dspp = nfsfldi_addr(dp, 0); fhp = flp->nfsfl_ffm[mirror].fh[dp->nfsdi_versindex]; stateidp = &flp->nfsfl_ffm[mirror].st; NFSCL_DEBUG(4, "mirror=%d vind=%d fhlen=%d st.seqid=0x%x\n", mirror, dp->nfsdi_versindex, fhp->nfh_len, stateidp->seqid); if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0) { tcred = NFSNEWCRED(cred); tcred->cr_uid = flp->nfsfl_ffm[mirror].user; tcred->cr_groups[0] = flp->nfsfl_ffm[mirror].group; tcred->cr_ngroups = 1; } else tcred = cred; if (rwflag == NFSV4OPEN_ACCESSREAD) transfer = dp->nfsdi_rsize; else transfer = dp->nfsdi_wsize; mtx_lock(&np->n_mtx); np->n_flag |= NDSCOMMIT; mtx_unlock(&np->n_mtx); if (len > transfer && docommit == 0) xfer = transfer; else xfer = len; if (docommit != 0) { - if (error == 0) - error = nfsrpc_commitds(vp, off, xfer, *dspp, - fhp, dp->nfsdi_vers, dp->nfsdi_minorvers, - tcred, p); - NFSCL_DEBUG(4, "aft nfsrpc_commitds=%d\n", error); if (error == 0) { /* + * Do last mirrored DS commit with this thread. 
+ */ + if (mirror < flp->nfsfl_mirrorcnt - 1) + error = nfsio_commitds(vp, off, xfer, + *dspp, fhp, dp->nfsdi_vers, + dp->nfsdi_minorvers, drpc, tcred, + p); + else + error = nfsrpc_commitds(vp, off, xfer, + *dspp, fhp, dp->nfsdi_vers, + dp->nfsdi_minorvers, tcred, p); + } + NFSCL_DEBUG(4, "aft nfsio_commitds=%d\n", error); + if (error == 0) { + /* * Set both eof and uio_resid = 0 to end any * loops. */ *eofp = 1; uiop->uio_resid = 0; } else { mtx_lock(&np->n_mtx); np->n_flag &= ~NDSCOMMIT; mtx_unlock(&np->n_mtx); } } else if (rwflag == NFSV4OPEN_ACCESSREAD) error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp, off, xfer, fhp, 1, dp->nfsdi_vers, dp->nfsdi_minorvers, tcred, p); else { if (flp->nfsfl_mirrorcnt == 1) { error = nfsrpc_writeds(vp, uiop, iomode, must_commit, stateidp, *dspp, off, xfer, fhp, 0, 1, dp->nfsdi_vers, dp->nfsdi_minorvers, tcred, p); if (error == 0) { NFSLOCKCLSTATE(); lyp->nfsly_flags |= NFSLY_WRITTEN; NFSUNLOCKCLSTATE(); } } else { m = nfsm_copym(mp, rel_off, xfer); NFSCL_DEBUG(4, "mcopy reloff=%d xfer=%jd\n", rel_off, (uintmax_t)xfer); - error = nfsrpc_writedsmir(vp, iomode, - must_commit, stateidp, *dspp, off, xfer, - fhp, m, dp->nfsdi_vers, dp->nfsdi_minorvers, - tcred, p); - NFSCL_DEBUG(4, "nfsrpc_writedsmir=%d\n", error); + /* + * Do last write to a mirrored DS with this + * thread. 
+ */ + if (mirror < flp->nfsfl_mirrorcnt - 1) + error = nfsio_writedsmir(vp, iomode, + must_commit, stateidp, *dspp, off, + xfer, fhp, m, dp->nfsdi_vers, + dp->nfsdi_minorvers, drpc, tcred, + p); + else + error = nfsrpc_writedsmir(vp, iomode, + must_commit, stateidp, *dspp, off, + xfer, fhp, m, dp->nfsdi_vers, + dp->nfsdi_minorvers, tcred, p); + NFSCL_DEBUG(4, "nfsio_writedsmir=%d\n", error); } } NFSCL_DEBUG(4, "aft read/writeds=%d\n", error); if (error == 0) { len -= xfer; off += xfer; rel_off += xfer; } if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0) NFSFREECRED(tcred); } NFSCL_DEBUG(4, "eo nfscl_dofflayoutio=%d\n", error); return (error); } /* * The actual read RPC done to a DS. */ static int nfsrpc_readds(vnode_t vp, struct uio *uiop, nfsv4stateid_t *stateidp, int *eofp, struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp, int flex, int vers, int minorvers, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; int attrflag, error, retlen; struct nfsrv_descript nfsd; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsrv_descript *nd = &nfsd; struct nfssockreq *nrp; struct nfsvattr na; nd->nd_mrep = NULL; if (vers == 0 || vers == NFS_VER4) { nfscl_reqstart(nd, NFSPROC_READDS, nmp, fhp->nfh_fh, fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers); vers = NFS_VER4; NFSCL_DEBUG(4, "nfsrpc_readds: vers4 minvers=%d\n", minorvers); if (flex != 0) nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); else nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO); } else { nfscl_reqstart(nd, NFSPROC_READ, nmp, fhp->nfh_fh, fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers); NFSCL_DEBUG(4, "nfsrpc_readds: vers3\n"); } NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3); txdr_hyper(io_off, tl); *(tl + 2) = txdr_unsigned(len); nrp = dsp->nfsclds_sockp; NFSCL_DEBUG(4, "nfsrpc_readds: nrp=%p\n", nrp); if (nrp == NULL) /* If NULL, use the MDS socket. 
*/ nrp = &nmp->nm_sockreq; error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred, NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess); NFSCL_DEBUG(4, "nfsrpc_readds: stat=%d err=%d\n", nd->nd_repstat, error); if (error != 0) return (error); if (vers == NFS_VER3) { error = nfscl_postop_attr(nd, &na, &attrflag, NULL); NFSCL_DEBUG(4, "nfsrpc_readds: postop=%d\n", error); if (error != 0) goto nfsmout; } if (nd->nd_repstat != 0) { error = nd->nd_repstat; goto nfsmout; } if (vers == NFS_VER3) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); *eofp = fxdr_unsigned(int, *(tl + 1)); } else { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); *eofp = fxdr_unsigned(int, *tl); } NFSM_STRSIZ(retlen, len); NFSCL_DEBUG(4, "nfsrpc_readds: retlen=%d eof=%d\n", retlen, *eofp); error = nfsm_mbufuio(nd, uiop, retlen); nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); return (error); } /* * The actual write RPC done to a DS. */ static int nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp, int commit_thru_mds, int flex, int vers, int minorvers, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); int attrflag, error, rlen, commit, committed = NFSWRITE_FILESYNC; int32_t backup; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; struct nfssockreq *nrp; struct nfsvattr na; KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1")); nd->nd_mrep = NULL; if (vers == 0 || vers == NFS_VER4) { nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh, fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers); NFSCL_DEBUG(4, "nfsrpc_writeds: vers4 minvers=%d\n", minorvers); vers = NFS_VER4; if (flex != 0) nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); else nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO); NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED); } else { nfscl_reqstart(nd, NFSPROC_WRITE, nmp, 
fhp->nfh_fh, fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers); NFSCL_DEBUG(4, "nfsrpc_writeds: vers3\n"); NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED); } txdr_hyper(io_off, tl); tl += 2; if (vers == NFS_VER3) *tl++ = txdr_unsigned(len); *tl++ = txdr_unsigned(*iomode); *tl = txdr_unsigned(len); nfsm_uiombuf(nd, uiop, len); nrp = dsp->nfsclds_sockp; if (nrp == NULL) /* If NULL, use the MDS socket. */ nrp = &nmp->nm_sockreq; error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred, NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess); NFSCL_DEBUG(4, "nfsrpc_writeds: err=%d stat=%d\n", error, nd->nd_repstat); if (error != 0) return (error); if (nd->nd_repstat != 0) { /* * In case the rpc gets retried, roll * the uio fileds changed by nfsm_uiombuf() * back. */ uiop->uio_offset -= len; uio_uio_resid_add(uiop, len); uio_iov_base_add(uiop, -len); uio_iov_len_add(uiop, len); error = nd->nd_repstat; } else { if (vers == NFS_VER3) { error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL, NULL); NFSCL_DEBUG(4, "nfsrpc_writeds: wcc_data=%d\n", error); if (error != 0) goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF); rlen = fxdr_unsigned(int, *tl++); NFSCL_DEBUG(4, "nfsrpc_writeds: len=%d rlen=%d\n", len, rlen); if (rlen == 0) { error = NFSERR_IO; goto nfsmout; } else if (rlen < len) { backup = len - rlen; uio_iov_base_add(uiop, -(backup)); uio_iov_len_add(uiop, backup); uiop->uio_offset -= backup; uio_uio_resid_add(uiop, backup); len = rlen; } commit = fxdr_unsigned(int, *tl++); /* * Return the lowest commitment level * obtained by any of the RPCs. 
*/ if (committed == NFSWRITE_FILESYNC) committed = commit; else if (committed == NFSWRITE_DATASYNC && commit == NFSWRITE_UNSTABLE) committed = commit; if (commit_thru_mds != 0) { NFSLOCKMNT(nmp); if (!NFSHASWRITEVERF(nmp)) { NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); NFSSETWRITEVERF(nmp); } else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) { *must_commit = 1; NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); } NFSUNLOCKMNT(nmp); } else { NFSLOCKDS(dsp); if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) { NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF); dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF; } else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) { *must_commit = 1; NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF); } NFSUNLOCKDS(dsp); } } nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); *iomode = committed; if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; return (error); } /* * The actual write RPC done to a DS. * This variant is called from a separate kernel process for mirrors. * Any short write is considered an IO error. 
*/ static int nfsrpc_writedsmir(vnode_t vp, int *iomode, int *must_commit, nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); int attrflag, error, commit, committed = NFSWRITE_FILESYNC, rlen; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; struct nfssockreq *nrp; struct nfsvattr na; nd->nd_mrep = NULL; if (vers == 0 || vers == NFS_VER4) { nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh, fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers); vers = NFS_VER4; NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers4 minvers=%d\n", minorvers); nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED); } else { nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh, fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers); NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers3\n"); NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED); } txdr_hyper(io_off, tl); tl += 2; if (vers == NFS_VER3) *tl++ = txdr_unsigned(len); *tl++ = txdr_unsigned(*iomode); *tl = txdr_unsigned(len); if (len > 0) { /* Put data in mbuf chain. */ nd->nd_mb->m_next = m; /* Set nd_mb and nd_bpos to end of data. */ while (m->m_next != NULL) m = m->m_next; nd->nd_mb = m; nd->nd_bpos = mtod(m, char *) + m->m_len; NFSCL_DEBUG(4, "nfsrpc_writedsmir: lastmb len=%d\n", m->m_len); } nrp = dsp->nfsclds_sockp; if (nrp == NULL) /* If NULL, use the MDS socket. 
*/ nrp = &nmp->nm_sockreq; error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred, NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess); NFSCL_DEBUG(4, "nfsrpc_writedsmir: err=%d stat=%d\n", error, nd->nd_repstat); if (error != 0) return (error); if (nd->nd_repstat != 0) error = nd->nd_repstat; else { if (vers == NFS_VER3) { error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL, NULL); NFSCL_DEBUG(4, "nfsrpc_writedsmir: wcc_data=%d\n", error); if (error != 0) goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF); rlen = fxdr_unsigned(int, *tl++); NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n", len, rlen); if (rlen != len) { error = NFSERR_IO; NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n", len, rlen); goto nfsmout; } commit = fxdr_unsigned(int, *tl++); /* * Return the lowest commitment level * obtained by any of the RPCs. */ if (committed == NFSWRITE_FILESYNC) committed = commit; else if (committed == NFSWRITE_DATASYNC && commit == NFSWRITE_UNSTABLE) committed = commit; NFSLOCKDS(dsp); if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) { NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF); dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF; } else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) { *must_commit = 1; NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF); } NFSUNLOCKDS(dsp); } nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); *iomode = committed; if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; return (error); } /* + * Start up the thread that will execute nfsrpc_writedsmir(). 
+ */ +static void +start_writedsmir(void *arg, int pending) +{ + struct nfsclwritedsdorpc *drpc; + + drpc = (struct nfsclwritedsdorpc *)arg; + drpc->err = nfsrpc_writedsmir(drpc->vp, &drpc->iomode, + &drpc->must_commit, drpc->stateidp, drpc->dsp, drpc->off, drpc->len, + drpc->fhp, drpc->m, drpc->vers, drpc->minorvers, drpc->cred, + drpc->p); + drpc->done = 1; + NFSCL_DEBUG(4, "start_writedsmir: err=%d\n", drpc->err); +} + +/* + * Set up the write DS mirror call for the pNFS I/O thread. + */ +static int +nfsio_writedsmir(vnode_t vp, int *iomode, int *must_commit, + nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t off, int len, + struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers, + struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p) +{ + int error, ret; + + error = 0; + drpc->done = 0; + drpc->vp = vp; + drpc->iomode = *iomode; + drpc->must_commit = *must_commit; + drpc->stateidp = stateidp; + drpc->dsp = dsp; + drpc->off = off; + drpc->len = len; + drpc->fhp = fhp; + drpc->m = m; + drpc->vers = vers; + drpc->minorvers = minorvers; + drpc->cred = cred; + drpc->p = p; + drpc->inprog = 0; + ret = EIO; + if (nfs_pnfsiothreads > 0) { + ret = nfs_pnfsio(start_writedsmir, drpc); + NFSCL_DEBUG(4, "nfsio_writedsmir: nfs_pnfsio=%d\n", ret); + } + if (ret != 0) + error = nfsrpc_writedsmir(vp, iomode, must_commit, stateidp, + dsp, off, len, fhp, m, vers, minorvers, cred, p); + NFSCL_DEBUG(4, "nfsio_writedsmir: error=%d\n", error); + return (error); +} + +/* * Free up the nfsclds structure. 
*/ void nfscl_freenfsclds(struct nfsclds *dsp) { int i; if (dsp == NULL) return; if (dsp->nfsclds_sockp != NULL) { NFSFREECRED(dsp->nfsclds_sockp->nr_cred); NFSFREEMUTEX(&dsp->nfsclds_sockp->nr_mtx); free(dsp->nfsclds_sockp->nr_nam, M_SONAME); free(dsp->nfsclds_sockp, M_NFSSOCKREQ); } NFSFREEMUTEX(&dsp->nfsclds_mtx); NFSFREEMUTEX(&dsp->nfsclds_sess.nfsess_mtx); for (i = 0; i < NFSV4_CBSLOTS; i++) { if (dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply != NULL) m_freem( dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply); } free(dsp, M_NFSCLDS); } static enum nfsclds_state nfscl_getsameserver(struct nfsmount *nmp, struct nfsclds *newdsp, struct nfsclds **retdspp) { struct nfsclds *dsp, *cur_dsp; /* * Search the list of nfsclds structures for one with the same * server. */ cur_dsp = NULL; TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) { if (dsp->nfsclds_servownlen == newdsp->nfsclds_servownlen && dsp->nfsclds_servownlen != 0 && !NFSBCMP(dsp->nfsclds_serverown, newdsp->nfsclds_serverown, dsp->nfsclds_servownlen) && dsp->nfsclds_sess.nfsess_defunct == 0) { NFSCL_DEBUG(4, "fnd same fdsp=%p dsp=%p flg=0x%x\n", TAILQ_FIRST(&nmp->nm_sess), dsp, dsp->nfsclds_flags); /* Server major id matches. */ if ((dsp->nfsclds_flags & NFSCLDS_DS) != 0) { *retdspp = dsp; return (NFSDSP_USETHISSESSION); } /* * Note the first match, so it can be used for * sequence'ing new sessions. */ if (cur_dsp == NULL) cur_dsp = dsp; } } if (cur_dsp != NULL) { *retdspp = cur_dsp; return (NFSDSP_SEQTHISSESSION); } return (NFSDSP_NOTFOUND); } /* * NFS commit rpc to a NFSv4.1 DS. 
*/ static int nfsrpc_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp, struct nfsfh *fhp, int vers, int minorvers, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfssockreq *nrp; struct nfsvattr na; int attrflag, error; nd->nd_mrep = NULL; if (vers == 0 || vers == NFS_VER4) { nfscl_reqstart(nd, NFSPROC_COMMITDS, nmp, fhp->nfh_fh, fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers); vers = NFS_VER4; } else nfscl_reqstart(nd, NFSPROC_COMMIT, nmp, fhp->nfh_fh, fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers); NFSCL_DEBUG(4, "nfsrpc_commitds: vers=%d minvers=%d\n", vers, minorvers); NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED); txdr_hyper(offset, tl); tl += 2; *tl = txdr_unsigned(cnt); nrp = dsp->nfsclds_sockp; if (nrp == NULL) /* If NULL, use the MDS socket. */ nrp = &nmp->nm_sockreq; error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred, NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess); NFSCL_DEBUG(4, "nfsrpc_commitds: err=%d stat=%d\n", error, nd->nd_repstat); if (error != 0) return (error); if (nd->nd_repstat == 0) { if (vers == NFS_VER3) { error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL, NULL); NFSCL_DEBUG(4, "nfsrpc_commitds: wccdata=%d\n", error); if (error != 0) goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF); NFSLOCKDS(dsp); if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) { NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF); error = NFSERR_STALEWRITEVERF; } NFSUNLOCKDS(dsp); } nfsmout: if (error == 0 && nd->nd_repstat != 0) error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); + return (error); +} + +/* + * Start up the thread that will execute nfsrpc_commitds(). 
+ */ +static void +start_commitds(void *arg, int pending) +{ + struct nfsclwritedsdorpc *drpc; + + drpc = (struct nfsclwritedsdorpc *)arg; + drpc->err = nfsrpc_commitds(drpc->vp, drpc->off, drpc->len, + drpc->dsp, drpc->fhp, drpc->vers, drpc->minorvers, drpc->cred, + drpc->p); + drpc->done = 1; + NFSCL_DEBUG(4, "start_commitds: err=%d\n", drpc->err); +} + +/* + * Set up the commit DS mirror call for the pNFS I/O thread. + */ +static int +nfsio_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp, + struct nfsfh *fhp, int vers, int minorvers, + struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p) +{ + int error, ret; + + error = 0; + drpc->done = 0; + drpc->vp = vp; + drpc->off = offset; + drpc->len = cnt; + drpc->dsp = dsp; + drpc->fhp = fhp; + drpc->vers = vers; + drpc->minorvers = minorvers; + drpc->cred = cred; + drpc->p = p; + drpc->inprog = 0; + ret = EIO; + if (nfs_pnfsiothreads > 0) { + ret = nfs_pnfsio(start_commitds, drpc); + NFSCL_DEBUG(4, "nfsio_commitds: nfs_pnfsio=%d\n", ret); + } + if (ret != 0) + error = nfsrpc_commitds(vp, offset, cnt, dsp, fhp, vers, + minorvers, cred, p); + NFSCL_DEBUG(4, "nfsio_commitds: error=%d\n", error); return (error); } /* * Set up the XDR arguments for the LayoutGet operation. */ static void nfsrv_setuplayoutget(struct nfsrv_descript *nd, int iomode, uint64_t offset, uint64_t len, uint64_t minlen, nfsv4stateid_t *stateidp, int layouttype, int layoutlen, int usecurstateid) { uint32_t *tl; NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + 3 * NFSX_HYPER + NFSX_STATEID); *tl++ = newnfs_false; /* Don't signal availability. */ *tl++ = txdr_unsigned(layouttype); *tl++ = txdr_unsigned(iomode); txdr_hyper(offset, tl); tl += 2; txdr_hyper(len, tl); tl += 2; txdr_hyper(minlen, tl); tl += 2; if (usecurstateid != 0) { /* Special stateid for Current stateid. 
*/ *tl++ = txdr_unsigned(1); *tl++ = 0; *tl++ = 0; *tl++ = 0; } else { *tl++ = txdr_unsigned(stateidp->seqid); NFSCL_DEBUG(4, "layget seq=%d\n", (int)stateidp->seqid); *tl++ = stateidp->other[0]; *tl++ = stateidp->other[1]; *tl++ = stateidp->other[2]; } *tl = txdr_unsigned(layoutlen); } /* * Parse the reply for a successful LayoutGet operation. */ static int nfsrv_parselayoutget(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, int *retonclosep, struct nfsclflayouthead *flhp) { uint32_t *tl; struct nfsclflayout *flp, *prevflp, *tflp; int cnt, error, fhcnt, gotiomode, i, iomode, j, k, l, laytype, nfhlen; int m, mirrorcnt; uint64_t retlen, off; struct nfsfh *nfhp; uint8_t *cp; uid_t user; gid_t grp; NFSCL_DEBUG(4, "in nfsrv_parselayoutget\n"); error = 0; flp = NULL; gotiomode = -1; NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_STATEID); if (*tl++ != 0) *retonclosep = 1; else *retonclosep = 0; stateidp->seqid = fxdr_unsigned(uint32_t, *tl++); NFSCL_DEBUG(4, "retoncls=%d stseq=%d\n", *retonclosep, (int)stateidp->seqid); stateidp->other[0] = *tl++; stateidp->other[1] = *tl++; stateidp->other[2] = *tl++; cnt = fxdr_unsigned(int, *tl); NFSCL_DEBUG(4, "layg cnt=%d\n", cnt); if (cnt <= 0 || cnt > 10000) { /* Don't accept more than 10000 layouts in reply. */ error = NFSERR_BADXDR; goto nfsmout; } for (i = 0; i < cnt; i++) { /* Dissect to the layout type. */ NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + 3 * NFSX_UNSIGNED); off = fxdr_hyper(tl); tl += 2; retlen = fxdr_hyper(tl); tl += 2; iomode = fxdr_unsigned(int, *tl++); laytype = fxdr_unsigned(int, *tl); NFSCL_DEBUG(4, "layt=%d off=%ju len=%ju iom=%d\n", laytype, (uintmax_t)off, (uintmax_t)retlen, iomode); /* Ignore length of layout body for now. */ if (laytype == NFSLAYOUT_NFSV4_1_FILES) { /* Parse the File layout up to fhcnt. 
*/ NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED + NFSX_HYPER + NFSX_V4DEVICEID); fhcnt = fxdr_unsigned(int, *(tl + 4 + NFSX_V4DEVICEID / NFSX_UNSIGNED)); NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt); if (fhcnt < 0 || fhcnt > 100) { /* Don't accept more than 100 file handles. */ error = NFSERR_BADXDR; goto nfsmout; } if (fhcnt > 0) flp = malloc(sizeof(*flp) + fhcnt * sizeof(struct nfsfh *), M_NFSFLAYOUT, M_WAITOK); else flp = malloc(sizeof(*flp), M_NFSFLAYOUT, M_WAITOK); flp->nfsfl_flags = NFSFL_FILE; flp->nfsfl_fhcnt = 0; flp->nfsfl_devp = NULL; flp->nfsfl_off = off; if (flp->nfsfl_off + retlen < flp->nfsfl_off) flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off; else flp->nfsfl_end = flp->nfsfl_off + retlen; flp->nfsfl_iomode = iomode; if (gotiomode == -1) gotiomode = flp->nfsfl_iomode; /* Ignore layout body length for now. */ NFSBCOPY(tl, flp->nfsfl_dev, NFSX_V4DEVICEID); tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); flp->nfsfl_util = fxdr_unsigned(uint32_t, *tl++); NFSCL_DEBUG(4, "flutil=0x%x\n", flp->nfsfl_util); flp->nfsfl_stripe1 = fxdr_unsigned(uint32_t, *tl++); flp->nfsfl_patoff = fxdr_hyper(tl); tl += 2; NFSCL_DEBUG(4, "stripe1=%u poff=%ju\n", flp->nfsfl_stripe1, (uintmax_t)flp->nfsfl_patoff); for (j = 0; j < fhcnt; j++) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); nfhlen = fxdr_unsigned(int, *tl); if (nfhlen <= 0 || nfhlen > NFSX_V4FHMAX) { error = NFSERR_BADXDR; goto nfsmout; } nfhp = malloc(sizeof(*nfhp) + nfhlen - 1, M_NFSFH, M_WAITOK); flp->nfsfl_fh[j] = nfhp; flp->nfsfl_fhcnt++; nfhp->nfh_len = nfhlen; NFSM_DISSECT(cp, uint8_t *, NFSM_RNDUP(nfhlen)); NFSBCOPY(cp, nfhp->nfh_fh, nfhlen); } } else if (laytype == NFSLAYOUT_FLEXFILE) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED + NFSX_HYPER); mirrorcnt = fxdr_unsigned(int, *(tl + 2)); NFSCL_DEBUG(4, "mirrorcnt=%d\n", mirrorcnt); if (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS) { error = NFSERR_BADXDR; goto nfsmout; } flp = malloc(sizeof(*flp) + mirrorcnt * sizeof(struct nfsffm), M_NFSFLAYOUT, M_WAITOK); 
flp->nfsfl_flags = NFSFL_FLEXFILE; flp->nfsfl_mirrorcnt = mirrorcnt; flp->nfsfl_devp = NULL; flp->nfsfl_off = off; if (flp->nfsfl_off + retlen < flp->nfsfl_off) flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off; else flp->nfsfl_end = flp->nfsfl_off + retlen; flp->nfsfl_iomode = iomode; if (gotiomode == -1) gotiomode = flp->nfsfl_iomode; flp->nfsfl_stripeunit = fxdr_hyper(tl); NFSCL_DEBUG(4, "stripeunit=%ju\n", (uintmax_t)flp->nfsfl_stripeunit); for (j = 0; j < mirrorcnt; j++) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); k = fxdr_unsigned(int, *tl); if (k < 1 || k > 128) { error = NFSERR_BADXDR; goto nfsmout; } NFSCL_DEBUG(4, "servercnt=%d\n", k); for (l = 0; l < k; l++) { NFSM_DISSECT(tl, uint32_t *, NFSX_V4DEVICEID + NFSX_STATEID + 2 * NFSX_UNSIGNED); if (l == 0) { /* Just use the first server. */ NFSBCOPY(tl, flp->nfsfl_ffm[j].dev, NFSX_V4DEVICEID); tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); tl++; flp->nfsfl_ffm[j].st.seqid = *tl++; flp->nfsfl_ffm[j].st.other[0] = *tl++; flp->nfsfl_ffm[j].st.other[1] = *tl++; flp->nfsfl_ffm[j].st.other[2] = *tl++; NFSCL_DEBUG(4, "st.seqid=%u " "st.o0=0x%x st.o1=0x%x " "st.o2=0x%x\n", flp->nfsfl_ffm[j].st.seqid, flp->nfsfl_ffm[j].st.other[0], flp->nfsfl_ffm[j].st.other[1], flp->nfsfl_ffm[j].st.other[2]); } else tl += ((NFSX_V4DEVICEID + NFSX_STATEID + NFSX_UNSIGNED) / NFSX_UNSIGNED); fhcnt = fxdr_unsigned(int, *tl); NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt); if (fhcnt < 1 || fhcnt > NFSDEV_MAXVERS) { error = NFSERR_BADXDR; goto nfsmout; } for (m = 0; m < fhcnt; m++) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); nfhlen = fxdr_unsigned(int, *tl); NFSCL_DEBUG(4, "nfhlen=%d\n", nfhlen); if (nfhlen <= 0 || nfhlen > NFSX_V4FHMAX) { error = NFSERR_BADXDR; goto nfsmout; } NFSM_DISSECT(cp, uint8_t *, NFSM_RNDUP(nfhlen)); if (l == 0) { flp->nfsfl_ffm[j].fhcnt = fhcnt; nfhp = malloc( sizeof(*nfhp) + nfhlen - 1, M_NFSFH, M_WAITOK); flp->nfsfl_ffm[j].fh[m] = nfhp; nfhp->nfh_len = nfhlen; NFSBCOPY(cp, nfhp->nfh_fh, nfhlen); NFSCL_DEBUG(4, "got fh\n"); } 
} /* Now, get the ffsd_user/ffds_group. */ error = nfsrv_parseug(nd, 0, &user, &grp, curthread); NFSCL_DEBUG(4, "after parseu=%d\n", error); if (error == 0) error = nfsrv_parseug(nd, 1, &user, &grp, curthread); NFSCL_DEBUG(4, "aft parseg=%d\n", grp); if (error != 0) goto nfsmout; NFSCL_DEBUG(4, "user=%d group=%d\n", user, grp); if (l == 0) { flp->nfsfl_ffm[j].user = user; flp->nfsfl_ffm[j].group = grp; NFSCL_DEBUG(4, "usr=%d grp=%d\n", user, grp); } } } NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); flp->nfsfl_fflags = fxdr_unsigned(uint32_t, *tl++); flp->nfsfl_statshint = fxdr_unsigned(uint32_t, *tl); NFSCL_DEBUG(4, "fflags=0x%x statshint=%d\n", flp->nfsfl_fflags, flp->nfsfl_statshint); } else { error = NFSERR_BADXDR; goto nfsmout; } if (flp->nfsfl_iomode == gotiomode) { /* Keep the list in increasing offset order. */ tflp = LIST_FIRST(flhp); prevflp = NULL; while (tflp != NULL && tflp->nfsfl_off < flp->nfsfl_off) { prevflp = tflp; tflp = LIST_NEXT(tflp, nfsfl_list); } if (prevflp == NULL) LIST_INSERT_HEAD(flhp, flp, nfsfl_list); else LIST_INSERT_AFTER(prevflp, flp, nfsfl_list); NFSCL_DEBUG(4, "flp inserted\n"); } else { printf("nfscl_layoutget(): got wrong iomode\n"); nfscl_freeflayout(flp); } flp = NULL; } nfsmout: NFSCL_DEBUG(4, "eo nfsrv_parselayoutget=%d\n", error); if (error != 0 && flp != NULL) nfscl_freeflayout(flp); return (error); } /* * Parse a user/group digit string. 
*/ static int nfsrv_parseug(struct nfsrv_descript *nd, int dogrp, uid_t *uidp, gid_t *gidp, NFSPROC_T *p) { uint32_t *tl; char *cp, *str, str0[NFSV4_SMALLSTR + 1]; uint32_t len = 0; int error = 0; NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(uint32_t, *tl); if (len > NFSV4_OPAQUELIMIT) { error = NFSERR_BADXDR; goto nfsmout; } NFSCL_DEBUG(4, "nfsrv_parseug: len=%d\n", len); if (len == 0) { if (dogrp != 0) *gidp = GID_NOGROUP; else *uidp = UID_NOBODY; return (0); } if (len > NFSV4_SMALLSTR) str = malloc(len + 1, M_TEMP, M_WAITOK); else str = str0; NFSM_DISSECT(cp, char *, NFSM_RNDUP(len)); NFSBCOPY(cp, str, len); str[len] = '\0'; NFSCL_DEBUG(4, "nfsrv_parseug: str=%s\n", str); if (dogrp != 0) error = nfsv4_strtogid(nd, str, len, gidp, p); else error = nfsv4_strtouid(nd, str, len, uidp, p); nfsmout: if (len > NFSV4_SMALLSTR) free(str, M_TEMP); NFSCL_DEBUG(4, "eo nfsrv_parseug=%d\n", error); return (error); } /* * Similar to nfsrpc_getlayout(), except that it uses nfsrpc_openlayget(), * so that it does both an Open and a Layoutget. */ static int nfsrpc_getopenlayout(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode, struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp, struct ucred *cred, NFSPROC_T *p) { struct nfscllayout *lyp; struct nfsclflayout *flp; struct nfsclflayouthead flh; int error, islocked, layoutlen, recalled, retonclose, usecurstateid; int layouttype, laystat; nfsv4stateid_t stateid; struct nfsclsession *tsep; error = 0; if (NFSHASFLEXFILE(nmp)) layouttype = NFSLAYOUT_FLEXFILE; else layouttype = NFSLAYOUT_NFSV4_1_FILES; /* * If lyp is returned non-NULL, there will be a refcnt (shared lock) * on it, iff flp != NULL or a lock (exclusive lock) on it iff * flp == NULL. 
*/ lyp = nfscl_getlayout(nmp->nm_clp, newfhp, newfhlen, 0, &flp, &recalled); NFSCL_DEBUG(4, "nfsrpc_getopenlayout nfscl_getlayout lyp=%p\n", lyp); if (lyp == NULL) islocked = 0; else if (flp != NULL) islocked = 1; else islocked = 2; if ((lyp == NULL || flp == NULL) && recalled == 0) { LIST_INIT(&flh); tsep = nfsmnt_mdssession(nmp); layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID + 3 * NFSX_UNSIGNED); if (lyp == NULL) usecurstateid = 1; else { usecurstateid = 0; stateid.seqid = lyp->nfsly_stateid.seqid; stateid.other[0] = lyp->nfsly_stateid.other[0]; stateid.other[1] = lyp->nfsly_stateid.other[1]; stateid.other[2] = lyp->nfsly_stateid.other[2]; } error = nfsrpc_openlayoutrpc(nmp, vp, nfhp, fhlen, newfhp, newfhlen, mode, op, name, namelen, dpp, &stateid, usecurstateid, layouttype, layoutlen, &retonclose, &flh, &laystat, cred, p); NFSCL_DEBUG(4, "aft nfsrpc_openlayoutrpc laystat=%d err=%d\n", laystat, error); laystat = nfsrpc_layoutgetres(nmp, vp, newfhp, newfhlen, &stateid, retonclose, NULL, &lyp, &flh, layouttype, laystat, &islocked, cred, p); } else error = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp, newfhlen, mode, op, name, namelen, dpp, 0, 0, cred, p, 0, 0); if (islocked == 2) nfscl_rellayout(lyp, 1); else if (islocked == 1) nfscl_rellayout(lyp, 0); return (error); } /* * This function does an Open+LayoutGet for an NFSv4.1 mount with pNFS * enabled, only for the CLAIM_NULL case. All other NFSv4 Opens are * handled by nfsrpc_openrpc(). * For the case where op == NULL, dvp is the directory. When op != NULL, it * can be NULL. 
*/
/*
 * nfsrpc_openlayoutrpc() details:
 * - Builds one compound holding Open (NOCREATE, CLAIM_NULL), Getattr
 *   (Change and TimeModify) and LayoutGet, then decodes the reply in that
 *   same order.
 * - *dpp is set to a newly allocated delegation only when the server
 *   granted one and the Getattr reply was loaded; otherwise it stays NULL.
 * - *laystatp starts as ENXIO and is replaced by the reply status or by
 *   the decoded LayoutGet status, so the caller can tell "Open worked but
 *   no layout" apart from an Open failure (the return value).
 * - NOTE(review): op is dereferenced unconditionally here, so callers
 *   must pass a non-NULL op.
 */
static int
nfsrpc_openlayoutrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
    int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode,
    struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp,
    nfsv4stateid_t *stateidp, int usecurstateid, int layouttype,
    int layoutlen, int *retonclosep, struct nfsclflayouthead *flhp,
    int *laystatp, struct ucred *cred, NFSPROC_T *p)
{
	uint32_t *tl;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfscldeleg *ndp = NULL;
	struct nfsvattr nfsva;
	struct nfsclsession *tsep;
	uint32_t rflags, deleg;
	nfsattrbit_t attrbits;
	int error, ret, acesize, limitby, iomode;

	*dpp = NULL;
	*laystatp = ENXIO;
	nfscl_reqstart(nd, NFSPROC_OPENLAYGET, nmp, nfhp, fhlen, NULL, NULL,
	    0, 0);
	/* Open arguments: seqid, share access, share deny, clientid. */
	NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
	*tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
	tsep = nfsmnt_mdssession(nmp);
	*tl++ = tsep->nfsess_clientid.lval[0];
	*tl = tsep->nfsess_clientid.lval[1];
	nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
	*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
	nfsm_strtom(nd, name, namelen);
	/* Getattr for the Change and TimeModify attributes. */
	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OP_GETATTR);
	NFSZERO_ATTRBIT(&attrbits);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
	nfsrv_putattrbit(nd, &attrbits);
	/* LayoutGet; ask for a RW layout only when opening for writing. */
	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
	if ((mode & NFSV4OPEN_ACCESSWRITE) != 0)
		iomode = NFSLAYOUTIOMODE_RW;
	else
		iomode = NFSLAYOUTIOMODE_READ;
	nfsrv_setuplayoutget(nd, iomode, 0, UINT64_MAX, 0, stateidp,
	    layouttype, layoutlen, usecurstateid);
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	if (error != 0)
		return (error);
	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
	if (nd->nd_repstat != 0)
		*laystatp = nd->nd_repstat;
	if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
		/* ND_NOMOREDATA will be set if the Open operation failed. */
		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
		    6 * NFSX_UNSIGNED);
		op->nfso_stateid.seqid = *tl++;
		op->nfso_stateid.other[0] = *tl++;
		op->nfso_stateid.other[1] = *tl++;
		op->nfso_stateid.other[2] = *tl;
		/*
		 * tl still points at other[2]; the result flags are the
		 * last word of the area dissected above (the words in
		 * between are skipped, presumably the change_info).
		 */
		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
		if (error != 0)
			goto nfsmout;
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		deleg = fxdr_unsigned(u_int32_t, *tl);
		if (deleg == NFSV4OPEN_DELEGATEREAD ||
		    deleg == NFSV4OPEN_DELEGATEWRITE) {
			if (!(op->nfso_own->nfsow_clp->nfsc_flags &
			      NFSCLFLAGS_FIRSTDELEG))
				op->nfso_own->nfsow_clp->nfsc_flags |=
				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
			/* The file handle is stored after the structure. */
			ndp = malloc(sizeof(struct nfscldeleg) + newfhlen,
			    M_NFSCLDELEG, M_WAITOK);
			LIST_INIT(&ndp->nfsdl_owner);
			LIST_INIT(&ndp->nfsdl_lock);
			ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
			ndp->nfsdl_fhlen = newfhlen;
			NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
			newnfs_copyincred(cred, &ndp->nfsdl_cred);
			nfscl_lockinit(&ndp->nfsdl_rwlock);
			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
			    NFSX_UNSIGNED);
			ndp->nfsdl_stateid.seqid = *tl++;
			ndp->nfsdl_stateid.other[0] = *tl++;
			ndp->nfsdl_stateid.other[1] = *tl++;
			ndp->nfsdl_stateid.other[2] = *tl++;
			/* Non-zero means the delegation is being recalled. */
			ret = fxdr_unsigned(int, *tl);
			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
				ndp->nfsdl_flags = NFSCLDL_WRITE;
				/*
				 * Indicates how much the file can grow.
				 */
				NFSM_DISSECT(tl, u_int32_t *,
				    3 * NFSX_UNSIGNED);
				limitby = fxdr_unsigned(int, *tl++);
				switch (limitby) {
				case NFSV4OPEN_LIMITSIZE:
					ndp->nfsdl_sizelimit = fxdr_hyper(tl);
					break;
				case NFSV4OPEN_LIMITBLOCKS:
					/* Block count times bytes per block. */
					ndp->nfsdl_sizelimit =
					    fxdr_unsigned(u_int64_t, *tl++);
					ndp->nfsdl_sizelimit *=
					    fxdr_unsigned(u_int64_t, *tl);
					break;
				default:
					error = NFSERR_BADXDR;
					goto nfsmout;
				};
			} else
				ndp->nfsdl_flags = NFSCLDL_READ;
			if (ret != 0)
				ndp->nfsdl_flags |= NFSCLDL_RECALL;
			error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, &ret,
			    &acesize, p);
			if (error != 0)
				goto nfsmout;
		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
			error = NFSERR_BADXDR;
			goto nfsmout;
		}
		if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) != 0 ||
		    nfscl_assumeposixlocks)
			op->nfso_posixlock = 1;
		else
			op->nfso_posixlock = 0;
		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		/* If the 2nd element == NFS_OK, the Getattr succeeded. */
		if (*++tl == 0) {
			error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
			    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
			    NULL, NULL, NULL, p, cred);
			if (error != 0)
				goto nfsmout;
			if (ndp != NULL) {
				ndp->nfsdl_change = nfsva.na_filerev;
				ndp->nfsdl_modtime = nfsva.na_mtime;
				ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
				/*
				 * Hand the delegation to the caller and
				 * clear ndp so that it is not freed below.
				 */
				*dpp = ndp;
				ndp = NULL;
			}
			/*
			 * At this point, the Open has succeeded, so set
			 * nd_repstat = NFS_OK.  If the Layoutget failed,
			 * this function just won't return a layout.
			 */
			if (nd->nd_repstat == 0) {
				NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
				*laystatp = fxdr_unsigned(int, *++tl);
				if (*laystatp == 0) {
					error = nfsrv_parselayoutget(nd,
					    stateidp, retonclosep, flhp);
					if (error != 0)
						*laystatp = error;
				}
			} else
				nd->nd_repstat = 0;	/* Return 0 for Open. */
		}
	}
	if (nd->nd_repstat != 0 && error == 0)
		error = nd->nd_repstat;
nfsmout:
	/* ndp is non-NULL here only if it was not handed off via *dpp. */
	free(ndp, M_NFSCLDELEG);
	mbuf_freem(nd->nd_mrep);
	return (error);
}

/*
 * Similar nfsrpc_createv4(), but also does the LayoutGet operation.
 * Used only for mounts with pNFS enabled.
*/
/*
 * nfsrpc_createlayout() details:
 * - Builds one compound: Open (CREATE, CLAIM_NULL), SaveFH, GetFH, Getattr
 *   for the new file, PutFH + Getattr for the directory, RestoreFH and
 *   LayoutGet, then decodes the reply in that same order.
 * - All output arguments are initialized up front (*laystatp to ENXIO,
 *   the flags to 0, *nfhpp and *dpp to NULL).
 * - *unlockedp is set to 1 once the open has been released via
 *   nfscl_openrelease().
 * - A delegation, if granted, is returned through *dpp only when the
 *   function returns 0; it is freed otherwise.
 * - *laystatp reports the LayoutGet outcome separately from the return
 *   value, which reflects the Open/Create.
 */
static int
nfsrpc_createlayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
    nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
    struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
    int *dattrflagp, void *dstuff, int *unlockedp, nfsv4stateid_t *stateidp,
    int usecurstateid, int layouttype, int layoutlen, int *retonclosep,
    struct nfsclflayouthead *flhp, int *laystatp)
{
	uint32_t *tl;
	int error = 0, deleg, newone, ret, acesize, limitby;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfsclopen *op;
	struct nfscldeleg *dp = NULL;
	struct nfsnode *np;
	struct nfsfh *nfhp;
	struct nfsclsession *tsep;
	nfsattrbit_t attrbits;
	nfsv4stateid_t stateid;
	uint32_t rflags;
	struct nfsmount *nmp;

	nmp = VFSTONFS(dvp->v_mount);
	np = VTONFS(dvp);
	*laystatp = ENXIO;
	*unlockedp = 0;
	*nfhpp = NULL;
	*dpp = NULL;
	*attrflagp = 0;
	*dattrflagp = 0;
	if (namelen > NFS_MAXNAMLEN)
		return (ENAMETOOLONG);
	NFSCL_REQSTART(nd, NFSPROC_CREATELAYGET, dvp);
	/*
	 * For V4, this is actually an Open op.
	 */
	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(owp->nfsow_seqid);
	*tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
	    NFSV4OPEN_ACCESSREAD);
	*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
	tsep = nfsmnt_mdssession(nmp);
	*tl++ = tsep->nfsess_clientid.lval[0];
	*tl = tsep->nfsess_clientid.lval[1];
	nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
	if ((fmode & O_EXCL) != 0) {
		if (NFSHASSESSPERSIST(nmp)) {
			/* Use GUARDED for persistent sessions. */
			*tl = txdr_unsigned(NFSCREATE_GUARDED);
			nfscl_fillsattr(nd, vap, dvp, 0, 0);
		} else {
			/* Otherwise, use EXCLUSIVE4_1. */
			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
			*tl++ = cverf.lval[0];
			*tl = cverf.lval[1];
			nfscl_fillsattr(nd, vap, dvp, 0, 0);
		}
	} else {
		*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
		nfscl_fillsattr(nd, vap, dvp, 0, 0);
	}
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
	nfsm_strtom(nd, name, namelen);
	/* Get the new file's handle and attributes, plus save the FH. */
	NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(NFSV4OP_SAVEFH);
	*tl++ = txdr_unsigned(NFSV4OP_GETFH);
	*tl = txdr_unsigned(NFSV4OP_GETATTR);
	NFSGETATTR_ATTRBIT(&attrbits);
	nfsrv_putattrbit(nd, &attrbits);
	/* Get the directory's post-op attributes. */
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OP_PUTFH);
	nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OP_GETATTR);
	nfsrv_putattrbit(nd, &attrbits);
	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(NFSV4OP_RESTOREFH);
	*tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
	nfsrv_setuplayoutget(nd, NFSLAYOUTIOMODE_RW, 0, UINT64_MAX, 0, stateidp,
	    layouttype, layoutlen, usecurstateid);
	error = nfscl_request(nd, dvp, p, cred, dstuff);
	if (error != 0)
		return (error);
	NFSCL_DEBUG(4, "nfsrpc_createlayout stat=%d err=%d\n", nd->nd_repstat,
	    error);
	if (nd->nd_repstat != 0)
		*laystatp = nd->nd_repstat;
	NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
	if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
		NFSCL_DEBUG(4, "nfsrpc_createlayout open succeeded\n");
		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
		    6 * NFSX_UNSIGNED);
		stateid.seqid = *tl++;
		stateid.other[0] = *tl++;
		stateid.other[1] = *tl++;
		stateid.other[2] = *tl;
		/*
		 * tl still points at other[2]; the result flags are the
		 * last word of the area dissected above.
		 */
		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
		/*
		 * Do not keep decoding if the attribute bitmap cannot be
		 * parsed; everything after it would be read from the wrong
		 * position in the reply.
		 */
		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
		if (error != 0)
			goto nfsmout;
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		deleg = fxdr_unsigned(int, *tl);
		if (deleg == NFSV4OPEN_DELEGATEREAD ||
		    deleg == NFSV4OPEN_DELEGATEWRITE) {
			if (!(owp->nfsow_clp->nfsc_flags &
			      NFSCLFLAGS_FIRSTDELEG))
				owp->nfsow_clp->nfsc_flags |=
				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
			/*
			 * The new file's handle is not known yet, so leave
			 * room for the largest one; it is filled in below.
			 */
			dp = malloc(sizeof(struct nfscldeleg) + NFSX_V4FHMAX,
			    M_NFSCLDELEG, M_WAITOK);
			LIST_INIT(&dp->nfsdl_owner);
			LIST_INIT(&dp->nfsdl_lock);
			dp->nfsdl_clp = owp->nfsow_clp;
			newnfs_copyincred(cred, &dp->nfsdl_cred);
			nfscl_lockinit(&dp->nfsdl_rwlock);
			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
			    NFSX_UNSIGNED);
			dp->nfsdl_stateid.seqid = *tl++;
			dp->nfsdl_stateid.other[0] = *tl++;
			dp->nfsdl_stateid.other[1] = *tl++;
			dp->nfsdl_stateid.other[2] = *tl++;
			/* Non-zero means the delegation is being recalled. */
			ret = fxdr_unsigned(int, *tl);
			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
				dp->nfsdl_flags = NFSCLDL_WRITE;
				/*
				 * Indicates how much the file can grow.
				 */
				NFSM_DISSECT(tl, u_int32_t *,
				    3 * NFSX_UNSIGNED);
				limitby = fxdr_unsigned(int, *tl++);
				switch (limitby) {
				case NFSV4OPEN_LIMITSIZE:
					dp->nfsdl_sizelimit = fxdr_hyper(tl);
					break;
				case NFSV4OPEN_LIMITBLOCKS:
					/* Block count times bytes per block. */
					dp->nfsdl_sizelimit =
					    fxdr_unsigned(u_int64_t, *tl++);
					dp->nfsdl_sizelimit *=
					    fxdr_unsigned(u_int64_t, *tl);
					break;
				default:
					error = NFSERR_BADXDR;
					goto nfsmout;
				};
			} else {
				dp->nfsdl_flags = NFSCLDL_READ;
			}
			if (ret != 0)
				dp->nfsdl_flags |= NFSCLDL_RECALL;
			error = nfsrv_dissectace(nd, &dp->nfsdl_ace, &ret,
			    &acesize, p);
			if (error != 0)
				goto nfsmout;
		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
			error = NFSERR_BADXDR;
			goto nfsmout;
		}

		/* Now, we should have the status for the SaveFH. */
		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
		if (*++tl == 0) {
			NFSCL_DEBUG(4, "nfsrpc_createlayout SaveFH ok\n");
			/*
			 * Now, process the GetFH and Getattr for the newly
			 * created file. nfscl_mtofh() will set
			 * ND_NOMOREDATA if these weren't successful.
			 */
			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
			NFSCL_DEBUG(4, "aft nfscl_mtofh err=%d\n", error);
			if (error != 0)
				goto nfsmout;
		} else
			nd->nd_flag |= ND_NOMOREDATA;
		/* Now we have the PutFH and Getattr for the directory. */
		if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
			if (*++tl != 0)
				nd->nd_flag |= ND_NOMOREDATA;
			else {
				NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
				if (*++tl != 0)
					nd->nd_flag |= ND_NOMOREDATA;
			}
		}
		if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
			/* Load the directory attributes. */
			error = nfsm_loadattr(nd, dnap);
			NFSCL_DEBUG(4, "aft nfsm_loadattr err=%d\n", error);
			if (error != 0)
				goto nfsmout;
			*dattrflagp = 1;
			if (dp != NULL && *attrflagp != 0) {
				dp->nfsdl_change = nnap->na_filerev;
				dp->nfsdl_modtime = nnap->na_mtime;
				dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
			}
			/*
			 * We can now complete the Open state.
			 */
			nfhp = *nfhpp;
			if (dp != NULL) {
				dp->nfsdl_fhlen = nfhp->nfh_len;
				NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh,
				    nfhp->nfh_len);
			}
			/*
			 * Get an Open structure that will be
			 * attached to the OpenOwner, acquired already.
			 */
			error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len,
			    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
			    cred, p, NULL, &op, &newone, NULL, 0);
			if (error != 0)
				goto nfsmout;
			op->nfso_stateid = stateid;
			newnfs_copyincred(cred, &op->nfso_cred);

			nfscl_openrelease(nmp, op, error, newone);
			*unlockedp = 1;

			/* Now, handle the RestoreFH and LayoutGet. */
			if (nd->nd_repstat == 0) {
				NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED);
				/* The 4th word is the LayoutGet status. */
				*laystatp = fxdr_unsigned(int, *(tl + 3));
				if (*laystatp == 0) {
					error = nfsrv_parselayoutget(nd,
					    stateidp, retonclosep, flhp);
					if (error != 0)
						*laystatp = error;
				}
				NFSCL_DEBUG(4, "aft nfsrv_parselayout err=%d\n",
				    error);
			} else
				nd->nd_repstat = 0;
		}
	}
	if (nd->nd_repstat != 0 && error == 0)
		error = nd->nd_repstat;
	if (error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION)
		nfscl_initiate_recovery(owp->nfsow_clp);
nfsmout:
	NFSCL_DEBUG(4, "eo nfsrpc_createlayout err=%d\n", error);
	/* Only hand out the delegation when the whole call succeeded. */
	if (error == 0)
		*dpp = dp;
	else
		free(dp, M_NFSCLDELEG);
	mbuf_freem(nd->nd_mrep);
	return (error);
}

/*
 * Similar to nfsrpc_getopenlayout(), except that it used for the Create case.
*/
/*
 * Does the Create+LayoutGet RPC via nfsrpc_createlayout() and then passes
 * the LayoutGet outcome to nfsrpc_layoutgetres().  The new file's handle
 * is only given to nfsrpc_layoutgetres() when the LayoutGet succeeded.
 * The value returned is the one from nfsrpc_createlayout(); the layout
 * status itself is not reported to the caller.
 */
static int
nfsrpc_getcreatelayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
    nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
    struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
    int *dattrflagp, void *dstuff, int *unlockedp)
{
	struct nfsmount *nmp = VFSTONFS(dvp->v_mount);
	struct nfscllayout *lyp = NULL;
	struct nfsclflayouthead flh;
	struct nfsclsession *tsep;
	nfsv4stateid_t stateid;
	uint8_t *newfh = NULL;
	int newfhsize = 0;
	int error, layoutlen, layouttype, retonclose, laystat;

	layouttype = NFSHASFLEXFILE(nmp) ? NFSLAYOUT_FLEXFILE :
	    NFSLAYOUT_NFSV4_1_FILES;
	LIST_INIT(&flh);
	tsep = nfsmnt_mdssession(nmp);
	layoutlen = tsep->nfsess_maxcache;
	layoutlen -= NFSX_STATEID + 3 * NFSX_UNSIGNED;

	error = nfsrpc_createlayout(dvp, name, namelen, vap, cverf, fmode,
	    owp, dpp, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp,
	    dstuff, unlockedp, &stateid, 1, layouttype, layoutlen, &retonclose,
	    &flh, &laystat);
	NFSCL_DEBUG(4, "aft nfsrpc_createlayoutrpc laystat=%d err=%d\n",
	    laystat, error);

	/* The new file handle is only looked at if the LayoutGet worked. */
	if (laystat == 0) {
		newfh = (*nfhpp)->nfh_fh;
		newfhsize = (*nfhpp)->nfh_len;
	}
	laystat = nfsrpc_layoutgetres(nmp, dvp, newfh, newfhsize, &stateid,
	    retonclose, NULL, &lyp, &flh, layouttype, laystat, NULL, cred, p);
	/* Drop the reference acquired on the layout, if one was set up. */
	if (laystat == 0)
		nfscl_rellayout(lyp, 0);
	return (error);
}

/*
 * Process the results of a layoutget() operation.
*/
/*
 * nfsrpc_layoutgetres() details:
 * - laystat is the status of the preceding LayoutGet.
 * - NFSERR_UNKNLAYOUTTYPE adjusts the mount's state bits (switch to Flex
 *   File layout, or turn pNFS off if Flex File was what got rejected).
 * - When laystat is 0, device info is set up for each entry on flhp and
 *   the layout is recorded via nfscl_layout(), returned through *lypp.
 * - If islockedp is non-NULL, *islockedp is set to 1 on success.
 * - Returns the resulting layout status (0 on success).
 */
static int
nfsrpc_layoutgetres(struct nfsmount *nmp, vnode_t vp, uint8_t *newfhp,
    int newfhlen, nfsv4stateid_t *stateidp, int retonclose, uint32_t *notifybit,
    struct nfscllayout **lypp, struct nfsclflayouthead *flhp, int layouttype,
    int laystat, int *islockedp, struct ucred *cred, NFSPROC_T *p)
{
	struct nfsclflayout *tflp;
	struct nfscldevinfo *dip;
	uint8_t *dev;

	if (laystat == NFSERR_UNKNLAYOUTTYPE) {
		NFSLOCKMNT(nmp);
		if (!NFSHASFLEXFILE(nmp)) {
			/* Switch to using Flex File Layout. */
			nmp->nm_state |= NFSSTA_FLEXFILE;
		} else if (layouttype == NFSLAYOUT_FLEXFILE) {
			/* Disable pNFS. */
			NFSCL_DEBUG(1, "disable PNFS\n");
			nmp->nm_state &= ~(NFSSTA_PNFS | NFSSTA_FLEXFILE);
		}
		NFSUNLOCKMNT(nmp);
	}
	if (laystat == 0) {
		NFSCL_DEBUG(4, "nfsrpc_layoutgetres at FOREACH\n");
		LIST_FOREACH(tflp, flhp, nfsfl_list) {
			/*
			 * First try without device info (NULL); only when
			 * that fails is a GetDeviceInfo RPC done and the
			 * add retried with the result.
			 */
			laystat = nfscl_adddevinfo(nmp, NULL, tflp);
			NFSCL_DEBUG(4, "aft adddev=%d\n", laystat);
			if (laystat != 0) {
				/* Flex File uses the first mirror's device. */
				if (layouttype == NFSLAYOUT_FLEXFILE)
					dev = tflp->nfsfl_ffm[0].dev;
				else
					dev = tflp->nfsfl_dev;
				laystat = nfsrpc_getdeviceinfo(nmp, dev,
				    layouttype, notifybit, &dip, cred, p);
				NFSCL_DEBUG(4, "aft nfsrpc_gdi=%d\n",
				    laystat);
				if (laystat != 0)
					break;
				laystat = nfscl_adddevinfo(nmp, dip, tflp);
				/*
				 * A failure here is only logged; the loop
				 * goes on and laystat is overwritten by the
				 * next entry, if there is one.
				 */
				if (laystat != 0)
					printf("getlayout: cannot add\n");
			}
		}
	}
	if (laystat == 0) {
		/*
		 * nfscl_layout() always returns with the nfsly_lock
		 * set to a refcnt (shared lock).
		 * Passing in dvp is sufficient, since it is only used to
		 * get the fsid for the file system.
		 */
		laystat = nfscl_layout(nmp, vp, newfhp, newfhlen, stateidp,
		    layouttype, retonclose, flhp, lypp, cred, p);
		NFSCL_DEBUG(4, "nfsrpc_layoutgetres: aft nfscl_layout=%d\n",
		    laystat);
		if (laystat == 0 && islockedp != NULL)
			*islockedp = 1;
	}
	return (laystat);
}
Index: projects/pnfs-planb-server-stable11/sys/fs/nfsserver/nfs_nfsdport.c
===================================================================
--- projects/pnfs-planb-server-stable11/sys/fs/nfsserver/nfs_nfsdport.c (revision 324529)
+++ projects/pnfs-planb-server-stable11/sys/fs/nfsserver/nfs_nfsdport.c (revision 324530)
@@ -1,5183 +1,5184 @@
/*-
 * Copyright (c) 1989, 1993
 * The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include /* * Functions that perform the vfs operations required by the routines in * nfsd_serv.c. It is hoped that this change will make the server more * portable. */ #include #include #include #include #include FEATURE(nfsd, "NFSv4 server"); extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1; extern int nfsrv_useacl; extern int newnfs_numnfsd; extern struct mount nfsv4root_mnt; extern struct nfsrv_stablefirst nfsrv_stablefirst; extern void (*nfsd_call_servertimer)(void); extern SVCPOOL *nfsrvd_pool; extern struct nfsv4lock nfsd_suspend_lock; extern struct nfsclienthashhead *nfsclienthash; extern struct nfslockhashhead *nfslockhash; extern struct nfssessionhash *nfssessionhash; extern int nfsrv_sessionhashsize; extern struct nfsstatsv1 nfsstatsv1; extern struct nfslayouthash *nfslayouthash; extern int nfsrv_layouthashsize; extern struct mtx nfsrv_dslock_mtx; extern struct mtx nfsrv_dsclock_mtx; extern struct mtx nfsrv_dsrmlock_mtx; extern struct mtx nfsrv_dwrpclock_mtx; extern struct mtx nfsrv_dsrpclock_mtx; extern struct mtx nfsrv_darpclock_mtx; +extern int nfs_pnfsiothreads; struct vfsoptlist nfsv4root_opt, nfsv4root_newopt; NFSDLOCKMUTEX; NFSSTATESPINLOCK; struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE]; struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE]; struct mtx nfsrc_udpmtx; struct mtx nfs_v4root_mutex; struct nfsrvfh nfs_rootfh, nfs_pubfh; int nfs_pubfhset = 0, 
nfs_rootfhset = 0; struct proc *nfsd_master_proc = NULL; int nfsd_debuglevel = 0; static pid_t nfsd_master_pid = (pid_t)-1; static char nfsd_master_comm[MAXCOMLEN + 1]; static struct timeval nfsd_master_start; static uint32_t nfsv4_sysid = 0; static fhandle_t zerofh; static int nfssvc_srvcall(struct thread *, struct nfssvc_args *, struct ucred *); int nfsrv_enable_crossmntpt = 1; static int nfs_commit_blks; static int nfs_commit_miss; extern int nfsrv_issuedelegs; extern int nfsrv_dolocallocks; extern int nfsd_enable_stringtouid; extern struct nfsdevicehead nfsrv_devidhead; static void nfsrv_pnfscreate(struct vnode *, struct vattr *, struct ucred *, NFSPROC_T *); static void nfsrv_pnfsremovesetup(struct vnode *, NFSPROC_T *, struct vnode **, int *, char *); static void nfsrv_pnfsremove(struct vnode **, int, char *, NFSPROC_T *); static int nfsrv_proxyds(struct nfsrv_descript *, struct vnode *, off_t, int, struct ucred *, struct thread *, int, struct mbuf **, char *, struct mbuf **, struct nfsvattr *, struct acl *); static int nfsrv_dsgetsockmnt(struct vnode *, int, char *, int, int *, NFSPROC_T *, struct vnode **, struct nfsmount **, fhandle_t *, char *, char *); static int nfsrv_setextattr(struct vnode *, struct nfsvattr *, NFSPROC_T *); static int nfsrv_readdsrpc(fhandle_t *, off_t, int, struct ucred *, NFSPROC_T *, struct nfsmount *, struct mbuf **, struct mbuf **); static int nfsrv_writedsrpc(fhandle_t *, off_t, int, struct ucred *, NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct mbuf **, char *); static int nfsrv_setacldsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct acl *); static int nfsrv_setattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct nfsvattr *); static int nfsrv_getattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, struct vnode *, struct nfsmount *, struct nfsvattr *); static int nfsrv_putfhname(fhandle_t *, char *); static int 
nfsrv_pnfslookupds(struct vnode *, struct pnfsdsfile *, struct vnode *, NFSPROC_T *); +int nfs_pnfsio(task_fn_t *, void *); + SYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW, 0, "NFS server"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW, &nfsrv_enable_crossmntpt, 0, "Enable nfsd to cross mount points"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0, ""); SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0, ""); SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW, &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW, &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, debuglevel, CTLFLAG_RW, &nfsd_debuglevel, 0, "Debug level for NFS server"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_stringtouid, CTLFLAG_RW, &nfsd_enable_stringtouid, 0, "Enable nfsd to accept numeric owner_names"); static int nfsrv_pnfsgetdsattr = 1; SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsgetdsattr, CTLFLAG_RW, &nfsrv_pnfsgetdsattr, 0, "When set getattr gets DS attributes via RPC"); /* * nfsrv_dsdirsize can only be increased and only when the nfsd threads are * not running. * The dsN subdirectories for the increased values must have been created * on all DS servers before this increase is done. 
*/ u_int nfsrv_dsdirsize = 20; static int sysctl_dsdirsize(SYSCTL_HANDLER_ARGS) { int error, newdsdirsize; newdsdirsize = nfsrv_dsdirsize; error = sysctl_handle_int(oidp, &newdsdirsize, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (newdsdirsize <= nfsrv_dsdirsize || newdsdirsize > 10000 || newnfs_numnfsd != 0) return (EINVAL); nfsrv_dsdirsize = newdsdirsize; return (0); } SYSCTL_PROC(_vfs_nfsd, OID_AUTO, dsdirsize, CTLTYPE_UINT | CTLFLAG_RW, 0, sizeof(nfsrv_dsdirsize), sysctl_dsdirsize, "IU", "Number of dsN subdirs on the DS servers"); #define MAX_REORDERED_RPC 16 #define NUM_HEURISTIC 1031 #define NHUSE_INIT 64 #define NHUSE_INC 16 #define NHUSE_MAX 2048 static struct nfsheur { struct vnode *nh_vp; /* vp to match (unreferenced pointer) */ off_t nh_nextoff; /* next offset for sequential detection */ int nh_use; /* use count for selection */ int nh_seqcount; /* heuristic */ } nfsheur[NUM_HEURISTIC]; /* * Heuristic to detect sequential operation. */ static struct nfsheur * nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp) { struct nfsheur *nh; int hi, try; /* Locate best candidate. */ try = 32; hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC; nh = &nfsheur[hi]; while (try--) { if (nfsheur[hi].nh_vp == vp) { nh = &nfsheur[hi]; break; } if (nfsheur[hi].nh_use > 0) --nfsheur[hi].nh_use; hi = (hi + 1) % NUM_HEURISTIC; if (nfsheur[hi].nh_use < nh->nh_use) nh = &nfsheur[hi]; } /* Initialize hint if this is a new file. */ if (nh->nh_vp != vp) { nh->nh_vp = vp; nh->nh_nextoff = uio->uio_offset; nh->nh_use = NHUSE_INIT; if (uio->uio_offset == 0) nh->nh_seqcount = 4; else nh->nh_seqcount = 1; } /* Calculate heuristic. */ if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) || uio->uio_offset == nh->nh_nextoff) { /* See comments in vfs_vnops.c:sequential_heuristic(). 
*/ nh->nh_seqcount += howmany(uio->uio_resid, 16384); if (nh->nh_seqcount > IO_SEQMAX) nh->nh_seqcount = IO_SEQMAX; } else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC * imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) { /* Probably a reordered RPC, leave seqcount alone. */ } else if (nh->nh_seqcount > 1) { nh->nh_seqcount /= 2; } else { nh->nh_seqcount = 0; } nh->nh_use += NHUSE_INC; if (nh->nh_use > NHUSE_MAX) nh->nh_use = NHUSE_MAX; return (nh); } /* * Get attributes into nfsvattr structure. */ int nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, struct nfsrv_descript *nd, struct thread *p, int vpislocked, nfsattrbit_t *attrbitp) { int error, gotattr, lockedit = 0; struct nfsvattr na; if (vpislocked == 0) { /* * When vpislocked == 0, the vnode is either exclusively * locked by this thread or not locked by this thread. * As such, shared lock it, if not exclusively locked. */ if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { lockedit = 1; NFSVOPLOCK(vp, LK_SHARED | LK_RETRY); } } /* * Acquire the Change, Size and TimeModify attributes, as required. * This needs to be done for regular files if: * - non-NFSv4 RPCs or * - when attrbitp == NULL or * - an NFSv4 RPC with any of the above attributes in attrbitp. * A return of 0 for nfsrv_proxyds() indicates that it has acquired * these attributes. nfsrv_proxyds() will return an error if the * server is not a pNFS one. 
*/ gotattr = 0; if (vp->v_type == VREG && (attrbitp == NULL || (nd->nd_flag & ND_NFSV4) == 0 || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_CHANGE) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEACCESS) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEMODIFY))) { error = nfsrv_proxyds(nd, vp, 0, 0, nd->nd_cred, p, NFSPROC_GETATTR, NULL, NULL, NULL, &na, NULL); if (error == 0) gotattr = 1; } error = VOP_GETATTR(vp, &nvap->na_vattr, nd->nd_cred); if (lockedit != 0) NFSVOPUNLOCK(vp, 0); /* * If we got the Change, Size and Modify Time from the DS, * replace them. */ if (gotattr != 0) { nvap->na_atime = na.na_atime; nvap->na_mtime = na.na_mtime; nvap->na_filerev = na.na_filerev; nvap->na_size = na.na_size; } NFSD_DEBUG(4, "nfsvno_getattr: gotattr=%d err=%d chg=%ju\n", gotattr, error, (uintmax_t)na.na_filerev); NFSEXITCODE(error); return (error); } /* * Get a file handle for a vnode. */ int nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p) { int error; NFSBZERO((caddr_t)fhp, sizeof(fhandle_t)); fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VOP_VPTOFH(vp, &fhp->fh_fid); NFSEXITCODE(error); return (error); } /* * Perform access checking for vnodes obtained from file handles that would * refer to files already opened by a Unix client. You cannot just use * vn_writechk() and VOP_ACCESSX() for two reasons. * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write * case. * 2 - The owner is to be given access irrespective of mode bits for some * operations, so that processes that chmod after opening a file don't * break. 
*/
/*
 * nfsvno_accchk() details:
 * - When vpislocked == 0 the vnode is share locked for the duration of
 *   the check and unlocked again before returning; failure to get the
 *   lock yields EPERM.
 * - Returns 0 if access is allowed, else an errno value.
 * - *supportedtypep (if non-NULL) has NFSACCESS_DELETE cleared for a
 *   non-directory when only the VEXPLICIT_DENY form of the delete check
 *   succeeds.
 */
int
nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred,
    struct nfsexstuff *exp, struct thread *p, int override, int vpislocked,
    u_int32_t *supportedtypep)
{
	struct vattr va;
	int error = 0, attrerr = 0, lockedhere = 0;

	if (vpislocked == 0) {
		if (NFSVOPLOCK(vp, LK_SHARED) != 0) {
			error = EPERM;
			goto out;
		}
		lockedhere = 1;
	}

	if ((accmode & VWRITE) != 0) {
		/* Just vn_writechk() changed to check rdonly */
		/*
		 * Writes to regular files, directories and symlinks are
		 * refused on a read-only export or file system; sockets
		 * and device nodes residing there are not affected.
		 */
		if (NFSVNO_EXRDONLY(exp) ||
		    (vp->v_mount->mnt_flag & MNT_RDONLY)) {
			if (vp->v_type == VREG || vp->v_type == VDIR ||
			    vp->v_type == VLNK)
				error = EROFS;
		}
		/*
		 * If there's shared text associated with
		 * the inode, try to free it up once.  If
		 * we fail, we can't allow writing.
		 */
		if (VOP_IS_TEXT(vp) && error == 0)
			error = ETXTBSY;
	}
	if (error != 0)
		goto unlock;

	/*
	 * Should the override still be applied when ACLs are enabled?
	 */
	error = VOP_ACCESSX(vp, accmode, cred, p);
	if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) {
		/*
		 * Try again with VEXPLICIT_DENY, to see if the test for
		 * deletion is supported.
		 */
		error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p);
		if (error == 0) {
			if (vp->v_type == VDIR) {
				/* For a directory, check VWRITE instead. */
				accmode &= ~(VDELETE | VDELETE_CHILD);
				accmode |= VWRITE;
				error = VOP_ACCESSX(vp, accmode, cred, p);
			} else if (supportedtypep != NULL) {
				*supportedtypep &= ~NFSACCESS_DELETE;
			}
		}
	}

	/*
	 * Allow certain operations for the owner (reads and writes
	 * on files that are already open).
	 */
	if (override != NFSACCCHK_NOOVERRIDE &&
	    (error == EPERM || error == EACCES)) {
		if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT)) {
			error = 0;
		} else if (override & NFSACCCHK_ALLOWOWNER) {
			attrerr = VOP_GETATTR(vp, &va, cred);
			if (attrerr == 0 && cred->cr_uid == va.va_uid)
				error = 0;
		}
	}

unlock:
	/* Only drop the lock if it was acquired by this function. */
	if (lockedhere != 0)
		NFSVOPUNLOCK(vp, 0);
out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * Set attribute(s) vnop.
*/
/*
 * nfsvno_setattr() details:
 * - Applies nvap's attributes with VOP_SETATTR().
 * - If that worked and at least one of uid, gid, size, mode, atime or
 *   mtime was being set, the change is also sent to nfsrv_proxyds().
 *   An ENOENT result from that call is treated as success.
 * - exp and, apart from being passed on, p are not otherwise used here.
 */
int
nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
    struct thread *p, struct nfsexstuff *exp)
{
	int error;

	error = VOP_SETATTR(vp, &nvap->na_vattr, cred);
	if (error == 0 && (nvap->na_vattr.va_uid != (uid_t)VNOVAL ||
	    nvap->na_vattr.va_gid != (gid_t)VNOVAL ||
	    nvap->na_vattr.va_size != VNOVAL ||
	    nvap->na_vattr.va_mode != (mode_t)VNOVAL ||
	    nvap->na_vattr.va_atime.tv_sec != VNOVAL ||
	    nvap->na_vattr.va_mtime.tv_sec != VNOVAL)) {
		/* For a pNFS server, set the attributes on the DS file. */
		error = nfsrv_proxyds(NULL, vp, 0, 0, cred, p, NFSPROC_SETATTR,
		    NULL, NULL, NULL, nvap, NULL);
		if (error == ENOENT)
			error = 0;
	}
	NFSEXITCODE(error);
	return (error);
}

/*
 * Set up nameidata for a lookup() call and do it.
 * dp is the starting directory; it is released (vput/vrele) and ENOTDIR
 * returned if it is not a directory.  Otherwise *retdirp is set to dp
 * with an additional reference.
 * Symbolic links are only followed for ND_PUBLOOKUP requests; for any
 * other request hitting one, EINVAL is returned.
 * On error the path buffer is released and ni_vp, ni_dvp and ni_startdir
 * are set to NULL.
 */
int
nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp,
    struct vnode *dp, int islocked, struct nfsexstuff *exp, struct thread *p,
    struct vnode **retdirp)
{
	struct componentname *cnp = &ndp->ni_cnd;
	int i;
	struct iovec aiov;
	struct uio auio;
	/* Remember whether the caller wanted the leaf returned locked. */
	int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen;
	/* NOTE(review): crossmnt is assigned below but never read here. */
	int error = 0, crossmnt;
	char *cp;

	*retdirp = NULL;
	cnp->cn_nameptr = cnp->cn_pnbuf;
	ndp->ni_lcf = 0;
	/*
	 * Extract and set starting directory.
	 */
	if (dp->v_type != VDIR) {
		if (islocked)
			vput(dp);
		else
			vrele(dp);
		nfsvno_relpathbuf(ndp);
		error = ENOTDIR;
		goto out1;
	}
	if (islocked)
		NFSVOPUNLOCK(dp, 0);
	VREF(dp);
	*retdirp = dp;
	if (NFSVNO_EXRDONLY(exp))
		cnp->cn_flags |= RDONLY;
	ndp->ni_segflg = UIO_SYSSPACE;
	crossmnt = 1;

	if (nd->nd_flag & ND_PUBLOOKUP) {
		ndp->ni_loopcnt = 0;
		if (cnp->cn_pnbuf[0] == '/') {
			vrele(dp);
			/*
			 * Check for degenerate pathnames here, since lookup()
			 * panics on them.
			 */
			for (i = 1; i < ndp->ni_pathlen; i++)
				if (cnp->cn_pnbuf[i] != '/')
					break;
			if (i == ndp->ni_pathlen) {
				error = NFSERR_ACCES;
				goto out;
			}
			/* An absolute path starts at the root vnode. */
			dp = rootvnode;
			VREF(dp);
		}
	} else if ((nfsrv_enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) ||
	    (nd->nd_flag & ND_NFSV4) == 0) {
		/*
		 * Only cross mount points for NFSv4 when doing a
		 * mount while traversing the file system above
		 * the mount point, unless nfsrv_enable_crossmntpt is set.
		 */
		cnp->cn_flags |= NOCROSSMOUNT;
		crossmnt = 0;
	}

	/*
	 * Initialize for scan, set ni_startdir and bump ref on dp again
	 * because lookup() will dereference ni_startdir.
	 */

	cnp->cn_thread = p;
	ndp->ni_startdir = dp;
	ndp->ni_rootdir = rootvnode;
	ndp->ni_topdir = NULL;

	/*
	 * Always look up with LOCKLEAF set; if the caller did not ask for
	 * it, the leaf is unlocked and the flag cleared again below.
	 */
	if (!lockleaf)
		cnp->cn_flags |= LOCKLEAF;
	for (;;) {
		cnp->cn_nameptr = cnp->cn_pnbuf;
		/*
		 * Call lookup() to do the real work.  If an error occurs,
		 * ndp->ni_vp and ni_dvp are left uninitialized or NULL and
		 * we do not have to dereference anything before returning.
		 * In either case ni_startdir will be dereferenced and NULLed
		 * out.
		 */
		error = lookup(ndp);
		if (error)
			break;

		/*
		 * Check for encountering a symbolic link.  Trivial
		 * termination occurs if no symlink encountered.
		 */
		if ((cnp->cn_flags & ISSYMLINK) == 0) {
			if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0)
				nfsvno_relpathbuf(ndp);
			if (ndp->ni_vp && !lockleaf)
				NFSVOPUNLOCK(ndp->ni_vp, 0);
			break;
		}

		/*
		 * Validate symlink
		 */
		if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
			NFSVOPUNLOCK(ndp->ni_dvp, 0);
		if (!(nd->nd_flag & ND_PUBLOOKUP)) {
			error = EINVAL;
			goto badlink2;
		}

		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
			error = ELOOP;
			goto badlink2;
		}
		/*
		 * With path components remaining, read the link into a
		 * fresh buffer so the remainder can be appended to it;
		 * otherwise the existing path buffer is reused.
		 */
		if (ndp->ni_pathlen > 1)
			cp = uma_zalloc(namei_zone, M_WAITOK);
		else
			cp = cnp->cn_pnbuf;
		aiov.iov_base = cp;
		aiov.iov_len = MAXPATHLEN;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = 0;
		auio.uio_rw = UIO_READ;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_td = NULL;
		auio.uio_resid = MAXPATHLEN;
		error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
		if (error) {
			/*
			 * badlink1 also frees the separately allocated
			 * link buffer; badlink2 only drops the vnodes.
			 */
		badlink1:
			if (ndp->ni_pathlen > 1)
				uma_zfree(namei_zone, cp);
		badlink2:
			vrele(ndp->ni_dvp);
			vput(ndp->ni_vp);
			break;
		}
		linklen = MAXPATHLEN - auio.uio_resid;
		if (linklen == 0) {
			error = ENOENT;
			goto badlink1;
		}
		if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
			error = ENAMETOOLONG;
			goto badlink1;
		}

		/*
		 * Adjust or replace path
		 */
		if (ndp->ni_pathlen > 1) {
			NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
			uma_zfree(namei_zone, cnp->cn_pnbuf);
			cnp->cn_pnbuf = cp;
		} else
			cnp->cn_pnbuf[linklen] = '\0';
		ndp->ni_pathlen += linklen;

		/*
		 * Cleanup refs for next loop and check if root directory
		 * should replace current directory.  Normally ni_dvp
		 * becomes the new base directory and is cleaned up when
		 * we loop.  Explicitly null pointers after invalidation
		 * to clarify operation.
		 */
		vput(ndp->ni_vp);
		ndp->ni_vp = NULL;

		if (cnp->cn_pnbuf[0] == '/') {
			vrele(ndp->ni_dvp);
			ndp->ni_dvp = ndp->ni_rootdir;
			VREF(ndp->ni_dvp);
		}
		ndp->ni_startdir = ndp->ni_dvp;
		ndp->ni_dvp = NULL;
	}
	if (!lockleaf)
		cnp->cn_flags &= ~LOCKLEAF;

out:
	if (error) {
		nfsvno_relpathbuf(ndp);
		ndp->ni_vp = NULL;
		ndp->ni_dvp = NULL;
		ndp->ni_startdir = NULL;
	} else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) {
		ndp->ni_dvp = NULL;
	}

out1:
	NFSEXITCODE2(error, nd);
	return (error);
}

/*
 * Set up a pathname buffer and return a pointer to it and, optionally
 * set a hash pointer.
 * The buffer comes from namei_zone and HASBUF is set in cn_flags so that
 * nfsvno_relpathbuf() can release it later.  *hashpp, when requested, is
 * simply set to NULL.
 */
void
nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp)
{
	struct componentname *cnp = &ndp->ni_cnd;

	cnp->cn_flags |= (NOMACCHECK | HASBUF);
	cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
	if (hashpp != NULL)
		*hashpp = NULL;
	*bufpp = cnp->cn_pnbuf;
}

/*
 * Release the above path buffer, if not released by nfsvno_namei().
 * Panics if HASBUF is not set, i.e. there is no buffer to release.
 */
void
nfsvno_relpathbuf(struct nameidata *ndp)
{

	if ((ndp->ni_cnd.cn_flags & HASBUF) == 0)
		panic("nfsrelpath");
	uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
	ndp->ni_cnd.cn_flags &= ~HASBUF;
}

/*
 * Readlink vnode op into an mbuf list.
*/
/*
 * nfsvno_readlink() builds a chain of cluster mbufs totalling
 * NFS_MAXPATHLEN bytes, points one iovec at each and calls VOP_READLINK().
 * On success *lenp is the number of bytes read, *mpp is the head of the
 * chain and *mpendp is the last mbuf allocated by the loop.  On failure the
 * chain is freed, *lenp is set to 0 and the error is returned; *mpp and
 * *mpendp are not written in that case.
 * The p argument is not referenced by this function.
 */
int
nfsvno_readlink(struct vnode *vp, struct ucred *cred, struct thread *p,
    struct mbuf **mpp, struct mbuf **mpendp, int *lenp)
{
	struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
	struct iovec *ivp = iv;
	struct uio io, *uiop = &io;
	struct mbuf *mp, *mp2 = NULL, *mp3 = NULL;
	int i, len, tlen, error = 0;

	len = 0;
	i = 0;
	/* mp3 is the chain head, mp2 the current tail. */
	while (len < NFS_MAXPATHLEN) {
		NFSMGET(mp);
		MCLGET(mp, M_WAITOK);
		mp->m_len = M_SIZE(mp);
		if (len == 0) {
			mp3 = mp2 = mp;
		} else {
			mp2->m_next = mp;
			mp2 = mp;
		}
		/* Clip the final mbuf so the total is exactly NFS_MAXPATHLEN. */
		if ((len + mp->m_len) > NFS_MAXPATHLEN) {
			mp->m_len = NFS_MAXPATHLEN - len;
			len = NFS_MAXPATHLEN;
		} else {
			len += mp->m_len;
		}
		ivp->iov_base = mtod(mp, caddr_t);
		ivp->iov_len = mp->m_len;
		i++;
		ivp++;
	}
	uiop->uio_iov = iv;
	uiop->uio_iovcnt = i;
	uiop->uio_offset = 0;
	uiop->uio_resid = len;
	uiop->uio_rw = UIO_READ;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_td = NULL;
	error = VOP_READLINK(vp, uiop, cred);
	if (error) {
		m_freem(mp3);
		*lenp = 0;
		goto out;
	}
	if (uiop->uio_resid > 0) {
		/*
		 * Short read: hand nfsrv_adj() the unused byte count and the
		 * XDR padding needed (presumably it trims the chain and
		 * zero-fills the pad -- confirm against nfsrv_adj()).
		 */
		len -= uiop->uio_resid;
		tlen = NFSM_RNDUP(len);
		nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen, tlen - len);
	}
	*lenp = len;
	*mpp = mp3;
	*mpendp = mp;

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * Read vnode op call into mbuf list.
 *
 * nfsrv_proxyds() is tried first with NFSPROC_READDS; any result other than
 * ENOENT is returned directly (without passing through NFSEXITCODE()).
 * Otherwise an mbuf chain of NFSM_RNDUP(cnt) bytes is built and filled by
 * VOP_READ().  On success *mpp is the chain (NULL if nothing was read) and
 * *mpendp is the last mbuf allocated; on a VOP_READ() error the chain is
 * freed and *mpp is set to NULL.
 * NOTE(review): when nothing is read the chain is freed but *mpendp is
 * still assigned the old tail pointer, so callers presumably must test
 * *mpp before using *mpendp -- confirm against callers.
 */
int
nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred,
    struct thread *p, struct mbuf **mpp, struct mbuf **mpendp)
{
	struct mbuf *m;
	int i;
	struct iovec *iv;
	struct iovec *iv2;
	int error = 0, len, left, siz, tlen, ioflag = 0;
	struct mbuf *m2 = NULL, *m3;
	struct uio io, *uiop = &io;
	struct nfsheur *nh;

	/*
	 * Attempt to read from a DS file. A return of ENOENT implies
	 * there is no DS file to read.
	 */
	error = nfsrv_proxyds(NULL, vp, off, cnt, cred, p, NFSPROC_READDS, mpp,
	    NULL, mpendp, NULL, NULL);
	if (error != ENOENT)
		return (error);

	len = left = NFSM_RNDUP(cnt);
	m3 = NULL;
	/*
	 * Generate the mbuf list with the uio_iov ref. to it.
	 */
	i = 0;
	/* First pass: allocate the chain (m3 head, m2 tail) and count mbufs. */
	while (left > 0) {
		NFSMGET(m);
		MCLGET(m, M_WAITOK);
		m->m_len = 0;
		siz = min(M_TRAILINGSPACE(m), left);
		left -= siz;
		i++;
		if (m3)
			m2->m_next = m;
		else
			m3 = m;
		m2 = m;
	}
	MALLOC(iv, struct iovec *, i * sizeof (struct iovec),
	    M_TEMP, M_WAITOK);
	/* iv2 keeps the array base for FREE(); iv is advanced below. */
	uiop->uio_iov = iv2 = iv;
	m = m3;
	left = len;
	i = 0;
	/* Second pass: point one iovec at each mbuf and set its m_len. */
	while (left > 0) {
		if (m == NULL)
			panic("nfsvno_read iov");
		siz = min(M_TRAILINGSPACE(m), left);
		if (siz > 0) {
			iv->iov_base = mtod(m, caddr_t) + m->m_len;
			iv->iov_len = siz;
			m->m_len += siz;
			left -= siz;
			iv++;
			i++;
		}
		m = m->m_next;
	}
	uiop->uio_iovcnt = i;
	uiop->uio_offset = off;
	uiop->uio_resid = len;
	uiop->uio_rw = UIO_READ;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_td = NULL;
	nh = nfsrv_sequential_heuristic(uiop, vp);
	ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
	/* XXX KDM make this more systematic? */
	nfsstatsv1.srvbytes[NFSV4OP_READ] += uiop->uio_resid;
	error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
	FREE((caddr_t)iv2, M_TEMP);
	if (error) {
		m_freem(m3);
		*mpp = NULL;
		goto out;
	}
	nh->nh_nextoff = uiop->uio_offset;
	/* Bytes actually read, capped at the requested count. */
	tlen = len - uiop->uio_resid;
	cnt = cnt < tlen ? cnt : tlen;
	tlen = NFSM_RNDUP(cnt);
	if (tlen == 0) {
		m_freem(m3);
		m3 = NULL;
	} else if (len != tlen || tlen != cnt)
		nfsrv_adj(m3, len - tlen, tlen - cnt);
	*mpp = m3;
	*mpendp = m2;

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * Write vnode op from an mbuf list.
 *
 * nfsrv_proxyds() is tried first with NFSPROC_WRITEDS; any result other
 * than ENOENT sets *stable to NFSWRITE_FILESYNC and is returned directly.
 * Otherwise retlen bytes, starting at cp within mbuf mp and continuing
 * along the chain, are written at offset off with VOP_WRITE().
 * cnt sizes the iovec array and is used as uio_iovcnt.
 * IO_SYNC is requested unless *stable is NFSWRITE_UNSTABLE.
 */
int
nfsvno_write(struct vnode *vp, off_t off, int retlen, int cnt, int *stable,
    struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p)
{
	struct iovec *ivp;
	int i, len;
	struct iovec *iv;
	int ioflags, error;
	struct uio io, *uiop = &io;
	struct nfsheur *nh;

	/*
	 * Attempt to write to a DS file. A return of ENOENT implies
	 * there is no DS file to write.
	 */
	error = nfsrv_proxyds(NULL, vp, off, retlen, cred, p, NFSPROC_WRITEDS,
	    &mp, cp, NULL, NULL, NULL);
	if (error != ENOENT) {
		*stable = NFSWRITE_FILESYNC;
		return (error);
	}

	MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP,
	    M_WAITOK);
	/* iv keeps the array base for FREE(); ivp is advanced below. */
	uiop->uio_iov = iv = ivp;
	uiop->uio_iovcnt = cnt;
	/* Bytes remaining in the first mbuf from cp onward. */
	i = mtod(mp, caddr_t) + mp->m_len - cp;
	len = retlen;
	while (len > 0) {
		if (mp == NULL)
			panic("nfsvno_write");
		if (i > 0) {
			i = min(i, len);
			ivp->iov_base = cp;
			ivp->iov_len = i;
			ivp++;
			len -= i;
		}
		mp = mp->m_next;
		if (mp) {
			i = mp->m_len;
			cp = mtod(mp, caddr_t);
		}
	}

	if (*stable == NFSWRITE_UNSTABLE)
		ioflags = IO_NODELOCKED;
	else
		ioflags = (IO_SYNC | IO_NODELOCKED);
	uiop->uio_resid = retlen;
	uiop->uio_rw = UIO_WRITE;
	uiop->uio_segflg = UIO_SYSSPACE;
	NFSUIOPROC(uiop, p);
	uiop->uio_offset = off;
	nh = nfsrv_sequential_heuristic(uiop, vp);
	ioflags |= nh->nh_seqcount << IO_SEQSHIFT;
	/* XXX KDM make this more systematic? */
	nfsstatsv1.srvbytes[NFSV4OP_WRITE] += uiop->uio_resid;
	error = VOP_WRITE(vp, uiop, ioflags, cred);
	if (error == 0)
		nh->nh_nextoff = uiop->uio_offset;
	FREE((caddr_t)iv, M_TEMP);

	NFSEXITCODE(error);
	return (error);
}

/*
 * Common code for creating a regular file (plus special files for V2).
 *
 * Starts from nd->nd_repstat.  When that is 0 and the lookup found nothing
 * (ni_vp == NULL) the object is created according to nvap->na_type:
 * VREG/VSOCK via VOP_CREATE(), VCHR/VBLK/VFIFO via VOP_MKNOD(), anything
 * else fails with ENXIO.  Otherwise the lookup state is released and, if
 * there is no error and na_size is set, the existing file is truncated
 * after a VWRITE access check.
 * Every path releases ni_startdir, ni_dvp and the path buffer.  *vpp is set
 * to ni_vp on the paths that do not "goto out".
 */
int
nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp,
    struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp,
    int32_t *cverf, NFSDEV_T rdev, struct thread *p, struct nfsexstuff *exp)
{
	u_quad_t tempsize;
	int error;

	error = nd->nd_repstat;
	if (!error && ndp->ni_vp == NULL) {
		if (nvap->na_type == VREG || nvap->na_type == VSOCK) {
			vrele(ndp->ni_startdir);
			error = VOP_CREATE(ndp->ni_dvp,
			    &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
			/* For a pNFS server, create the data file on a DS. */
			if (error == 0 && nvap->na_type == VREG) {
				/*
				 * Create a data file on a DS for a pNFS server.
				 * This function just returns if not
				 * running a pNFS DS or the creation fails.
				 */
				nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr,
				    nd->nd_cred, p);
			}
			vput(ndp->ni_dvp);
			nfsvno_relpathbuf(ndp);
			if (!error) {
				if (*exclusive_flagp) {
					/*
					 * Exclusive create: store the
					 * verifier in the access time.
					 */
					*exclusive_flagp = 0;
					NFSVNO_ATTRINIT(nvap);
					nvap->na_atime.tv_sec = cverf[0];
					nvap->na_atime.tv_nsec = cverf[1];
					error = VOP_SETATTR(ndp->ni_vp,
					    &nvap->na_vattr, nd->nd_cred);
					if (error != 0) {
						vput(ndp->ni_vp);
						ndp->ni_vp = NULL;
						error = NFSERR_NOTSUPP;
					}
				}
			}
		/*
		 * NFS V2 Only. nfsrvd_mknod() does this for V3.
		 * (This implies, just get out on an error.)
		 */
		} else if (nvap->na_type == VCHR || nvap->na_type == VBLK ||
			nvap->na_type == VFIFO) {
			/* A VCHR with rdev 0xffffffff is treated as a FIFO. */
			if (nvap->na_type == VCHR && rdev == 0xffffffff)
				nvap->na_type = VFIFO;
			if (nvap->na_type != VFIFO &&
			    (error = priv_check_cred(nd->nd_cred,
			     PRIV_VFS_MKNOD_DEV, 0))) {
				vrele(ndp->ni_startdir);
				nfsvno_relpathbuf(ndp);
				vput(ndp->ni_dvp);
				goto out;
			}
			nvap->na_rdev = rdev;
			error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
			    &ndp->ni_cnd, &nvap->na_vattr);
			vput(ndp->ni_dvp);
			nfsvno_relpathbuf(ndp);
			vrele(ndp->ni_startdir);
			if (error)
				goto out;
		} else {
			vrele(ndp->ni_startdir);
			nfsvno_relpathbuf(ndp);
			vput(ndp->ni_dvp);
			error = ENXIO;
			goto out;
		}
		*vpp = ndp->ni_vp;
	} else {
		/*
		 * Handle cases where error is already set and/or
		 * the file exists.
		 * 1 - clean up the lookup
		 * 2 - iff !error and na_size set, truncate it
		 */
		vrele(ndp->ni_startdir);
		nfsvno_relpathbuf(ndp);
		*vpp = ndp->ni_vp;
		if (ndp->ni_dvp == *vpp)
			vrele(ndp->ni_dvp);
		else
			vput(ndp->ni_dvp);
		if (!error && nvap->na_size != VNOVAL) {
			error = nfsvno_accchk(*vpp, VWRITE,
			    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
			    NFSACCCHK_VPISLOCKED, NULL);
			if (!error) {
				/* Keep only the size across the re-init. */
				tempsize = nvap->na_size;
				NFSVNO_ATTRINIT(nvap);
				nvap->na_size = tempsize;
				error = VOP_SETATTR(*vpp,
				    &nvap->na_vattr, nd->nd_cred);
			}
		}
		/*
		 * NOTE(review): this is reached with *vpp == NULL if
		 * nd_repstat was set and the lookup found nothing -- confirm
		 * callers never do that.
		 */
		if (error)
			vput(*vpp);
	}

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * Do a mknod vnode op.
*/
/*
 * nfsvno_mknod() creates the object named by ndp with type nvap->na_type.
 * Fails with EEXIST if the lookup found an existing vnode and with
 * NFSERR_BADTYPE for any type other than VCHR, VBLK, VSOCK or VFIFO.
 * VSOCK is created with VOP_CREATE(); the other types use VOP_MKNOD(), with
 * a PRIV_VFS_MKNOD_DEV privilege check for everything except VFIFO.
 * Every path releases ni_startdir, ni_dvp and the path buffer.
 * The p argument is not referenced by this function.
 */
int
nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred,
    struct thread *p)
{
	int error = 0;
	enum vtype vtyp;

	vtyp = nvap->na_type;
	/*
	 * Iff doesn't exist, create it.
	 */
	if (ndp->ni_vp) {
		vrele(ndp->ni_startdir);
		nfsvno_relpathbuf(ndp);
		vput(ndp->ni_dvp);
		vrele(ndp->ni_vp);
		error = EEXIST;
		goto out;
	}
	if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
		vrele(ndp->ni_startdir);
		nfsvno_relpathbuf(ndp);
		vput(ndp->ni_dvp);
		error = NFSERR_BADTYPE;
		goto out;
	}
	if (vtyp == VSOCK) {
		vrele(ndp->ni_startdir);
		error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
		    &ndp->ni_cnd, &nvap->na_vattr);
		vput(ndp->ni_dvp);
		nfsvno_relpathbuf(ndp);
	} else {
		if (nvap->na_type != VFIFO &&
		    (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV, 0))) {
			vrele(ndp->ni_startdir);
			nfsvno_relpathbuf(ndp);
			vput(ndp->ni_dvp);
			goto out;
		}
		error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
		    &ndp->ni_cnd, &nvap->na_vattr);
		vput(ndp->ni_dvp);
		nfsvno_relpathbuf(ndp);
		vrele(ndp->ni_startdir);
		/*
		 * Since VOP_MKNOD returns the ni_vp, I can't
		 * see any reason to do the lookup.
		 */
	}

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * Mkdir vnode op.
 *
 * Fails with EEXIST if the lookup found an existing vnode; otherwise calls
 * VOP_MKDIR().  Both paths release ni_dvp and the path buffer; unlike the
 * other create routines here, ni_startdir is not touched.
 * saved_uid, cred, p and exp are not referenced by this function.
 */
int
nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid,
    struct ucred *cred, struct thread *p, struct nfsexstuff *exp)
{
	int error = 0;

	if (ndp->ni_vp != NULL) {
		/* ni_dvp is only vrele()'d when it is the same vnode as ni_vp. */
		if (ndp->ni_dvp == ndp->ni_vp)
			vrele(ndp->ni_dvp);
		else
			vput(ndp->ni_dvp);
		vrele(ndp->ni_vp);
		nfsvno_relpathbuf(ndp);
		error = EEXIST;
		goto out;
	}
	error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
	    &nvap->na_vattr);
	vput(ndp->ni_dvp);
	nfsvno_relpathbuf(ndp);

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * symlink vnode op.
*/
/*
 * nfsvno_symlink() creates a symbolic link named by ndp whose target is
 * pathcp.  Fails with EEXIST if the lookup found an existing vnode.
 * Every path releases ni_startdir, ni_dvp and the path buffer.  When not_v2
 * is zero and the create succeeded, the new vnode is vput() here; otherwise
 * it is left in ndp->ni_vp for the caller.
 * pathlen, saved_uid, cred, p and exp are not referenced by this function.
 */
int
nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp,
    int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred,
    struct thread *p, struct nfsexstuff *exp)
{
	int error = 0;

	if (ndp->ni_vp) {
		vrele(ndp->ni_startdir);
		nfsvno_relpathbuf(ndp);
		if (ndp->ni_dvp == ndp->ni_vp)
			vrele(ndp->ni_dvp);
		else
			vput(ndp->ni_dvp);
		vrele(ndp->ni_vp);
		error = EEXIST;
		goto out;
	}

	error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
	    &nvap->na_vattr, pathcp);
	vput(ndp->ni_dvp);
	vrele(ndp->ni_startdir);
	nfsvno_relpathbuf(ndp);
	/*
	 * Although FreeBSD still had the lookup code in
	 * it for 7/current, there doesn't seem to be any
	 * point, since VOP_SYMLINK() returns the ni_vp.
	 * Just vput it for v2.
	 */
	if (!not_v2 && !error)
		vput(ndp->ni_vp);

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * Parse symbolic link arguments.
 * This function has an ugly side effect.  It will MALLOC() an area for
 * the symlink target and return it through *pathcpp, only if it succeeds.
 * So, if it returns with *pathcpp != NULL, that buffer (M_TEMP) must
 * be FREE'd later by the caller.
 *
 * On success *lenp is the target length, which is accepted only in the
 * range 1..NFS_MAXPATHLEN (else EBADRPC).  On failure *pathcpp is NULL,
 * *lenp is 0 and any buffer allocated here has been freed.
 * For NFSv3 the attributes are parsed first via nfsrv_sattr(); for NFSv2
 * only the mode is taken from the trailing sattr.
 */
int
nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap,
    struct thread *p, char **pathcpp, int *lenp)
{
	u_int32_t *tl;
	char *pathcp = NULL;
	int error = 0, len;
	struct nfsv2_sattr *sp;

	*pathcpp = NULL;
	*lenp = 0;
	if ((nd->nd_flag & ND_NFSV3) &&
	    (error = nfsrv_sattr(nd, NULL, nvap, NULL, NULL, p)))
		goto nfsmout;
	/* NFSM_DISSECT presumably jumps to nfsmout on failure -- confirm. */
	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
	len = fxdr_unsigned(int, *tl);
	if (len > NFS_MAXPATHLEN || len <= 0) {
		error = EBADRPC;
		goto nfsmout;
	}
	MALLOC(pathcp, caddr_t, len + 1, M_TEMP, M_WAITOK);
	error = nfsrv_mtostr(nd, pathcp, len);
	if (error)
		goto nfsmout;
	if (nd->nd_flag & ND_NFSV2) {
		NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
		nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode);
	}
	*pathcpp = pathcp;
	*lenp = len;
	NFSEXITCODE2(0, nd);
	return (0);
nfsmout:
	if (pathcp)
		free(pathcp, M_TEMP);
	NFSEXITCODE2(error, nd);
	return (error);
}

/*
 * Remove a non-directory object.
*/
/*
 * nfsvno_removesub() removes ndp->ni_vp from ndp->ni_dvp.  A directory is
 * rejected with NFSERR_ISDIR; for NFSv4 (is_v4) nfsrv_checkremove() must
 * also succeed.  nfsrv_pnfsremovesetup() is called before VOP_REMOVE() and,
 * if it filled in dsdvp[0], nfsrv_pnfsremove() is called after a successful
 * remove and the dsdvp[] vnodes are unlocked.
 * ni_dvp and ni_vp are always released, and the path buffer is released
 * when SAVENAME is set.
 * cred and exp are not referenced by this function.
 */
int
nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred,
    struct thread *p, struct nfsexstuff *exp)
{
	struct vnode *vp, *dsdvp[NFSDEV_MAXMIRRORS];
	/* mirrorcnt is only read when dsdvp[0] != NULL. */
	int error = 0, i, mirrorcnt;
	char fname[PNFS_FILENAME_LEN + 1];

	vp = ndp->ni_vp;
	dsdvp[0] = NULL;
	if (vp->v_type == VDIR)
		error = NFSERR_ISDIR;
	else if (is_v4)
		error = nfsrv_checkremove(vp, 1, p);
	if (error == 0)
		nfsrv_pnfsremovesetup(vp, p, dsdvp, &mirrorcnt, fname);
	if (!error)
		error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd);
	if (dsdvp[0] != NULL) {
		if (error == 0)
			nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, p);
		for (i = 0; i < mirrorcnt; i++)
			NFSVOPUNLOCK(dsdvp[i], 0);
	}
	if (ndp->ni_dvp == vp)
		vrele(ndp->ni_dvp);
	else
		vput(ndp->ni_dvp);
	vput(vp);
	if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0)
		nfsvno_relpathbuf(ndp);
	NFSEXITCODE(error);
	return (error);
}

/*
 * Remove a directory.
 *
 * Rejects a non-directory (ENOTDIR), removal of "." (EINVAL) and the root
 * of a mounted file system (EBUSY); otherwise calls VOP_RMDIR().
 * ni_dvp and ni_vp are always released, and the path buffer is released
 * when SAVENAME is set.
 * is_v4, cred, p and exp are not referenced by this function.
 */
int
nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred,
    struct thread *p, struct nfsexstuff *exp)
{
	struct vnode *vp;
	int error = 0;

	vp = ndp->ni_vp;
	if (vp->v_type != VDIR) {
		error = ENOTDIR;
		goto out;
	}
	/*
	 * No rmdir "." please.
	 */
	if (ndp->ni_dvp == vp) {
		error = EINVAL;
		goto out;
	}
	/*
	 * The root of a mounted filesystem cannot be deleted.
	 */
	if (vp->v_vflag & VV_ROOT)
		error = EBUSY;
out:
	if (!error)
		error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd);
	if (ndp->ni_dvp == vp)
		vrele(ndp->ni_dvp);
	else
		vput(ndp->ni_dvp);
	vput(vp);
	if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0)
		nfsvno_relpathbuf(ndp);
	NFSEXITCODE(error);
	return (error);
}

/*
 * Rename vnode op.
*/
/*
 * nfsvno_rename() renames fromndp->ni_vp to the name described by tondp.
 *
 * A non-zero ndstat is returned as the error after releasing only the
 * "from" side.  Otherwise a series of checks is made (type mismatch
 * between source and target, mount points, cross-mount rename, rename onto
 * "." or "..", source equal to the target directory) with the errno chosen
 * according to whether ND_NFSV2 is set in ndflag.  Renaming a file onto
 * itself is reported as success without calling VOP_RENAME().
 *
 * On the error path the vnodes are released here; on the success path they
 * are handed to VOP_RENAME() and not released again in this function.
 * Both ni_startdir references and both path buffers are always released
 * (only the "from" ones when ndstat was set).
 * cred is not referenced by this function.
 */
int
nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp,
    u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p)
{
	struct vnode *fvp, *tvp, *tdvp, *dsdvp[NFSDEV_MAXMIRRORS];
	/* mirrorcnt is only read when dsdvp[0] != NULL. */
	int error = 0, i, mirrorcnt;
	char fname[PNFS_FILENAME_LEN + 1];

	dsdvp[0] = NULL;
	fvp = fromndp->ni_vp;
	if (ndstat) {
		vrele(fromndp->ni_dvp);
		vrele(fvp);
		error = ndstat;
		goto out1;
	}
	tdvp = tondp->ni_dvp;
	tvp = tondp->ni_vp;
	if (tvp != NULL) {
		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
			error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST;
			goto out;
		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
			error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST;
			goto out;
		}
		if (tvp->v_type == VDIR && tvp->v_mountedhere) {
			error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
			goto out;
		}

		/*
		 * A rename to '.' or '..' results in a prematurely
		 * unlocked vnode on FreeBSD5, so I'm just going to fail that
		 * here.
		 */
		if ((tondp->ni_cnd.cn_namelen == 1 &&
		     tondp->ni_cnd.cn_nameptr[0] == '.') ||
		    (tondp->ni_cnd.cn_namelen == 2 &&
		     tondp->ni_cnd.cn_nameptr[0] == '.' &&
		     tondp->ni_cnd.cn_nameptr[1] == '.')) {
			error = EINVAL;
			goto out;
		}
	}
	if (fvp->v_type == VDIR && fvp->v_mountedhere) {
		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
		goto out;
	}
	if (fvp->v_mount != tdvp->v_mount) {
		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
		goto out;
	}
	if (fvp == tdvp) {
		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL;
		goto out;
	}
	if (fvp == tvp) {
		/*
		 * If source and destination are the same, there is nothing to
		 * do. Set error to -1 to indicate this.
		 */
		error = -1;
		goto out;
	}
	if (ndflag & ND_NFSV4) {
		/* fvp is locked only for the duration of the check. */
		if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) {
			error = nfsrv_checkremove(fvp, 0, p);
			NFSVOPUNLOCK(fvp, 0);
		} else
			error = EPERM;
		if (tvp && !error)
			error = nfsrv_checkremove(tvp, 1, p);
	} else {
		/*
		 * For NFSv2 and NFSv3, try to get rid of the delegation, so
		 * that the NFSv4 client won't be confused by the rename.
		 * Since nfsd_recalldelegation() can only be called on an
		 * unlocked vnode at this point and fvp is the file that will
		 * still exist after the rename, just do fvp.
		 */
		nfsd_recalldelegation(fvp, p);
	}
	if (error == 0 && tvp != NULL) {
		nfsrv_pnfsremovesetup(tvp, p, dsdvp, &mirrorcnt, fname);
		NFSD_DEBUG(4, "nfsvno_rename: pnfsremovesetup"
		    " dsdvp=%p\n", dsdvp[0]);
	}
out:
	if (!error) {
		error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp,
		    &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp,
		    &tondp->ni_cnd);
	} else {
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		if (tvp)
			vput(tvp);
		vrele(fromndp->ni_dvp);
		vrele(fvp);
		/* -1 marks the "same file" no-op; report it as success. */
		if (error == -1)
			error = 0;
	}

	/*
	 * If dsdvp[0] != NULL, it was set up by nfsrv_pnfsremovesetup() and
	 * if the rename succeeded, the DS file for the tvp needs to be
	 * removed.
	 */
	if (dsdvp[0] != NULL) {
		if (error == 0) {
			nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, p);
			NFSD_DEBUG(4, "nfsvno_rename: pnfsremove\n");
		}
		for (i = 0; i < mirrorcnt; i++)
			NFSVOPUNLOCK(dsdvp[i], 0);
	}

	vrele(tondp->ni_startdir);
	nfsvno_relpathbuf(tondp);
out1:
	vrele(fromndp->ni_startdir);
	nfsvno_relpathbuf(fromndp);
	NFSEXITCODE(error);
	return (error);
}

/*
 * Link vnode op.
 *
 * Creates a hard link to vp under the name described by ndp.  Fails with
 * EEXIST if the name already exists and EXDEV if vp and the target
 * directory are on different mounts.  vp is locked exclusively around
 * VOP_LINK() and unlocked again; a doomed vp yields EPERM.
 * ni_dvp (and ni_vp, if set) and the path buffer are always released.
 * cred, p and exp are not referenced by this function.
 */
int
nfsvno_link(struct nameidata *ndp, struct vnode *vp, struct ucred *cred,
    struct thread *p, struct nfsexstuff *exp)
{
	struct vnode *xp;
	int error = 0;

	xp = ndp->ni_vp;
	if (xp != NULL) {
		error = EEXIST;
	} else {
		xp = ndp->ni_dvp;
		if (vp->v_mount != xp->v_mount)
			error = EXDEV;
	}
	if (!error) {
		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
		if ((vp->v_iflag & VI_DOOMED) == 0)
			error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd);
		else
			error = EPERM;
		if (ndp->ni_dvp == vp)
			vrele(ndp->ni_dvp);
		else
			vput(ndp->ni_dvp);
		NFSVOPUNLOCK(vp, 0);
	} else {
		if (ndp->ni_dvp == ndp->ni_vp)
			vrele(ndp->ni_dvp);
		else
			vput(ndp->ni_dvp);
		if (ndp->ni_vp)
			vrele(ndp->ni_vp);
	}
	nfsvno_relpathbuf(ndp);
	NFSEXITCODE(error);
	return (error);
}

/*
 * Do the fsync() appropriate for the commit.
*/
/*
 * nfsvno_fsync() flushes the range [off, off + cnt) of vp.  The whole file
 * is flushed with VOP_FSYNC() when cnt is 0, cnt exceeds MAX_COMMIT_COUNT
 * or the mount does not set MNTK_USES_BCACHE; otherwise only the buffers
 * covering the range are written.  The ranged path never assigns error,
 * so it always returns 0.
 * cred is not referenced by this function.
 */
int
nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred,
    struct thread *td)
{
	int error = 0;

	/*
	 * RFC 1813 3.3.21: if count is 0, a flush from offset to the end of
	 * file is done.  At this time VOP_FSYNC does not accept offset and
	 * byte count parameters so call VOP_FSYNC the whole file for now.
	 * The same is true for NFSv4: RFC 3530 Sec. 14.2.3.
	 * File systems that do not use the buffer cache (as indicated
	 * by MNTK_USES_BCACHE not being set) must use VOP_FSYNC().
	 */
	if (cnt == 0 || cnt > MAX_COMMIT_COUNT ||
	    (vp->v_mount->mnt_kern_flag & MNTK_USES_BCACHE) == 0) {
		/*
		 * Give up and do the whole thing
		 */
		if (vp->v_object &&
		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
			VM_OBJECT_WLOCK(vp->v_object);
			vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
			VM_OBJECT_WUNLOCK(vp->v_object);
		}
		error = VOP_FSYNC(vp, MNT_WAIT, td);
	} else {
		/*
		 * Locate and synchronously write any buffers that fall
		 * into the requested range.  Note:  we are assuming that
		 * f_iosize is a power of 2.
		 */
		int iosize = vp->v_mount->mnt_stat.f_iosize;
		int iomask = iosize - 1;
		struct bufobj *bo;
		daddr_t lblkno;

		/*
		 * Align to iosize boundary, super-align to page boundary.
		 */
		if (off & iomask) {
			cnt += off & iomask;
			off &= ~(u_quad_t)iomask;
		}
		if (off & PAGE_MASK) {
			cnt += off & PAGE_MASK;
			off &= ~(u_quad_t)PAGE_MASK;
		}
		lblkno = off / iosize;

		if (vp->v_object &&
		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
			VM_OBJECT_WLOCK(vp->v_object);
			vm_object_page_clean(vp->v_object, off, off + cnt,
			    OBJPC_SYNC);
			VM_OBJECT_WUNLOCK(vp->v_object);
		}

		bo = &vp->v_bufobj;
		BO_LOCK(bo);
		while (cnt > 0) {
			struct buf *bp;

			/*
			 * If we have a buffer and it is marked B_DELWRI we
			 * have to lock and write it.  Otherwise the prior
			 * write is assumed to have already been committed.
			 *
			 * gbincore() can return invalid buffers now so we
			 * have to check that bit as well (though B_DELWRI
			 * should not be set if B_INVAL is set there could be
			 * a race here since we haven't locked the buffer).
			 */
			if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) {
				/*
				 * LK_INTERLOCK passes the bufobj lock to
				 * BUF_LOCK(), so it is re-taken with
				 * BO_LOCK() on both paths below.
				 */
				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
				    LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) {
					BO_LOCK(bo);
					continue; /* retry */
				}
				if ((bp->b_flags & (B_DELWRI|B_INVAL)) ==
				    B_DELWRI) {
					bremfree(bp);
					bp->b_flags &= ~B_ASYNC;
					bwrite(bp);
					++nfs_commit_miss;
				} else
					BUF_UNLOCK(bp);
				BO_LOCK(bo);
			}
			++nfs_commit_blks;
			if (cnt < iosize)
				break;
			cnt -= iosize;
			++lblkno;
		}
		BO_UNLOCK(bo);
	}
	NFSEXITCODE(error);
	return (error);
}

/*
 * Statfs vnode op.
 *
 * Fills *sf via VFS_STATFS() on vp's mount and, on success, clamps negative
 * f_bavail and f_ffree values to 0.  Returns the VFS_STATFS() result.
 */
int
nfsvno_statfs(struct vnode *vp, struct statfs *sf)
{
	int error;

	error = VFS_STATFS(vp->v_mount, sf);
	if (error == 0) {
		/*
		 * Since NFS handles these values as unsigned on the
		 * wire, there is no way to represent negative values,
		 * so set them to 0. Without this, they will appear
		 * to be very large positive values for clients like
		 * Solaris10.
		 */
		if (sf->f_bavail < 0)
			sf->f_bavail = 0;
		if (sf->f_ffree < 0)
			sf->f_ffree = 0;
	}
	NFSEXITCODE(error);
	return (error);
}

/*
 * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but
 * must handle nfsrv_opencheck() calls after any other access checks.
 *
 * The outcome is reported through nd->nd_repstat and *vpp rather than a
 * return value.  *vpp stays NULL when nd_repstat is non-zero before any
 * vnode operation is attempted, and is also NULL when the exclusive-create
 * VOP_SETATTR() fails.
 */
void
nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp,
    nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp,
    int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create,
    NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred,
    struct thread *p, struct nfsexstuff *exp, struct vnode **vpp)
{
	struct vnode *vp = NULL;
	u_quad_t tempsize;
	struct nfsexstuff nes;

	/* No existing file: run the open check before trying to create. */
	if (ndp->ni_vp == NULL)
		nd->nd_repstat = nfsrv_opencheck(clientid,
		    stateidp, stp, NULL, nd, p, nd->nd_repstat);
	if (!nd->nd_repstat) {
		if (ndp->ni_vp == NULL) {
			vrele(ndp->ni_startdir);
			nd->nd_repstat = VOP_CREATE(ndp->ni_dvp,
			    &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
			/* For a pNFS server, create the data file on a DS. */
			if (nd->nd_repstat == 0) {
				/*
				 * Create a data file on a DS for a pNFS server.
				 * This function just returns if not
				 * running a pNFS DS or the creation fails.
				 */
				nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr,
				    cred, p);
			}
			vput(ndp->ni_dvp);
			nfsvno_relpathbuf(ndp);
			if (!nd->nd_repstat) {
				if (*exclusive_flagp) {
					/*
					 * Exclusive create: store the
					 * verifier in the access time.
					 */
					*exclusive_flagp = 0;
					NFSVNO_ATTRINIT(nvap);
					nvap->na_atime.tv_sec = cverf[0];
					nvap->na_atime.tv_nsec = cverf[1];
					nd->nd_repstat = VOP_SETATTR(ndp->ni_vp,
					    &nvap->na_vattr, cred);
					if (nd->nd_repstat != 0) {
						vput(ndp->ni_vp);
						ndp->ni_vp = NULL;
						nd->nd_repstat = NFSERR_NOTSUPP;
					} else
						NFSSETBIT_ATTRBIT(attrbitp,
						    NFSATTRBIT_TIMEACCESS);
				} else {
					nfsrv_fixattr(nd, ndp->ni_vp, nvap,
					    aclp, p, attrbitp, exp);
				}
			}
			vp = ndp->ni_vp;
		} else {
			/* The file already exists. */
			if (ndp->ni_startdir)
				vrele(ndp->ni_startdir);
			nfsvno_relpathbuf(ndp);
			vp = ndp->ni_vp;
			if (create == NFSV4OPEN_CREATE) {
				if (ndp->ni_dvp == vp)
					vrele(ndp->ni_dvp);
				else
					vput(ndp->ni_dvp);
			}
			if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) {
				if (ndp->ni_cnd.cn_flags & RDONLY)
					NFSVNO_SETEXRDONLY(&nes);
				else
					NFSVNO_EXINIT(&nes);
				/*
				 * The access check result is passed to
				 * nfsrv_opencheck() as its last argument.
				 */
				nd->nd_repstat = nfsvno_accchk(vp,
				    VWRITE, cred, &nes, p,
				    NFSACCCHK_NOOVERRIDE,
				    NFSACCCHK_VPISLOCKED, NULL);
				nd->nd_repstat = nfsrv_opencheck(clientid,
				    stateidp, stp, vp, nd, p, nd->nd_repstat);
				if (!nd->nd_repstat) {
					/* Keep only the size across the re-init. */
					tempsize = nvap->na_size;
					NFSVNO_ATTRINIT(nvap);
					nvap->na_size = tempsize;
					nd->nd_repstat = VOP_SETATTR(vp,
					    &nvap->na_vattr, cred);
				}
			} else if (vp->v_type == VREG) {
				nd->nd_repstat = nfsrv_opencheck(clientid,
				    stateidp, stp, vp, nd, p, nd->nd_repstat);
			}
		}
	} else {
		/* Already failed: just release whatever the lookup left. */
		if (ndp->ni_cnd.cn_flags & HASBUF)
			nfsvno_relpathbuf(ndp);
		if (ndp->ni_startdir && create == NFSV4OPEN_CREATE) {
			vrele(ndp->ni_startdir);
			if (ndp->ni_dvp == ndp->ni_vp)
				vrele(ndp->ni_dvp);
			else
				vput(ndp->ni_dvp);
			if (ndp->ni_vp)
				vput(ndp->ni_vp);
		}
	}
	*vpp = vp;

	NFSEXITCODE2(0, nd);
}

/*
 * Updates the file rev and sets the mtime and ctime
 * to the current clock time, returning the va_filerev and va_Xtime
 * values.
 * Return ESTALE to indicate the vnode is VI_DOOMED.
*/
/*
 * nfsvno_updfilerev() sets only va_mtime (from vfs_timestamp()) in the
 * vattr passed to VOP_SETATTR() and then refreshes *nvap with
 * nfsvno_getattr().  The results of both calls are deliberately ignored,
 * so the only non-zero return is ESTALE.  If vp is not held exclusively the
 * lock is upgraded first, and VI_DOOMED is checked after the upgrade.
 */
int
nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap,
    struct nfsrv_descript *nd, struct thread *p)
{
	struct vattr va;

	VATTR_NULL(&va);
	vfs_timestamp(&va.va_mtime);
	if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) {
		NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
		if ((vp->v_iflag & VI_DOOMED) != 0)
			return (ESTALE);
	}
	(void) VOP_SETATTR(vp, &va, nd->nd_cred);
	(void) nfsvno_getattr(vp, nvap, nd, p, 1, NULL);
	return (0);
}

/*
 * Glue routine to nfsv4_fillattr().
 *
 * Passes every argument through, substituting &nvap->na_vattr for nvap and
 * NULL for the fourth argument, and returns whatever nfsv4_fillattr()
 * returns.  Note that 0, not that return value, is given to NFSEXITCODE2().
 */
int
nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp,
    struct nfsvattr *nvap, fhandle_t *fhp, int rderror,
    nfsattrbit_t *attrbitp, struct ucred *cred, struct thread *p, int isdgram,
    int reterr, int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno)
{
	int error;

	error = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp, rderror,
	    attrbitp, cred, p, isdgram, reterr, supports_nfsv4acls, at_root,
	    mounted_on_fileno);
	NFSEXITCODE2(0, nd);
	return (error);
}

/* Since the Readdir vnode ops vary, put the entire functions in here. */
/*
 * nfs readdir service
 * - mallocs what it thinks is enough to read
 *	count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR
 * - calls VOP_READDIR()
 * - loops around building the reply
 *	if the output generated exceeds count break out of loop
 *	The NFSM_CLGET macro is used here so that the reply will be packed
 *	tightly in mbuf clusters.
 * - it trims out records with d_fileno == 0
 *	this doesn't matter for Unix clients, but they might confuse clients
 *	for other os'.
 * - it trims out records with d_type == DT_WHT
 *	these cannot be seen through NFS (unless we extend the protocol)
 *     The alternate call nfsrvd_readdirplus() does lookups as well.
 * PS: The NFS protocol spec. does not clarify what the "count" byte
 *	argument is a count of.. just name strings and file id's or the
 *	entire reply rpc or ...
 *	I tried just file name and id sizes and it confused the Sun client,
 *	so I am using the full rpc size now. The "paranoia.." comment refers
 *	to including the status longwords that are not a part of the dir.
 *	"entry" structures, but are in the rpc.
 *
 * Protocol-level failures are reported through nd->nd_repstat with a
 * return value of 0; a non-zero return only happens via the nfsmout label
 * (presumably reached from a failing NFSM_DISSECT -- confirm), where vp is
 * vput().  vp is vput() on every path that is entered with nd_repstat == 0.
 * isdgram is not referenced by this function.
 */
int
nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram,
    struct vnode *vp, struct thread *p, struct nfsexstuff *exp)
{
	struct dirent *dp;
	u_int32_t *tl;
	int dirlen;
	char *cpos, *cend, *rbuf;
	struct nfsvattr at;
	int nlen, error = 0, getret = 1;
	int siz, cnt, fullsiz, eofflag, ncookies;
	u_int64_t off, toff, verf;
	u_long *cookies = NULL, *cookiep;
	struct uio io;
	struct iovec iv;
	int is_ufs;

	if (nd->nd_repstat) {
		nfsrv_postopattr(nd, getret, &at);
		goto out;
	}
	/* NFSv2 sends a 32-bit cookie; later versions a 64-bit cookie + verf. */
	if (nd->nd_flag & ND_NFSV2) {
		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		off = fxdr_unsigned(u_quad_t, *tl++);
	} else {
		NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
		off = fxdr_hyper(tl);
		tl += 2;
		verf = fxdr_hyper(tl);
		tl += 2;
	}
	toff = off;
	cnt = fxdr_unsigned(int, *tl);
	if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
		cnt = NFS_SRVMAXDATA(nd);
	/* Round the read size up to a multiple of DIRBLKSIZ. */
	siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
	fullsiz = siz;
	if (nd->nd_flag & ND_NFSV3) {
		nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1,
		    NULL);
#if 0
		/*
		 * va_filerev is not sufficient as a cookie verifier,
		 * since it is not supposed to change when entries are
		 * removed/added unless that offset cookies returned to
		 * the client are no longer valid.
		 */
		if (!nd->nd_repstat && toff && verf != at.na_filerev)
			nd->nd_repstat = NFSERR_BAD_COOKIE;
#endif
	}
	if (!nd->nd_repstat && vp->v_type != VDIR)
		nd->nd_repstat = NFSERR_NOTDIR;
	if (nd->nd_repstat == 0 && cnt == 0) {
		if (nd->nd_flag & ND_NFSV2)
			/* NFSv2 does not have NFSERR_TOOSMALL */
			nd->nd_repstat = EPERM;
		else
			nd->nd_repstat = NFSERR_TOOSMALL;
	}
	if (!nd->nd_repstat)
		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
		    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
		    NFSACCCHK_VPISLOCKED, NULL);
	if (nd->nd_repstat) {
		vput(vp);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		goto out;
	}
	is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0;
	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
again:
	eofflag = 0;
	/* Drop cookies left over from a previous pass. */
	if (cookies) {
		free((caddr_t)cookies, M_TEMP);
		cookies = NULL;
	}

	iv.iov_base = rbuf;
	iv.iov_len = siz;
	io.uio_iov = &iv;
	io.uio_iovcnt = 1;
	io.uio_offset = (off_t)off;
	io.uio_resid = siz;
	io.uio_segflg = UIO_SYSSPACE;
	io.uio_rw = UIO_READ;
	io.uio_td = NULL;
	nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
	    &cookies);
	off = (u_int64_t)io.uio_offset;
	if (io.uio_resid)
		siz -= io.uio_resid;

	/* A successful read that returned no cookie array is an error. */
	if (!cookies && !nd->nd_repstat)
		nd->nd_repstat = NFSERR_PERM;
	if (nd->nd_flag & ND_NFSV3) {
		getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL);
		if (!nd->nd_repstat)
			nd->nd_repstat = getret;
	}

	/*
	 * Handles the failed cases. nd->nd_repstat == 0 past here.
	 */
	if (nd->nd_repstat) {
		vput(vp);
		free((caddr_t)rbuf, M_TEMP);
		if (cookies)
			free((caddr_t)cookies, M_TEMP);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		goto out;
	}
	/*
	 * If nothing read, return eof
	 * rpc reply
	 */
	if (siz == 0) {
		vput(vp);
		if (nd->nd_flag & ND_NFSV2) {
			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		} else {
			nfsrv_postopattr(nd, getret, &at);
			NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
			txdr_hyper(at.na_filerev, tl);
			tl += 2;
		}
		/* No entry follows, and this is the end of the directory. */
		*tl++ = newnfs_false;
		*tl = newnfs_true;
		FREE((caddr_t)rbuf, M_TEMP);
		FREE((caddr_t)cookies, M_TEMP);
		goto out;
	}

	/*
	 * Check for degenerate cases of nothing useful read.
	 * If so go try again
	 */
	cpos = rbuf;
	cend = rbuf + siz;
	dp = (struct dirent *)cpos;
	cookiep = cookies;

	/*
	 * For some reason FreeBSD's ufs_readdir() chooses to back the
	 * directory offset up to a block boundary, so it is necessary to
	 * skip over the records that precede the requested offset. This
	 * requires the assumption that file offset cookies monotonically
	 * increase.
	 */
	while (cpos < cend && ncookies > 0 &&
	    (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
	     (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff))) {
		cpos += dp->d_reclen;
		dp = (struct dirent *)cpos;
		cookiep++;
		ncookies--;
	}
	if (cpos >= cend || ncookies == 0) {
		/* Nothing usable in this block: read the next one. */
		siz = fullsiz;
		toff = off;
		goto again;
	}
	vput(vp);

	/*
	 * dirlen is the size of the reply, including all XDR and must
	 * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate
	 * if the XDR should be included in "count", but to be safe, we do.
	 * (Include the two booleans at the end of the reply in dirlen now.)
	 */
	if (nd->nd_flag & ND_NFSV3) {
		nfsrv_postopattr(nd, getret, &at);
		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		txdr_hyper(at.na_filerev, tl);
		dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
	} else {
		dirlen = 2 * NFSX_UNSIGNED;
	}

	/* Loop through the records and build reply */
	while (cpos < cend && ncookies > 0) {
		nlen = dp->d_namlen;
		if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
			nlen <= NFS_MAXNAMLEN) {
			if (nd->nd_flag & ND_NFSV3)
				dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
			else
				dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
			/* Stop before the reply would exceed cnt. */
			if (dirlen > cnt) {
				eofflag = 0;
				break;
			}

			/*
			 * Build the directory record xdr from
			 * the dirent entry.
			 */
			if (nd->nd_flag & ND_NFSV3) {
				/* 64-bit fileid: high word is written as 0. */
				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
				*tl++ = newnfs_true;
				*tl++ = 0;
			} else {
				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
				*tl++ = newnfs_true;
			}
			*tl = txdr_unsigned(dp->d_fileno);
			(void) nfsm_strtom(nd, dp->d_name, nlen);
			if (nd->nd_flag & ND_NFSV3) {
				/* 64-bit cookie: high word is written as 0. */
				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
				*tl++ = 0;
			} else
				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
			*tl = txdr_unsigned(*cookiep);
		}
		cpos += dp->d_reclen;
		dp = (struct dirent *)cpos;
		cookiep++;
		ncookies--;
	}
	/* Unconsumed records remain, so this cannot be the end. */
	if (cpos < cend)
		eofflag = 0;
	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
	*tl++ = newnfs_false;
	if (eofflag)
		*tl = newnfs_true;
	else
		*tl = newnfs_false;
	FREE((caddr_t)rbuf, M_TEMP);
	FREE((caddr_t)cookies, M_TEMP);

out:
	NFSEXITCODE2(0, nd);
	return (0);
nfsmout:
	vput(vp);
	NFSEXITCODE2(error, nd);
	return (error);
}

/*
 * Readdirplus for V3 and Readdir for V4.
*/
/*
 * nfsrvd_readdirplus() decodes the cookie offset, cookie verifier, size
 * hint and count from the request, reads directory entries from vp with
 * VOP_READDIR() and appends one reply record per usable entry.  For NFSv3
 * each record carries post-op attributes and a file handle; for NFSv4 it
 * carries the attributes selected by the request's attribute bitmap.
 *
 * The NFS status is left in nd->nd_repstat and 0 is returned, except when
 * dissecting the request fails (nfsmout), in which case vp is vput() and
 * the dissect error is returned.  On every other path the reference on vp
 * is dropped (vput() or vrele()) before returning.
 */
int
nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram,
    struct vnode *vp, struct thread *p, struct nfsexstuff *exp)
{
	struct dirent *dp;
	u_int32_t *tl;
	int dirlen;
	char *cpos, *cend, *rbuf;
	struct vnode *nvp;
	fhandle_t nfh;
	struct nfsvattr nva, at, *nvap = &nva;
	struct mbuf *mb0, *mb1;
	struct nfsreferral *refp;
	int nlen, r, error = 0, getret = 1, usevget = 1;
	int siz, cnt, fullsiz, eofflag, ncookies, entrycnt;
	caddr_t bpos0, bpos1;
	u_int64_t off, toff, verf;
	u_long *cookies = NULL, *cookiep;
	nfsattrbit_t attrbits, rderrbits, savbits;
	struct uio io;
	struct iovec iv;
	struct componentname cn;
	int at_root, is_ufs, is_zfs, needs_unbusy, supports_nfsv4acls;
	struct mount *mp, *new_mp;
	uint64_t mounted_on_fileno;

	if (nd->nd_repstat) {
		nfsrv_postopattr(nd, getret, &at);
		goto out;
	}
	/* Request layout: cookie (hyper), verifier (hyper), siz, cnt. */
	NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
	off = fxdr_hyper(tl);
	toff = off;
	tl += 2;
	verf = fxdr_hyper(tl);
	tl += 2;
	siz = fxdr_unsigned(int, *tl++);
	cnt = fxdr_unsigned(int, *tl);

	/*
	 * Use the server's maximum data transfer size as the upper bound
	 * on reply datalen.
	 */
	if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
		cnt = NFS_SRVMAXDATA(nd);

	/*
	 * siz is a "hint" of how much directory information (name, fileid,
	 * cookie) should be in the reply. At least one client "hints" 0,
	 * so I set it to cnt for that case. I also round it up to the
	 * next multiple of DIRBLKSIZ.
	 */
	if (siz <= 0)
		siz = cnt;
	siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));

	if (nd->nd_flag & ND_NFSV4) {
		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
		if (error)
			goto nfsmout;
		/*
		 * savbits keeps the bitmap as requested; attrbits is then
		 * reduced by NFSCLRNOTFILLABLE_ATTRBIT().  rderrbits holds
		 * only the RDATTRERROR bit and is used for entries whose
		 * attributes could not be acquired.
		 */
		NFSSET_ATTRBIT(&savbits, &attrbits);
		NFSCLRNOTFILLABLE_ATTRBIT(&attrbits);
		NFSZERO_ATTRBIT(&rderrbits);
		NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR);
	} else {
		NFSZERO_ATTRBIT(&attrbits);
	}
	fullsiz = siz;
	nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL);
	if (!nd->nd_repstat) {
	    if (off && verf != at.na_filerev) {
		/*
		 * va_filerev is not sufficient as a cookie verifier,
		 * since it is not supposed to change when entries are
		 * removed/added unless that offset cookies returned to
		 * the client are no longer valid.
		 */
#if 0
		if (nd->nd_flag & ND_NFSV4) {
			nd->nd_repstat = NFSERR_NOTSAME;
		} else {
			nd->nd_repstat = NFSERR_BAD_COOKIE;
		}
#endif
	    } else if ((nd->nd_flag & ND_NFSV4) && off == 0 && verf != 0) {
		nd->nd_repstat = NFSERR_BAD_COOKIE;
	    }
	}
	if (!nd->nd_repstat && vp->v_type != VDIR)
		nd->nd_repstat = NFSERR_NOTDIR;
	if (!nd->nd_repstat && cnt == 0)
		nd->nd_repstat = NFSERR_TOOSMALL;
	if (!nd->nd_repstat)
		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
		    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
		    NFSACCCHK_VPISLOCKED, NULL);
	if (nd->nd_repstat) {
		vput(vp);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		goto out;
	}
	is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0;
	is_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs") == 0;

	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
again:
	/* Each retry discards the cookie array returned by the prior read. */
	eofflag = 0;
	if (cookies) {
		free((caddr_t)cookies, M_TEMP);
		cookies = NULL;
	}

	iv.iov_base = rbuf;
	iv.iov_len = siz;
	io.uio_iov = &iv;
	io.uio_iovcnt = 1;
	io.uio_offset = (off_t)off;
	io.uio_resid = siz;
	io.uio_segflg = UIO_SYSSPACE;
	io.uio_rw = UIO_READ;
	io.uio_td = NULL;
	nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
	    &cookies);
	off = (u_int64_t)io.uio_offset;
	/* Shrink siz to the number of bytes actually read. */
	if (io.uio_resid)
		siz -= io.uio_resid;

	getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL);

	/* A successful read that returned no cookie array is an error. */
	if (!cookies && !nd->nd_repstat)
		nd->nd_repstat = NFSERR_PERM;
	if (!nd->nd_repstat)
		nd->nd_repstat = getret;
	if (nd->nd_repstat) {
		vput(vp);
		if (cookies)
			free((caddr_t)cookies, M_TEMP);
		free((caddr_t)rbuf, M_TEMP);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		goto out;
	}
	/*
	 * If nothing read, return eof
	 * rpc reply
	 */
	if (siz == 0) {
		vput(vp);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
		txdr_hyper(at.na_filerev, tl);
		tl += 2;
		*tl++ = newnfs_false;
		*tl = newnfs_true;
		free((caddr_t)cookies, M_TEMP);
		free((caddr_t)rbuf, M_TEMP);
		goto out;
	}

	/*
	 * Check for degenerate cases of nothing useful read.
	 * If so go try again
	 */
	cpos = rbuf;
	cend = rbuf + siz;
	dp = (struct dirent *)cpos;
	cookiep = cookies;

	/*
	 * For some reason FreeBSD's ufs_readdir() chooses to back the
	 * directory offset up to a block boundary, so it is necessary to
	 * skip over the records that precede the requested offset. This
	 * requires the assumption that file offset cookies monotonically
	 * increase.
	 */
	while (cpos < cend && ncookies > 0 &&
	  (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
	   (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff) ||
	   ((nd->nd_flag & ND_NFSV4) &&
	    ((dp->d_namlen == 1 && dp->d_name[0] == '.') ||
	     (dp->d_namlen==2 && dp->d_name[0]=='.' && dp->d_name[1]=='.'))))) {
		cpos += dp->d_reclen;
		dp = (struct dirent *)cpos;
		cookiep++;
		ncookies--;
	}
	if (cpos >= cend || ncookies == 0) {
		/* Everything read was skipped: read the next chunk. */
		siz = fullsiz;
		toff = off;
		goto again;
	}

	/*
	 * Busy the file system so that the mount point won't go away
	 * and, as such, VFS_VGET() can be used safely.
	 */
	mp = vp->v_mount;
	vfs_ref(mp);
	NFSVOPUNLOCK(vp, 0);
	nd->nd_repstat = vfs_busy(mp, 0);
	vfs_rel(mp);
	if (nd->nd_repstat != 0) {
		/* vp was unlocked above, so vrele() rather than vput(). */
		vrele(vp);
		free(cookies, M_TEMP);
		free(rbuf, M_TEMP);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		goto out;
	}

	/*
	 * For now ZFS requires VOP_LOOKUP as a workaround.  Until ino_t is changed
	 * to 64 bit type a ZFS filesystem with over 1 billion files in it
	 * will suffer from 64bit -> 32bit truncation.
	 */
	if (is_zfs == 1)
		usevget = 0;

	cn.cn_nameiop = LOOKUP;
	cn.cn_lkflags = LK_SHARED | LK_RETRY;
	cn.cn_cred = nd->nd_cred;
	cn.cn_thread = p;

	/*
	 * Save this position, in case there is an error before one entry
	 * is created.
	 */
	mb0 = nd->nd_mb;
	bpos0 = nd->nd_bpos;

	/*
	 * Fill in the first part of the reply.
	 * dirlen is the reply length in bytes and cannot exceed cnt.
	 * (Include the two booleans at the end of the reply in dirlen now,
	 * so we recognize when we have exceeded cnt.)
	 */
	if (nd->nd_flag & ND_NFSV3) {
		dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
		nfsrv_postopattr(nd, getret, &at);
	} else {
		dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED;
	}
	NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
	txdr_hyper(at.na_filerev, tl);

	/*
	 * Save this position, in case there is an empty reply needed.
	 */
	mb1 = nd->nd_mb;
	bpos1 = nd->nd_bpos;

	/* Loop through the records and build reply */
	entrycnt = 0;
	while (cpos < cend && ncookies > 0 && dirlen < cnt) {
		nlen = dp->d_namlen;
		/*
		 * Skip empty slots, whiteouts and over-long names; for
		 * anything other than NFSv3 also skip "." and "..".
		 */
		if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
		    nlen <= NFS_MAXNAMLEN &&
		    ((nd->nd_flag & ND_NFSV3) || nlen > 2 ||
		     (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.'))
		      || (nlen == 1 && dp->d_name[0] != '.'))) {
			/*
			 * Save the current position in the reply, in case
			 * this entry exceeds cnt.
			 */
			mb1 = nd->nd_mb;
			bpos1 = nd->nd_bpos;

			/*
			 * For readdir_and_lookup get the vnode using
			 * the file number.
			 */
			nvp = NULL;
			refp = NULL;
			r = 0;
			at_root = 0;
			needs_unbusy = 0;
			new_mp = mp;
			mounted_on_fileno = (uint64_t)dp->d_fileno;
			if ((nd->nd_flag & ND_NFSV3) ||
			    NFSNONZERO_ATTRBIT(&savbits)) {
				if (nd->nd_flag & ND_NFSV4)
					refp = nfsv4root_getreferral(NULL,
					    vp, dp->d_fileno);
				if (refp == NULL) {
					if (usevget)
						r = VFS_VGET(mp, dp->d_fileno,
						    LK_SHARED, &nvp);
					else
						r = EOPNOTSUPP;
					if (r == EOPNOTSUPP) {
						/*
						 * Fall back to VOP_LOOKUP()
						 * for this and all later
						 * entries.
						 */
						usevget = 0;
						cn.cn_nameptr = dp->d_name;
						cn.cn_namelen = nlen;
						cn.cn_flags = ISLASTCN |
						    NOFOLLOW | LOCKLEAF;
						if (nlen == 2 &&
						    dp->d_name[0] == '.' &&
						    dp->d_name[1] == '.')
							cn.cn_flags |=
							    ISDOTDOT;
						if (NFSVOPLOCK(vp, LK_SHARED)
						    != 0) {
							nd->nd_repstat = EPERM;
							break;
						}
						if ((vp->v_vflag & VV_ROOT) != 0
						    && (cn.cn_flags & ISDOTDOT)
						    != 0) {
							/*
							 * ".." of a file
							 * system root: use vp
							 * itself, which stays
							 * locked via nvp.
							 */
							vref(vp);
							nvp = vp;
							r = 0;
						} else {
							r = VOP_LOOKUP(vp, &nvp,
							    &cn);
							if (vp != nvp)
								NFSVOPUNLOCK(vp,
								    0);
						}
					}

					/*
					 * For NFSv4, check to see if nvp is
					 * a mount point and get the mount
					 * point vnode, as required.
					 */
					if (r == 0 &&
					    nfsrv_enable_crossmntpt != 0 &&
					    (nd->nd_flag & ND_NFSV4) != 0 &&
					    nvp->v_type == VDIR &&
					    nvp->v_mountedhere != NULL) {
						new_mp = nvp->v_mountedhere;
						r = vfs_busy(new_mp, 0);
						vput(nvp);
						nvp = NULL;
						if (r == 0) {
							r = VFS_ROOT(new_mp,
							    LK_SHARED, &nvp);
							needs_unbusy = 1;
							if (r == 0)
								at_root = 1;
						}
					}
				}
				if (!r) {
				    if (refp == NULL &&
					((nd->nd_flag & ND_NFSV3) ||
					 NFSNONZERO_ATTRBIT(&attrbits))) {
					r = nfsvno_getfh(nvp, &nfh, p);
					if (!r)
					    r = nfsvno_getattr(nvp, nvap, nd, p,
						1, &attrbits);
					if (r == 0 && is_zfs == 1 &&
					    nfsrv_enable_crossmntpt != 0 &&
					    (nd->nd_flag & ND_NFSV4) != 0 &&
					    nvp->v_type == VDIR &&
					    vp->v_mount != nvp->v_mount) {
					    /*
					     * For a ZFS snapshot, there is a
					     * pseudo mount that does not set
					     * v_mountedhere, so it needs to
					     * be detected via a different
					     * mount structure.
					     */
					    at_root = 1;
					    if (new_mp == mp)
						new_mp = nvp->v_mount;
					}
				    }
				} else {
				    nvp = NULL;
				}
				if (r) {
				    /*
				     * A per-entry failure is fatal unless
				     * the client asked for RDATTRERROR.
				     */
				    if (!NFSISSET_ATTRBIT(&attrbits,
					NFSATTRBIT_RDATTRERROR)) {
					if (nvp != NULL)
					    vput(nvp);
					if (needs_unbusy != 0)
					    vfs_unbusy(new_mp);
					nd->nd_repstat = r;
					break;
				    }
				}
			}

			/*
			 * Build the directory record xdr
			 */
			if (nd->nd_flag & ND_NFSV3) {
				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
				*tl++ = newnfs_true;
				*tl++ = 0;
				*tl = txdr_unsigned(dp->d_fileno);
				dirlen += nfsm_strtom(nd, dp->d_name, nlen);
				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
				*tl++ = 0;
				*tl = txdr_unsigned(*cookiep);
				nfsrv_postopattr(nd, 0, nvap);
				dirlen += nfsm_fhtom(nd,(u_int8_t *)&nfh,0,1);
				dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR);
				if (nvp != NULL)
					vput(nvp);
			} else {
				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
				*tl++ = newnfs_true;
				*tl++ = 0;
				*tl = txdr_unsigned(*cookiep);
				dirlen += nfsm_strtom(nd, dp->d_name, nlen);
				if (nvp != NULL) {
					supports_nfsv4acls =
					    nfs_supportsnfsv4acls(nvp);
					NFSVOPUNLOCK(nvp, 0);
				} else
					supports_nfsv4acls = 0;
				if (refp != NULL) {
					dirlen += nfsrv_putreferralattr(nd,
					    &savbits, refp, 0,
					    &nd->nd_repstat);
					if (nd->nd_repstat) {
						if (nvp != NULL)
							vrele(nvp);
						if (needs_unbusy != 0)
							vfs_unbusy(new_mp);
						break;
					}
				} else if (r) {
					dirlen += nfsvno_fillattr(nd, new_mp,
					    nvp, nvap, &nfh, r, &rderrbits,
					    nd->nd_cred, p, isdgram, 0,
					    supports_nfsv4acls, at_root,
					    mounted_on_fileno);
				} else {
					dirlen += nfsvno_fillattr(nd, new_mp,
					    nvp, nvap, &nfh, r, &attrbits,
					    nd->nd_cred, p, isdgram, 0,
					    supports_nfsv4acls, at_root,
					    mounted_on_fileno);
				}
				if (nvp != NULL)
					vrele(nvp);
				dirlen += (3 * NFSX_UNSIGNED);
			}
			if (needs_unbusy != 0)
				vfs_unbusy(new_mp);
			if (dirlen <= cnt)
				entrycnt++;
		}
		cpos += dp->d_reclen;
		dp = (struct dirent *)cpos;
		cookiep++;
		ncookies--;
	}
	vrele(vp);
	vfs_unbusy(mp);

	/*
	 * If dirlen > cnt, we must strip off the last entry. If that
	 * results in an empty reply, report NFSERR_TOOSMALL.
	 */
	if (dirlen > cnt || nd->nd_repstat) {
		if (!nd->nd_repstat && entrycnt == 0)
			nd->nd_repstat = NFSERR_TOOSMALL;
		if (nd->nd_repstat) {
			/* Error: roll the reply back to before any entry. */
			newnfs_trimtrailing(nd, mb0, bpos0);
			if (nd->nd_flag & ND_NFSV3)
				nfsrv_postopattr(nd, getret, &at);
		} else
			newnfs_trimtrailing(nd, mb1, bpos1);
		eofflag = 0;
	} else if (cpos < cend)
		eofflag = 0;
	if (!nd->nd_repstat) {
		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		*tl++ = newnfs_false;
		if (eofflag)
			*tl = newnfs_true;
		else
			*tl = newnfs_false;
	}
	FREE((caddr_t)cookies, M_TEMP);
	FREE((caddr_t)rbuf, M_TEMP);

out:
	NFSEXITCODE2(0, nd);
	return (0);
nfsmout:
	vput(vp);
	NFSEXITCODE2(error, nd);
	return (error);
}

/*
 * Get the settable attributes out of the mbuf list.
 * (Return 0 or EBADRPC)
 *
 * The wire format is chosen from the NFS version flag in nd->nd_flag;
 * only the attributes present in the request are stored into *nvap.
 * attrbitp, aclp and vp are only passed through to nfsv4_sattr().
 */
int
nfsrv_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
    nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
{
	u_int32_t *tl;
	struct nfsv2_sattr *sp;
	int error = 0, toclient = 0;

	switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) {
	case ND_NFSV2:
		NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
		/*
		 * Some old clients didn't fill in the high order 16bits.
		 * --> check the low order 2 bytes for 0xffff
		 */
		if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
			nvap->na_mode = nfstov_mode(sp->sa_mode);
		/* A field equal to newnfs_xdrneg1 is left unset. */
		if (sp->sa_uid != newnfs_xdrneg1)
			nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid);
		if (sp->sa_gid != newnfs_xdrneg1)
			nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid);
		if (sp->sa_size != newnfs_xdrneg1)
			nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size);
		if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) {
#ifdef notyet
			fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime);
#else
			nvap->na_atime.tv_sec =
				fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec);
			nvap->na_atime.tv_nsec = 0;
#endif
		}
		if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1)
			fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime);
		break;
	case ND_NFSV3:
		/* Each attribute is preceded by a "value follows" boolean. */
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		if (*tl == newnfs_true) {
			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
			nvap->na_mode = nfstov_mode(*tl);
		}
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		if (*tl == newnfs_true) {
			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
			nvap->na_uid = fxdr_unsigned(uid_t, *tl);
		}
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		if (*tl == newnfs_true) {
			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
			nvap->na_gid = fxdr_unsigned(gid_t, *tl);
		}
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		if (*tl == newnfs_true) {
			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			nvap->na_size = fxdr_hyper(tl);
		}
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		switch (fxdr_unsigned(int, *tl)) {
		case NFSV3SATTRTIME_TOCLIENT:
			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			fxdr_nfsv3time(tl, &nvap->na_atime);
			toclient = 1;
			break;
		case NFSV3SATTRTIME_TOSERVER:
			vfs_timestamp(&nvap->na_atime);
			nvap->na_vaflags |= VA_UTIMES_NULL;
			break;
		}
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		switch (fxdr_unsigned(int, *tl)) {
		case NFSV3SATTRTIME_TOCLIENT:
			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			fxdr_nfsv3time(tl, &nvap->na_mtime);
			nvap->na_vaflags &= ~VA_UTIMES_NULL;
			break;
		case NFSV3SATTRTIME_TOSERVER:
			vfs_timestamp(&nvap->na_mtime);
			/*
			 * VA_UTIMES_NULL is only set when the access time
			 * was not supplied by the client.
			 */
			if (!toclient)
				nvap->na_vaflags |= VA_UTIMES_NULL;
			break;
		}
		break;
	case ND_NFSV4:
		error = nfsv4_sattr(nd, vp, nvap, attrbitp, aclp, p);
	}

nfsmout:
	NFSEXITCODE2(error, nd);
	return (error);
}

/*
 * Handle the settable attributes for V4.
 * Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise.
 *
 * attrsum counts the attribute bytes consumed so far and is checked
 * against the attribute length sent by the client (attrsize).
 * Attributes that are dissected but not settable here leave
 * NFSERR_ATTRNOTSUPP in nd->nd_repstat instead of failing the parse.
 */
int
nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
    nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
{
	u_int32_t *tl;
	int attrsum = 0;
	int i, j;
	int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0;
	int toclient = 0;
	u_char *cp, namestr[NFSV4_SMALLSTR + 1];
	uid_t uid;
	gid_t gid;

	error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup);
	if (error)
		goto nfsmout;
	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
	attrsize = fxdr_unsigned(int, *tl);

	/*
	 * Loop around getting the setable attributes. If an unsupported
	 * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return.
	 */
	if (retnotsup) {
		nd->nd_repstat = NFSERR_ATTRNOTSUPP;
		bitpos = NFSATTRBIT_MAX;
	} else {
		bitpos = 0;
	}
	for (; bitpos < NFSATTRBIT_MAX; bitpos++) {
	    if (attrsum > attrsize) {
		error = NFSERR_BADXDR;
		goto nfsmout;
	    }
	    if (NFSISSET_ATTRBIT(attrbitp, bitpos))
		switch (bitpos) {
		case NFSATTRBIT_SIZE:
			NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
			/* Size is only settable on regular files. */
			if (vp != NULL && vp->v_type != VREG) {
				error = (vp->v_type == VDIR) ? NFSERR_ISDIR :
				    NFSERR_INVAL;
				goto nfsmout;
			}
			nvap->na_size = fxdr_hyper(tl);
			attrsum += NFSX_HYPER;
			break;
		case NFSATTRBIT_ACL:
			error = nfsrv_dissectacl(nd, aclp, &aceerr, &aclsize,
			    p);
			if (error)
				goto nfsmout;
			if (aceerr && !nd->nd_repstat)
				nd->nd_repstat = aceerr;
			attrsum += aclsize;
			break;
		case NFSATTRBIT_ARCHIVE:
			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
			if (!nd->nd_repstat)
				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
			attrsum += NFSX_UNSIGNED;
			break;
		case NFSATTRBIT_HIDDEN:
			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
			if (!nd->nd_repstat)
				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
			attrsum += NFSX_UNSIGNED;
			break;
		case NFSATTRBIT_MIMETYPE:
			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
			i = fxdr_unsigned(int, *tl);
			/* Skip over the string; it is not used. */
			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
			if (error)
				goto nfsmout;
			if (!nd->nd_repstat)
				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i));
			break;
		case NFSATTRBIT_MODE:
			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
			nvap->na_mode = nfstov_mode(*tl);
			attrsum += NFSX_UNSIGNED;
			break;
		case NFSATTRBIT_OWNER:
			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
			j = fxdr_unsigned(int, *tl);
			if (j < 0) {
				error = NFSERR_BADXDR;
				goto nfsmout;
			}
			/* Names longer than namestr[] get a malloc'd buffer. */
			if (j > NFSV4_SMALLSTR)
				cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
			else
				cp = namestr;
			error = nfsrv_mtostr(nd, cp, j);
			if (error) {
				if (j > NFSV4_SMALLSTR)
					free(cp, M_NFSSTRING);
				goto nfsmout;
			}
			if (!nd->nd_repstat) {
				nd->nd_repstat = nfsv4_strtouid(nd, cp, j, &uid,
				    p);
				if (!nd->nd_repstat)
					nvap->na_uid = uid;
			}
			if (j > NFSV4_SMALLSTR)
				free(cp, M_NFSSTRING);
			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
			break;
		case NFSATTRBIT_OWNERGROUP:
			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
			j = fxdr_unsigned(int, *tl);
			if (j < 0) {
				error = NFSERR_BADXDR;
				goto nfsmout;
			}
			/* Names longer than namestr[] get a malloc'd buffer. */
			if (j > NFSV4_SMALLSTR)
				cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
			else
				cp = namestr;
			error = nfsrv_mtostr(nd, cp, j);
			if (error) {
				if (j > NFSV4_SMALLSTR)
					free(cp, M_NFSSTRING);
				goto nfsmout;
			}
			if (!nd->nd_repstat) {
				nd->nd_repstat = nfsv4_strtogid(nd, cp, j, &gid,
				    p);
				if (!nd->nd_repstat)
					nvap->na_gid = gid;
			}
			if (j > NFSV4_SMALLSTR)
				free(cp, M_NFSSTRING);
			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
			break;
		case NFSATTRBIT_SYSTEM:
			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
			if (!nd->nd_repstat)
				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
			attrsum += NFSX_UNSIGNED;
			break;
		case NFSATTRBIT_TIMEACCESSSET:
			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
			attrsum += NFSX_UNSIGNED;
			if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
			    NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
			    fxdr_nfsv4time(tl, &nvap->na_atime);
			    toclient = 1;
			    attrsum += NFSX_V4TIME;
			} else {
			    vfs_timestamp(&nvap->na_atime);
			    nvap->na_vaflags |= VA_UTIMES_NULL;
			}
			break;
		case NFSATTRBIT_TIMEBACKUP:
			NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
			if (!nd->nd_repstat)
				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
			attrsum += NFSX_V4TIME;
			break;
		case NFSATTRBIT_TIMECREATE:
			NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
			if (!nd->nd_repstat)
				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
			attrsum += NFSX_V4TIME;
			break;
		case NFSATTRBIT_TIMEMODIFYSET:
			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
			attrsum += NFSX_UNSIGNED;
			if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
			    NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
			    fxdr_nfsv4time(tl, &nvap->na_mtime);
			    nvap->na_vaflags &= ~VA_UTIMES_NULL;
			    attrsum += NFSX_V4TIME;
			} else {
			    vfs_timestamp(&nvap->na_mtime);
			    if (!toclient)
				nvap->na_vaflags |= VA_UTIMES_NULL;
			}
			break;
		default:
			nd->nd_repstat = NFSERR_ATTRNOTSUPP;
			/*
			 * set bitpos so we drop out of the loop.
			 */
			bitpos = NFSATTRBIT_MAX;
			break;
		}
	}

	/*
	 * some clients pad the attrlist, so we need to skip over the
	 * padding.
	 */
	if (attrsum > attrsize) {
		error = NFSERR_BADXDR;
	} else {
		attrsize = NFSM_RNDUP(attrsize);
		if (attrsum < attrsize)
			error = nfsm_advance(nd, attrsize - attrsum, -1);
	}
nfsmout:
	NFSEXITCODE2(error, nd);
	return (error);
}

/*
 * Check/setup export credentials.
*/
/*
 * nfsd_excred() validates the request's security flavor against the
 * export and then maps the request credentials in nd->nd_cred.
 *
 * nd       - request descriptor; nd_flag, nd_procnum and nd_cred are used
 * exp      - export information; MNT_EXPORTANON is cleared for GSS requests
 * credanon - anonymous credentials copied into nd->nd_cred when mapping
 *
 * Returns 0 on success, NFSERR_WRONGSEC (NFSv4) or
 * NFSERR_AUTHERR | AUTH_TOOWEAK (other versions) when the flavor is not
 * acceptable, or NFSERR_PROGNOTV4 for a non-NFSv4 request on a V4 only
 * export.
 */
int
nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp,
    struct ucred *credanon)
{
	int error = 0;
	int use_anon;

	/* RPCSEC_GSS requests are not forced onto the anonymous user. */
	if (nd->nd_flag & ND_GSS)
		exp->nes_exflag &= ~MNT_EXPORTANON;

	/*
	 * Reject a security flavor the export does not allow.
	 * RFC2623 suggests that the NFSv3 Fsinfo RPC be allowed to
	 * AUTH_NONE or AUTH_SYS for file systems requiring RPCSEC_GSS.
	 * Secinfo is allowed too, so that the client can acquire the
	 * correct flavor(s).
	 */
	if (nfsvno_testexp(nd, exp) &&
	    nd->nd_procnum != NFSV4OP_SECINFO &&
	    nd->nd_procnum != NFSPROC_FSINFO) {
		error = (nd->nd_flag & ND_NFSV4) ? NFSERR_WRONGSEC :
		    (NFSERR_AUTHERR | AUTH_TOOWEAK);
		goto out;
	}

	/* Only NFSv4 may use a file system that is exported V4 only. */
	if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4)) {
		error = NFSERR_PROGNOTV4;
		goto out;
	}

	/* Nothing to map when the file system is not exported. */
	if (!NFSVNO_EXPORTED(exp))
		goto out;

	/*
	 * Decide whether the anonymous credentials replace the caller's.
	 * (Note that ND_AUTHNONE will only be set for an NFSv3
	 *  Fsinfo RPC. If set for anything else, this code might need
	 *  to change.)
	 */
	use_anon = (((nd->nd_flag & ND_GSS) == 0 &&
	    nd->nd_cred->cr_uid == 0) ||
	    NFSVNO_EXPORTANON(exp) ||
	    (nd->nd_flag & ND_AUTHNONE) != 0);
	if (use_anon) {
		nd->nd_cred->cr_uid = credanon->cr_uid;
		nd->nd_cred->cr_gid = credanon->cr_gid;
		crsetgroups(nd->nd_cred, credanon->cr_ngroups,
		    credanon->cr_groups);
	} else if ((nd->nd_flag & ND_GSS) == 0) {
		/*
		 * AUTH_SYS: nfsrv_getgrpscred() returns a replacement
		 * credential with a group list set up by
		 * "nfsuserd -manage-gids" when one exists; otherwise it
		 * simply returns its argument.
		 */
		nd->nd_cred = nfsrv_getgrpscred(nd->nd_cred);
	}

out:
	NFSEXITCODE2(error, nd);
	return (error);
}

/*
 * Check exports.
*/
/*
 * nfsvno_checkexp() calls VFS_CHECKEXP() for mp/nam and copies the
 * returned security flavors into exp.  A VFS_CHECKEXP() failure is
 * turned into success with empty export flags and no flavors when
 * nfs_rootfhset is set; otherwise the error is returned.
 */
int
nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp,
    struct ucred **credp)
{
	int i, error, *secflavors;

	error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
	    &exp->nes_numsecflavor, &secflavors);
	if (error) {
		if (nfs_rootfhset) {
			exp->nes_exflag = 0;
			exp->nes_numsecflavor = 0;
			error = 0;
		}
	} else {
		/* Copy the security flavors. */
		for (i = 0; i < exp->nes_numsecflavor; i++)
			exp->nes_secflavors[i] = secflavors[i];
	}
	NFSEXITCODE(error);
	return (error);
}

/*
 * Get a vnode for a file handle and export stuff.
 *
 * Any VFS_FHTOVP() failure is reported as ESTALE.  When nam is non-NULL
 * the export is checked as in nfsvno_checkexp(); if that check fails and
 * nfs_rootfhset is not set, the vnode is vput() and the error returned.
 * *credp is set to NULL before anything else is done.
 */
int
nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam,
    int lktype, struct vnode **vpp, struct nfsexstuff *exp,
    struct ucred **credp)
{
	int i, error, *secflavors;

	*credp = NULL;
	exp->nes_numsecflavor = 0;
	error = VFS_FHTOVP(mp, &fhp->fh_fid, lktype, vpp);
	if (error != 0)
		/* Make sure the server replies ESTALE to the client. */
		error = ESTALE;
	if (nam && !error) {
		error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
		    &exp->nes_numsecflavor, &secflavors);
		if (error) {
			if (nfs_rootfhset) {
				exp->nes_exflag = 0;
				exp->nes_numsecflavor = 0;
				error = 0;
			} else {
				vput(*vpp);
			}
		} else {
			/* Copy the security flavors. */
			for (i = 0; i < exp->nes_numsecflavor; i++)
				exp->nes_secflavors[i] = secflavors[i];
		}
	}
	NFSEXITCODE(error);
	return (error);
}

/*
 * nfsd_fhtovp() - convert a fh to a vnode ptr
 * 	- look up fsid in mount list (if not found ret error)
 *	- get vp and export rights by calling nfsvno_fhtovp()
 *	- if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
 *	  for AUTH_SYS
 *	- if mpp != NULL, return the mount point so that it can
 *	  be used for vn_finished_write() by the caller
 *
 * The result is reported through nd->nd_repstat.  On failure *vpp is
 * set to NULL and, when mpp is non-NULL, so is *mpp.
 */
void
nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype,
    struct vnode **vpp, struct nfsexstuff *exp,
    struct mount **mpp, int startwrite, struct thread *p)
{
	struct mount *mp;
	struct ucred *credanon;
	fhandle_t *fhp;

	fhp = (fhandle_t *)nfp->nfsrvfh_data;
	/*
	 * Check for the special case of the nfsv4root_fh.
	 */
	mp = vfs_busyfs(&fhp->fh_fsid);
	if (mpp != NULL)
		*mpp = mp;
	if (mp == NULL) {
		*vpp = NULL;
		nd->nd_repstat = ESTALE;
		goto out;
	}

	if (startwrite) {
		vn_start_write(NULL, mpp, V_WAIT);
		/* Upgrade a shared lock request unless shared writes work. */
		if (lktype == LK_SHARED && !(MNT_SHARED_WRITES(mp)))
			lktype = LK_EXCLUSIVE;
	}
	nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, lktype, vpp, exp,
	    &credanon);
	vfs_unbusy(mp);

	/*
	 * For NFSv4 without a pseudo root fs, unexported file handles
	 * can be returned, so that Lookup works everywhere.
	 */
	if (!nd->nd_repstat && exp->nes_exflag == 0 &&
	    !(nd->nd_flag & ND_NFSV4)) {
		vput(*vpp);
		nd->nd_repstat = EACCES;
	}

	/*
	 * Personally, I've never seen any point in requiring a
	 * reserved port#, since only in the rare case where the
	 * clients are all boxes with secure system privileges,
	 * does it provide any enhanced security, but... some people
	 * believe it to be useful and keep putting this code back in.
	 * (There is also some "security checker" out there that
	 * complains if the nfs server doesn't enforce this.)
	 * However, note the following:
	 * RFC3530 (NFSv4) specifies that a reserved port# not be
	 *	required.
	 * RFC2623 recommends that, if a reserved port# is checked for,
	 *	that there be a way to turn that off--> ifdef'd.
	 */
#ifdef NFS_REQRSVPORT
	if (!nd->nd_repstat) {
		struct sockaddr_in *saddr;
		struct sockaddr_in6 *saddr6;

		saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
		saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *);
		if (!(nd->nd_flag & ND_NFSV4) &&
		    ((saddr->sin_family == AF_INET &&
		      ntohs(saddr->sin_port) >= IPPORT_RESERVED) ||
		     (saddr6->sin6_family == AF_INET6 &&
		      ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) {
			vput(*vpp);
			nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
		}
	}
#endif	/* NFS_REQRSVPORT */
	/*
	 * Check/setup credentials.
	 */
	if (!nd->nd_repstat) {
		/* Remember the caller's uid before it may be remapped. */
		nd->nd_saveduid = nd->nd_cred->cr_uid;
		nd->nd_repstat = nfsd_excred(nd, exp, credanon);
		if (nd->nd_repstat)
			vput(*vpp);
	}
	if (credanon != NULL)
		crfree(credanon);
	if (nd->nd_repstat) {
		if (startwrite)
			vn_finished_write(mp);
		*vpp = NULL;
		if (mpp != NULL)
			*mpp = NULL;
	}

out:
	NFSEXITCODE2(0, nd);
}

/*
 * glue for fp.
 *
 * Looks up descriptor fd in the calling process' file table and stores
 * the struct file pointer in *fpp.  Returns EBADF when fd is out of range
 * or has no file.  NOTE(review): vpp is never written by this function.
 */
static int
fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp)
{
	struct filedesc *fdp;
	struct file *fp;
	int error = 0;

	fdp = p->td_proc->p_fd;
	if (fd < 0 || fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd].fde_file) == NULL) {
		error = EBADF;
		goto out;
	}
	*fpp = fp;

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * Called from nfssvc() to update the exports list. Just call
 * vfs_export(). This has to be done, since the v4 root fake fs isn't
 * in the mount list.
 *
 * A MNT_DELEXPORT request clears nfs_rootfhset.  Otherwise, after a
 * successful vfs_export(), the path in fspec is looked up and its file
 * handle becomes nfs_rootfh; a NULL fspec yields EPERM.
 */
int
nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p)
{
	struct nfsex_args *nfsexargp = (struct nfsex_args *)argp;
	int error = 0;
	struct nameidata nd;
	fhandle_t fh;

	error = vfs_export(&nfsv4root_mnt, &nfsexargp->export);
	if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0)
		nfs_rootfhset = 0;
	else if (error == 0) {
		if (nfsexargp->fspec == NULL) {
			error = EPERM;
			goto out;
		}
		/*
		 * If fspec != NULL, this is the v4root path.
		 */
		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE,
		    nfsexargp->fspec, p);
		if ((error = namei(&nd)) != 0)
			goto out;
		error = nfsvno_getfh(nd.ni_vp, &fh, p);
		vrele(nd.ni_vp);
		if (!error) {
			nfs_rootfh.nfsrvfh_len = NFSX_MYFH;
			NFSBCOPY((caddr_t)&fh,
			    nfs_rootfh.nfsrvfh_data,
			    sizeof (fhandle_t));
			nfs_rootfhset = 1;
		}
	}

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * This function needs to test to see if the system is near its limit
 * for memory allocation via malloc() or mget() and return True iff
 * either of these resources are near their limit.
 * XXX (For now, this is just a stub.)
*/ int nfsrv_testmalloclimit = 0; int nfsrv_mallocmget_limit(void) { static int printmesg = 0; static int testval = 1; if (nfsrv_testmalloclimit && (testval++ % 1000) == 0) { if ((printmesg++ % 100) == 0) printf("nfsd: malloc/mget near limit\n"); return (1); } return (0); } /* * BSD specific initialization of a mount point. */ void nfsd_mntinit(void) { static int inited = 0; if (inited) return; inited = 1; nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED); TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist); TAILQ_INIT(&nfsv4root_mnt.mnt_activevnodelist); nfsv4root_mnt.mnt_export = NULL; TAILQ_INIT(&nfsv4root_opt); TAILQ_INIT(&nfsv4root_newopt); nfsv4root_mnt.mnt_opt = &nfsv4root_opt; nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt; nfsv4root_mnt.mnt_nvnodelistsize = 0; nfsv4root_mnt.mnt_activevnodelistsize = 0; } /* * Get a vnode for a file handle, without checking exports, etc. */ struct vnode * nfsvno_getvp(fhandle_t *fhp) { struct mount *mp; struct vnode *vp; int error; mp = vfs_busyfs(&fhp->fh_fsid); if (mp == NULL) return (NULL); error = VFS_FHTOVP(mp, &fhp->fh_fid, LK_EXCLUSIVE, &vp); vfs_unbusy(mp); if (error) return (NULL); return (vp); } /* * Do a local VOP_ADVLOCK(). */ int nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first, u_int64_t end, struct thread *td) { int error = 0; struct flock fl; u_int64_t tlen; if (nfsrv_dolocallocks == 0) goto out; ASSERT_VOP_UNLOCKED(vp, "nfsvno_advlock: vp locked"); fl.l_whence = SEEK_SET; fl.l_type = ftype; fl.l_start = (off_t)first; if (end == NFS64BITSSET) { fl.l_len = 0; } else { tlen = end - first; fl.l_len = (off_t)tlen; } /* * For FreeBSD8, the l_pid and l_sysid must be set to the same * values for all calls, so that all locks will be held by the * nfsd server. (The nfsd server handles conflicts between the * various clients.) * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024 * bytes, so it can't be put in l_sysid. 
*/ if (nfsv4_sysid == 0) nfsv4_sysid = nlm_acquire_next_sysid(); fl.l_pid = (pid_t)0; fl.l_sysid = (int)nfsv4_sysid; if (ftype == F_UNLCK) error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_UNLCK, &fl, (F_POSIX | F_REMOTE)); else error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl, (F_POSIX | F_REMOTE)); out: NFSEXITCODE(error); return (error); } /* * Check the nfsv4 root exports. */ int nfsvno_v4rootexport(struct nfsrv_descript *nd) { struct ucred *credanon; int exflags, error = 0, numsecflavor, *secflavors, i; error = vfs_stdcheckexp(&nfsv4root_mnt, nd->nd_nam, &exflags, &credanon, &numsecflavor, &secflavors); if (error) { error = NFSERR_PROGUNAVAIL; goto out; } if (credanon != NULL) crfree(credanon); for (i = 0; i < numsecflavor; i++) { if (secflavors[i] == AUTH_SYS) nd->nd_flag |= ND_EXAUTHSYS; else if (secflavors[i] == RPCSEC_GSS_KRB5) nd->nd_flag |= ND_EXGSS; else if (secflavors[i] == RPCSEC_GSS_KRB5I) nd->nd_flag |= ND_EXGSSINTEGRITY; else if (secflavors[i] == RPCSEC_GSS_KRB5P) nd->nd_flag |= ND_EXGSSPRIVACY; } out: NFSEXITCODE(error); return (error); } /* * Nfs server pseudo system call for the nfsd's */ /* * MPSAFE */ static int nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap) { struct file *fp; struct nfsd_addsock_args sockarg; struct nfsd_nfsd_args nfsdarg; struct nfsd_nfsd_oargs onfsdarg; cap_rights_t rights; int error; char *cp; if (uap->flag & NFSSVC_NFSDADDSOCK) { error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg)); if (error) goto out; /* * Since we don't know what rights might be required, * pretend that we need them all. It is better to be too * careful than too reckless. 
*/ error = fget(td, sockarg.sock, cap_rights_init(&rights, CAP_SOCK_SERVER), &fp); if (error != 0) goto out; if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, td); error = EPERM; goto out; } error = nfsrvd_addsock(fp); fdrop(fp, td); } else if (uap->flag & NFSSVC_NFSDNFSD) { if (uap->argp == NULL) { error = EINVAL; goto out; } if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) { error = copyin(uap->argp, &onfsdarg, sizeof(onfsdarg)); if (error == 0) { nfsdarg.principal = onfsdarg.principal; nfsdarg.minthreads = onfsdarg.minthreads; nfsdarg.maxthreads = onfsdarg.maxthreads; nfsdarg.version = 1; nfsdarg.addr = NULL; nfsdarg.addrlen = 0; nfsdarg.dnshost = NULL; nfsdarg.dnshostlen = 0; nfsdarg.mirror = NULL; nfsdarg.mirrorlen = 0; } } else error = copyin(uap->argp, &nfsdarg, sizeof(nfsdarg)); if (error) goto out; if (nfsdarg.addrlen > 0 && nfsdarg.addrlen < 10000 && nfsdarg.dnshostlen > 0 && nfsdarg.dnshostlen < 10000 && nfsdarg.dspathlen > 0 && nfsdarg.dspathlen < 10000 && nfsdarg.mirrorlen > 0 && nfsdarg.mirrorlen < 10000 && nfsdarg.addr != NULL && nfsdarg.dnshost != NULL && nfsdarg.dspath != NULL && nfsdarg.mirror != NULL) { NFSD_DEBUG(1, "addrlen=%d dspathlen=%d dnslen=%d" " mirrorlen=%d\n", nfsdarg.addrlen, nfsdarg.dspathlen, nfsdarg.dnshostlen, nfsdarg.mirrorlen); cp = malloc(nfsdarg.addrlen + 1, M_TEMP, M_WAITOK); error = copyin(nfsdarg.addr, cp, nfsdarg.addrlen); if (error != 0) { free(cp, M_TEMP); goto out; } cp[nfsdarg.addrlen] = '\0'; /* Ensure nul term. */ nfsdarg.addr = cp; cp = malloc(nfsdarg.dnshostlen + 1, M_TEMP, M_WAITOK); error = copyin(nfsdarg.dnshost, cp, nfsdarg.dnshostlen); if (error != 0) { free(nfsdarg.addr, M_TEMP); free(cp, M_TEMP); goto out; } cp[nfsdarg.dnshostlen] = '\0'; /* Ensure nul term. 
*/
			nfsdarg.dnshost = cp;
			/* Copy in the DS path, leaving room for a nul byte. */
			cp = malloc(nfsdarg.dspathlen + 1, M_TEMP, M_WAITOK);
			error = copyin(nfsdarg.dspath, cp, nfsdarg.dspathlen);
			if (error != 0) {
				/* Release every buffer allocated so far. */
				free(nfsdarg.addr, M_TEMP);
				free(nfsdarg.dnshost, M_TEMP);
				free(cp, M_TEMP);
				goto out;
			}
			cp[nfsdarg.dspathlen] = '\0';	/* Ensure nul term. */
			nfsdarg.dspath = cp;
			/* Copy in the mirror string the same way. */
			cp = malloc(nfsdarg.mirrorlen + 1, M_TEMP, M_WAITOK);
			error = copyin(nfsdarg.mirror, cp, nfsdarg.mirrorlen);
			if (error != 0) {
				free(nfsdarg.addr, M_TEMP);
				free(nfsdarg.dnshost, M_TEMP);
				free(nfsdarg.dspath, M_TEMP);
				free(cp, M_TEMP);
				goto out;
			}
			cp[nfsdarg.mirrorlen] = '\0';	/* Ensure nul term. */
			nfsdarg.mirror = cp;
		} else {
			/* No strings supplied: pass NULL/0 for all of them. */
			nfsdarg.addr = NULL;
			nfsdarg.addrlen = 0;
			nfsdarg.dnshost = NULL;
			nfsdarg.dnshostlen = 0;
			nfsdarg.dspath = NULL;
			nfsdarg.dspathlen = 0;
			nfsdarg.mirror = NULL;
			nfsdarg.mirrorlen = 0;
		}
		error = nfsrvd_nfsd(td, &nfsdarg);
		/* The copied-in strings are only needed for the call above. */
		free(nfsdarg.addr, M_TEMP);
		free(nfsdarg.dnshost, M_TEMP);
		free(nfsdarg.dspath, M_TEMP);
		free(nfsdarg.mirror, M_TEMP);
	} else {
		error = nfssvc_srvcall(td, uap, td->td_ucred);
	}

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * Handle the server requests selected by the bits in uap->flag.
 * The flags are tested in a fixed order and only the first matching
 * branch runs.  "error" starts out as EINVAL, so EINVAL is returned when
 * none of the flags tested here is set.
 * Arguments, where a request has any, are copied in from uap->argp.
 */
static int
nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred)
{
	struct nfsex_args export;
	struct file *fp = NULL;
	int stablefd, len;
	struct nfsd_clid adminrevoke;
	struct nfsd_dumplist dumplist;
	struct nfsd_dumpclients *dumpclients;
	struct nfsd_dumplocklist dumplocklist;
	struct nfsd_dumplocks *dumplocks;
	struct nameidata nd;
	vnode_t vp;
	int error = EINVAL, igotlock;
	struct proc *procp;
	/* Remembers across calls whether the nfsd threads are suspended. */
	static int suspend_nfsd = 0;

	if (uap->flag & NFSSVC_PUBLICFH) {
		/* Install a new public file handle. */
		NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data,
		    sizeof (fhandle_t));
		error = copyin(uap->argp,
		    &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t));
		if (!error)
			nfs_pubfhset = 1;
	} else if (uap->flag & NFSSVC_V4ROOTEXPORT) {
		error = copyin(uap->argp,(caddr_t)&export,
		    sizeof (struct nfsex_args));
		if (!error)
			error = nfsrv_v4rootexport(&export, cred, p);
	} else if (uap->flag & NFSSVC_NOPUBLICFH) {
		nfs_pubfhset = 0;
		error = 0;
	} else if (uap->flag & NFSSVC_STABLERESTART) {
		/*
		 * The argument is a file descriptor, which must be open for
		 * both reading and writing, and no nfsd may be running yet.
		 * NOTE(review): fp is not released on the EBADF/EPERM paths
		 * below; confirm whether fp_getfvp() returns a held reference.
		 */
		error = copyin(uap->argp, (caddr_t)&stablefd,
		    sizeof (int));
		if (!error)
			error = fp_getfvp(p, stablefd, &fp, &vp);
		if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) !=
		    (FREAD | FWRITE))
			error = EBADF;
		if (!error && newnfs_numnfsd != 0)
			error = EPERM;
		if (!error) {
			nfsrv_stablefirst.nsf_fp = fp;
			nfsrv_setupstable(p);
		}
	} else if (uap->flag & NFSSVC_ADMINREVOKE) {
		error = copyin(uap->argp, (caddr_t)&adminrevoke,
		    sizeof (struct nfsd_clid));
		if (!error)
			error = nfsrv_adminrevoke(&adminrevoke, p);
	} else if (uap->flag & NFSSVC_DUMPCLIENTS) {
		error = copyin(uap->argp, (caddr_t)&dumplist,
		    sizeof (struct nfsd_dumplist));
		/* Bound the caller-supplied entry count before using it. */
		if (!error && (dumplist.ndl_size < 1 ||
		    dumplist.ndl_size > NFSRV_MAXDUMPLIST))
			error = EPERM;
		if (!error) {
			/*
			 * NOTE(review): the buffer is not zeroed before
			 * nfsrv_dumpclients() fills it; confirm that every
			 * byte copied out below has been initialized.
			 */
			len = sizeof (struct nfsd_dumpclients) *
			    dumplist.ndl_size;
			dumpclients = (struct nfsd_dumpclients *)malloc(len,
			    M_TEMP, M_WAITOK);
			nfsrv_dumpclients(dumpclients, dumplist.ndl_size);
			error = copyout(dumpclients,
			    CAST_USER_ADDR_T(dumplist.ndl_list), len);
			free((caddr_t)dumpclients, M_TEMP);
		}
	} else if (uap->flag & NFSSVC_DUMPLOCKS) {
		error = copyin(uap->argp, (caddr_t)&dumplocklist,
		    sizeof (struct nfsd_dumplocklist));
		/* Bound the caller-supplied entry count before using it. */
		if (!error && (dumplocklist.ndllck_size < 1 ||
		    dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST))
			error = EPERM;
		if (!error)
			error = nfsrv_lookupfilename(&nd,
			    dumplocklist.ndllck_fname, p);
		if (!error) {
			len = sizeof (struct nfsd_dumplocks) *
			    dumplocklist.ndllck_size;
			dumplocks = (struct nfsd_dumplocks *)malloc(len,
			    M_TEMP, M_WAITOK);
			nfsrv_dumplocks(nd.ni_vp, dumplocks,
			    dumplocklist.ndllck_size, p);
			/* Done with the vnode found by the lookup above. */
			vput(nd.ni_vp);
			error = copyout(dumplocks,
			    CAST_USER_ADDR_T(dumplocklist.ndllck_list), len);
			free((caddr_t)dumplocks, M_TEMP);
		}
	} else if (uap->flag & NFSSVC_BACKUPSTABLE) {
		/*
		 * Record the calling process (pid, command name, start time
		 * and proc pointer) so nfsrv_backupstable() can find it.
		 * NOTE(review): "error" is left at its initial EINVAL in this
		 * branch; confirm that callers ignore the return value.
		 */
		procp = p->td_proc;
		PROC_LOCK(procp);
		nfsd_master_pid = procp->p_pid;
		bcopy(procp->p_comm, nfsd_master_comm, MAXCOMLEN + 1);
		nfsd_master_start = procp->p_stats->p_start;
		nfsd_master_proc = procp;
		PROC_UNLOCK(procp);
	} else if ((uap->flag & NFSSVC_SUSPENDNFSD) != 0) {
		NFSLOCKV4ROOTMUTEX();
		if (suspend_nfsd == 0) {
			/* Lock out all nfsd threads */
			do {
				igotlock = nfsv4_lock(&nfsd_suspend_lock, 1,
				    NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
			} while (igotlock == 0 && suspend_nfsd == 0);
			suspend_nfsd = 1;
		}
		NFSUNLOCKV4ROOTMUTEX();
		error = 0;
	} else if ((uap->flag & NFSSVC_RESUMENFSD) != 0) {
		NFSLOCKV4ROOTMUTEX();
		/* Only release the lock if a suspend actually took it. */
		if (suspend_nfsd != 0) {
			nfsv4_unlock(&nfsd_suspend_lock, 0);
			suspend_nfsd = 0;
		}
		NFSUNLOCKV4ROOTMUTEX();
		error = 0;
	}

	NFSEXITCODE(error);
	return (error);
}

/*
 * Check exports.
 * Returns 0 if ok, 1 otherwise.
 * "ok" means that one of the export's security flavors matches the
 * flavor recorded in nd->nd_flag for the request.
 */
int
nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp)
{
	int i;

	/*
	 * This seems odd, but allow the case where the security flavor
	 * list is empty. This happens when NFSv4 is traversing non-exported
	 * file systems. Exported file systems should always have a non-empty
	 * security flavor list.
	 */
	if (exp->nes_numsecflavor == 0)
		return (0);

	for (i = 0; i < exp->nes_numsecflavor; i++) {
		/*
		 * The tests for privacy and integrity must be first,
		 * since ND_GSS is set for everything but AUTH_SYS.
		 */
		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P &&
		    (nd->nd_flag & ND_GSSPRIVACY))
			return (0);
		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I &&
		    (nd->nd_flag & ND_GSSINTEGRITY))
			return (0);
		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 &&
		    (nd->nd_flag & ND_GSS))
			return (0);
		if (exp->nes_secflavors[i] == AUTH_SYS &&
		    (nd->nd_flag & ND_GSS) == 0)
			return (0);
	}
	return (1);
}

/*
 * Calculate a hash value for the fid in a file handle.
 * Only the fh_fid member is hashed, starting from a seed of 0.
 */
uint32_t
nfsrv_hashfh(fhandle_t *fhp)
{
	uint32_t hashval;

	hashval = hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0);
	return (hashval);
}

/*
 * Calculate a hash value for the sessionid.
 * NFSX_V4SESSIONID bytes are hashed, starting from a seed of 0.
 */
uint32_t
nfsrv_hashsessionid(uint8_t *sessionid)
{
	uint32_t hashval;

	hashval = hash32_buf(sessionid, NFSX_V4SESSIONID, 0);
	return (hashval);
}

/*
 * Signal the userland master nfsd to backup the stable restart file.
*/
void
nfsrv_backupstable(void)
{
	struct proc *procp;

	if (nfsd_master_proc != NULL) {
		procp = pfind(nfsd_master_pid);
		/* Try to make sure it is the correct process. */
		if (procp == nfsd_master_proc &&
		    procp->p_stats->p_start.tv_sec ==
		    nfsd_master_start.tv_sec &&
		    procp->p_stats->p_start.tv_usec ==
		    nfsd_master_start.tv_usec &&
		    strcmp(procp->p_comm, nfsd_master_comm) == 0)
			kern_psignal(procp, SIGUSR2);
		else
			/* Not found or not the same process: forget it. */
			nfsd_master_proc = NULL;

		/* A non-NULL result from pfind() is unlocked here. */
		if (procp != NULL)
			PROC_UNLOCK(procp);
	}
}

/*
 * Create a DS data file for nfsrv_pnfscreate(). Called for each mirror.
 * The arguments are in a structure, so that they can be passed through
 * kproc_create() for a kernel process to execute this function.
 */
struct nfsrvdscreate {
	struct ucred		*tcred;
	struct vnode		*dvp;
	NFSPROC_T		*p;
	struct pnfsdsfile	*pf;
	int			haskproc;	/* Non-zero until the kproc is done. */
	int			err;		/* Result of nfsrv_dscreate(). */
	fhandle_t		fh;
	struct vattr		va;		/* Passed as "nvap". */
	struct vattr		createva;	/* Passed as "vap". */
};

/*
 * Create one DS data file in the directory dvp and fill in *pf.
 * The file name is generated from *fhp by nfsrv_putfhname(), the file is
 * created with the attributes in *vap and then VOP_SETATTR() is done with
 * *nvap.  On success *pf receives the new file's file handle, the DS
 * socket address and the file name.  When dsa is not NULL, it receives
 * the change, size, atime and mtime attributes of the new file.
 * Returns 0 or an errno value.
 */
static int
nfsrv_dscreate(struct vnode *dvp, struct vattr *vap, struct vattr *nvap,
    fhandle_t *fhp, struct pnfsdsfile *pf, struct pnfsdsattr *dsa,
    struct ucred *tcred, NFSPROC_T *p)
{
	struct vnode *nvp;
	struct nameidata named;
	struct vattr va;
	char *bufp;
	u_long *hashp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	int error;

	NFSNAMEICNDSET(&named.ni_cnd, tcred, CREATE,
	    LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE);
	nfsvno_setpathbuf(&named, &bufp, &hashp);
	named.ni_cnd.cn_lkflags = LK_EXCLUSIVE;
	named.ni_cnd.cn_thread = p;
	named.ni_cnd.cn_nameptr = bufp;
	named.ni_cnd.cn_namelen = nfsrv_putfhname(fhp, bufp);

	/* Create the data file in the DS mount. */
	error = NFSVOPLOCK(dvp, LK_EXCLUSIVE);
	if (error == 0) {
		error = VOP_CREATE(dvp, &nvp, &named.ni_cnd, vap);
		NFSVOPUNLOCK(dvp, 0);
		if (error == 0) {
			/* Set the ownership of the file. */
			error = VOP_SETATTR(nvp, nvap, tcred);
			NFSD_DEBUG(4, "nfsrv_dscreate:"
			    " setattr-uid=%d\n", error);
			/* On failure nvp is released here, not below. */
			if (error != 0)
				vput(nvp);
		}
		if (error != 0)
			printf("pNFS: pnfscreate failed=%d\n", error);
	} else
		printf("pNFS: pnfscreate vnlock=%d\n", error);
	if (error == 0) {
		np = VTONFS(nvp);
		nmp = VFSTONFS(nvp->v_mount);
		/*
		 * The new file must be on an "nfs" mount, with an address
		 * that fits in a sockaddr_in6 and a file handle of
		 * NFSX_MYFH bytes, since that is what *pf can hold.
		 */
		if (strcmp(nvp->v_mount->mnt_vfc->vfc_name, "nfs")
		    != 0 || nmp->nm_nam->sa_len > sizeof(
		    struct sockaddr_in6) ||
		    np->n_fhp->nfh_len != NFSX_MYFH) {
			printf("Bad DS file: fstype=%s salen=%d"
			    " fhlen=%d\n",
			    nvp->v_mount->mnt_vfc->vfc_name,
			    nmp->nm_nam->sa_len, np->n_fhp->nfh_len);
			error = ENOENT;
		}

		/* Set extattrs for the DS on the MDS file. */
		if (error == 0) {
			if (dsa != NULL) {
				error = VOP_GETATTR(nvp, &va, tcred);
				if (error == 0) {
					dsa->dsa_filerev = va.va_filerev;
					dsa->dsa_size = va.va_size;
					dsa->dsa_atime = va.va_atime;
					dsa->dsa_mtime = va.va_mtime;
				}
			}
			if (error == 0) {
				NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh,
				    NFSX_MYFH);
				NFSBCOPY(nmp->nm_nam, &pf->dsf_sin,
				    nmp->nm_nam->sa_len);
				NFSBCOPY(named.ni_cnd.cn_nameptr,
				    pf->dsf_filename,
				    sizeof(pf->dsf_filename));
			}
		} else
			/*
			 * NOTE(review): this else pairs with the check that
			 * follows the "Bad DS file" test, so it is not
			 * reached when VOP_GETATTR() fails; the message
			 * text may be misleading.
			 */
			printf("pNFS: pnfscreate can't get DS"
			    " attr=%d\n", error);
		vput(nvp);
	}
	nfsvno_relpathbuf(&named);
	return (error);
}

/*
 * Start up the thread that will execute nfsrv_dscreate().
 * arg points to a struct nfsrvdscreate.  The result is stored in its
 * "err" field, then "haskproc" is cleared and the waiter is woken up
 * while holding the NFSDSCLOCK() mutex, before the kernel process exits.
 */
static void
start_dscreate(void *arg)
{
	struct nfsrvdscreate *dsc;

	dsc = (struct nfsrvdscreate *)arg;
	dsc->err = nfsrv_dscreate(dsc->dvp, &dsc->createva, &dsc->va, &dsc->fh,
	    dsc->pf, NULL, dsc->tcred, dsc->p);
	NFSDSCLOCK();
	dsc->haskproc = 0;
	wakeup(dsc);
	NFSDSCUNLOCK();
	kproc_exit(0);
}

/*
 * Create a pNFS data file on the Data Server(s).
*/
static void
nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap, struct ucred *cred,
    NFSPROC_T *p)
{
	struct nfsrvdscreate *dsc, *tdsc;
	struct nfsdevice *ds, *mds;
	struct mount *mp;
	struct pnfsdsfile *pf, *tpf;
	struct pnfsdsattr dsattr;
	struct vattr va;
	struct vnode *dvp[NFSDEV_MAXMIRRORS];
	fhandle_t fh;
	uid_t vauid;
	gid_t vagid;
	u_short vamode;
	struct ucred *tcred;
	int dsdir[NFSDEV_MAXMIRRORS], error, haskproc, i, mirrorcnt, ret;

	/* Get a DS server directory in a round-robin order. */
	mirrorcnt = 1;
	NFSDDSLOCK();
	ds = TAILQ_FIRST(&nfsrv_devidhead);
	if (ds == NULL) {
		NFSDDSUNLOCK();
		NFSD_DEBUG(4, "nfsrv_pnfscreate: no srv\n");
		return;
	}
	/* Put at end of list to implement round-robin usage. */
	TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list);
	TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list);
	i = dsdir[0] = ds->nfsdev_nextdir;
	ds->nfsdev_nextdir = (ds->nfsdev_nextdir + 1) % nfsrv_dsdirsize;
	dvp[0] = ds->nfsdev_dsdir[i];
	/*
	 * Pick a directory on each mirror of the chosen DS as well.
	 * NOTE(review): mirrorcnt is not checked against NFSDEV_MAXMIRRORS
	 * here; confirm that the mirror list can never be longer than the
	 * dvp[] and dsdir[] arrays.
	 */
	TAILQ_FOREACH(mds, &ds->nfsdev_mirrors, nfsdev_list) {
		i = dsdir[mirrorcnt] = mds->nfsdev_nextdir;
		mds->nfsdev_nextdir = (mds->nfsdev_nextdir + 1) %
		    nfsrv_dsdirsize;
		dvp[mirrorcnt] = mds->nfsdev_dsdir[i];
		mirrorcnt++;
	}
	NFSDDSUNLOCK();
	/*
	 * One argument structure for each mirror except the last, plus one
	 * pnfsdsfile entry for every mirror.  Both are zeroed, so haskproc
	 * and err start out as 0.
	 */
	dsc = NULL;
	if (mirrorcnt > 1)
		tdsc = dsc = malloc(sizeof(*dsc) * (mirrorcnt - 1), M_TEMP,
		    M_WAITOK | M_ZERO);
	tpf = pf = malloc(sizeof(*pf) * mirrorcnt, M_TEMP, M_WAITOK | M_ZERO);

	error = nfsvno_getfh(vp, &fh, p);
	if (error == 0)
		error = VOP_GETATTR(vp, &va, cred);
	if (error == 0) {
		/* Set the three attributes for "vp" to Setattr the DS vp. */
		vauid = va.va_uid;
		vagid = va.va_gid;
		vamode = va.va_mode;
		VATTR_NULL(&va);
		va.va_uid = vauid;
		va.va_gid = vagid;
		va.va_mode = vamode;
	} else
		printf("pNFS: pnfscreate getfh+attr=%d\n", error);

	NFSD_DEBUG(4, "nfsrv_pnfscreate: cruid=%d crgid=%d\n", cred->cr_uid,
	    cred->cr_gid);
	/* Make data file name based on FH. */
	tcred = newnfs_getcred();

	/*
	 * Create the file on each DS mirror, using kernel process(es) for the
	 * additional mirrors.
	 */
	haskproc = 0;
	for (i = 0; i < mirrorcnt - 1 && error == 0; i++, tpf++, tdsc++) {
		tpf->dsf_dir = dsdir[i];
		tdsc->tcred = tcred;
		tdsc->p = p;
		tdsc->pf = tpf;
		tdsc->createva = *vap;
		tdsc->fh = fh;
		tdsc->va = va;
		tdsc->dvp = dvp[i];
		/* Set before kproc_create(), since the kproc clears it. */
		tdsc->haskproc = 1;
		ret = kproc_create(start_dscreate, (void *)tdsc, NULL, 0, 0,
		    "nfsdpcr");
		if (ret == 0)
			haskproc = 1;
		else {
			/* No kernel process: do the create in this thread. */
			tdsc->haskproc = 0;
			ret = nfsrv_dscreate(dvp[i], vap, &va, &fh, tpf, NULL,
			    tcred, p);
			if (error == 0 && ret != 0)
				error = ret;
		}
	}
	/* The last mirror is always done here and supplies dsattr. */
	if (error == 0) {
		tpf->dsf_dir = dsdir[mirrorcnt - 1];
		error = nfsrv_dscreate(dvp[mirrorcnt - 1], vap, &va, &fh, tpf,
		    &dsattr, tcred, p);
	}
	if (haskproc != 0) {
		/* Wait for kernel proc(s) to complete. */
		NFSDSCLOCK();
		for (tdsc = dsc, i = 0; i < mirrorcnt - 1; i++, tdsc++) {
			while (tdsc->haskproc != 0)
				mtx_sleep(tdsc, NFSDSCLOCKMUTEXPTR, PVFS,
				    "nfspcr", 0);
			/* Keep the first error seen. */
			if (error == 0 && tdsc->err != 0)
				error = tdsc->err;
		}
		NFSDSCUNLOCK();
	}
	NFSFREECRED(tcred);
	if (error == 0) {
		/* Record the DS file(s) and attributes on the MDS file. */
		error = vn_start_write(vp, &mp, V_WAIT);
		if (error == 0) {
			error = vn_extattr_set(vp, IO_NODELOCKED,
			    EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile",
			    sizeof(*pf) * mirrorcnt, (char *)pf, p);
			if (error == 0)
				error = vn_extattr_set(vp, IO_NODELOCKED,
				    EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr",
				    sizeof(dsattr), (char *)&dsattr, p);
			vn_finished_write(mp);
			if (error != 0)
				printf("pNFS: pnfscreate setextattr=%d\n",
				    error);
		} else
			printf("pNFS: pnfscreate startwrite=%d\n", error);
	} else
		printf("pNFS: pnfscreate=%d\n", error);
	free(pf, M_TEMP);
	free(dsc, M_TEMP);
}

/*
 * Get the information needed to remove the pNFS Data Server file from the
 * Metadata file.  Upon success, dvpp is set non-NULL to the locked
 * DS directory vnode.  The caller must unlock *dvpp when done with it.
*/
static void
nfsrv_pnfsremovesetup(struct vnode *vp, NFSPROC_T *p, struct vnode **dvpp,
    int *mirrorcntp, char *fname)
{
	struct nfsmount *dsmounts[NFSDEV_MAXMIRRORS];
	struct vattr attrs;
	struct ucred *tmpcred;
	char *xattrbuf;
	int notpnfs, ret, xattrlen;

	*dvpp = NULL;

	/*
	 * Nothing to do unless this is an exported regular file and at
	 * least one DS is configured.  The test is made with the DS list
	 * lock held.
	 */
	NFSDDSLOCK();
	notpnfs = (vp->v_type != VREG ||
	    (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 ||
	    TAILQ_EMPTY(&nfsrv_devidhead));
	NFSDDSUNLOCK();
	if (notpnfs)
		return;

	/* The DS file only goes away along with the last hard link. */
	tmpcred = newnfs_getcred();
	ret = VOP_GETATTR(vp, &attrs, tmpcred);
	NFSFREECRED(tmpcred);
	if (ret != 0) {
		printf("pNFS: nfsrv_pnfsremovesetup getattr=%d\n", ret);
		return;
	}
	if (attrs.va_nlink > 1)
		return;

	/*
	 * Fetch the exclusively locked DS directory vnode(s), the mirror
	 * count and the DS file name, using a scratch buffer for the
	 * extended attribute.
	 */
	xattrlen = 1024;
	xattrbuf = malloc(xattrlen, M_TEMP, M_WAITOK);
	ret = nfsrv_dsgetsockmnt(vp, LK_EXCLUSIVE, xattrbuf, xattrlen,
	    mirrorcntp, p, dvpp, dsmounts, NULL, NULL, fname);
	free(xattrbuf, M_TEMP);
	if (ret != 0)
		printf("pNFS: nfsrv_pnfsremovesetup getsockmnt=%d\n", ret);
}

/*
 * Remove a DS data file for nfsrv_pnfsremove(). Called for each mirror.
 * The arguments are in a structure, so that they can be passed through
 * kproc_create() for a kernel process to execute this function.
*/ struct nfsrvdsremove { struct ucred *tcred; struct vnode *dvp; NFSPROC_T *p; int haskproc; char fname[PNFS_FILENAME_LEN + 1]; }; static void nfsrv_dsremove(struct vnode *dvp, char *fname, struct ucred *tcred, NFSPROC_T *p) { struct nameidata named; struct vnode *nvp; char *bufp; u_long *hashp; int error; named.ni_cnd.cn_nameiop = DELETE; named.ni_cnd.cn_lkflags = LK_EXCLUSIVE | LK_RETRY; named.ni_cnd.cn_cred = tcred; named.ni_cnd.cn_thread = p; named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF | SAVENAME; nfsvno_setpathbuf(&named, &bufp, &hashp); named.ni_cnd.cn_nameptr = bufp; named.ni_cnd.cn_namelen = strlen(fname); strlcpy(bufp, fname, NAME_MAX); NFSD_DEBUG(4, "nfsrv_pnfsremove: filename=%s\n", bufp); error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd); NFSD_DEBUG(4, "nfsrv_pnfsremove: aft LOOKUP=%d\n", error); if (error == 0) { error = VOP_REMOVE(dvp, nvp, &named.ni_cnd); vput(nvp); } nfsvno_relpathbuf(&named); if (error != 0) printf("pNFS: nfsrv_pnfsremove failed=%d\n", error); } /* * Start up the thread that will execute nfsrv_dsremove(). */ static void start_dsremove(void *arg) { struct nfsrvdsremove *dsrm; dsrm = (struct nfsrvdsremove *)arg; nfsrv_dsremove(dsrm->dvp, dsrm->fname, dsrm->tcred, dsrm->p); NFSDSRMLOCK(); dsrm->haskproc = 0; wakeup(dsrm); NFSDSRMUNLOCK(); kproc_exit(0); } /* * Remove a pNFS data file from a Data Server. * nfsrv_pnfsremovesetup() must have been called before the MDS file was * removed to set up the dvp and fill in the FH. */ static void nfsrv_pnfsremove(struct vnode **dvp, int mirrorcnt, char *fname, NFSPROC_T *p) { struct ucred *tcred; struct nfsrvdsremove *dsrm, *tdsrm; int haskproc, i, ret; tcred = newnfs_getcred(); dsrm = NULL; if (mirrorcnt > 1) dsrm = malloc(sizeof(*dsrm) * mirrorcnt - 1, M_TEMP, M_WAITOK); /* * Remove the file on each DS mirror, using kernel process(es) for the * additional mirrors. 
*/ haskproc = 0; for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) { tdsrm->tcred = tcred; tdsrm->p = p; tdsrm->dvp = dvp[i]; strlcpy(tdsrm->fname, fname, PNFS_FILENAME_LEN + 1); tdsrm->haskproc = 1; ret = kproc_create(start_dsremove, (void *)tdsrm, NULL, 0, 0, "nfsdprm"); if (ret == 0) haskproc = 1; else { tdsrm->haskproc = 0; nfsrv_dsremove(dvp[i], fname, tcred, p); } } nfsrv_dsremove(dvp[mirrorcnt - 1], fname, tcred, p); if (haskproc != 0) { /* Wait for kernel proc(s) to complete. */ NFSDSRMLOCK(); for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) { while (tdsrm->haskproc != 0) mtx_sleep(tdsrm, NFSDSRMLOCKMUTEXPTR, PVFS, "nfsprm", 0); } NFSDSRMUNLOCK(); } NFSFREECRED(tcred); free(dsrm, M_TEMP); } /* * Generate a file name based on the file handle and put it in *bufp. * Return the number of bytes generated. */ static int nfsrv_putfhname(fhandle_t *fhp, char *bufp) { int i; uint8_t *cp; const uint8_t *hexdigits = "0123456789abcdef"; cp = (uint8_t *)fhp; for (i = 0; i < sizeof(*fhp); i++) { bufp[2 * i] = hexdigits[(*cp >> 4) & 0xf]; bufp[2 * i + 1] = hexdigits[*cp++ & 0xf]; } bufp[2 * i] = '\0'; return (2 * i); } /* * Update the Metadata file's attributes from the DS file when a Read/Write * layout is returned. * Basically just call nfsrv_proxyds() with procedure == NFSPROC_LAYOUTRETURN * so that it does a nfsrv_getattrdsrpc() and nfsrv_setextattr() on the DS file. */ int nfsrv_updatemdsattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p) { struct ucred *tcred; int error; /* Do this as root so that it won't fail with EACCES. */ tcred = newnfs_getcred(); error = nfsrv_proxyds(NULL, vp, 0, 0, tcred, p, NFSPROC_LAYOUTRETURN, NULL, NULL, NULL, nap, NULL); NFSFREECRED(tcred); return (error); } /* * Set the NFSv4 ACL on the DS file to the same ACL as the MDS file. 
*/
int
nfsrv_dssetacl(struct vnode *vp, struct acl *aclp, struct ucred *cred,
    NFSPROC_T *p)
{
	int error;

	/* Just a nfsrv_proxyds() call with ioproc NFSPROC_SETACL. */
	error = nfsrv_proxyds(NULL, vp, 0, 0, cred, p, NFSPROC_SETACL,
	    NULL, NULL, NULL, NULL, aclp);
	return (error);
}

/*
 * Do the operation selected by ioproc against the DS file(s) of the MDS
 * file vp.
 * NFSPROC_READDS uses off, cnt, mpp and mpp2 and goes to the first DS.
 * NFSPROC_WRITEDS uses off, cnt, mpp and cp and goes to every mirror.
 * NFSPROC_SETATTR uses nap and NFSPROC_SETACL uses aclp, for every mirror.
 * Any other ioproc does a nfsrv_getattrdsrpc() into nap against the last
 * mirror.  For NFSPROC_GETATTR the "pnfsd.dsattr" extended attribute is
 * tried first and may satisfy the request without an RPC.
 * Returns ENOENT if vp is not a regular file on an exported file system,
 * no DS is configured or the DS information cannot be acquired.
 * Otherwise the result of the operation is returned.
 */
static int
nfsrv_proxyds(struct nfsrv_descript *nd, struct vnode *vp, off_t off, int cnt,
    struct ucred *cred, struct thread *p, int ioproc, struct mbuf **mpp,
    char *cp, struct mbuf **mpp2, struct nfsvattr *nap, struct acl *aclp)
{
	struct nfsmount *nmp[NFSDEV_MAXMIRRORS];
	fhandle_t fh[NFSDEV_MAXMIRRORS];
	struct vnode *dvp[NFSDEV_MAXMIRRORS];
	struct pnfsdsattr dsattr;
	char *buf;
	int buflen, error, i, mirrorcnt;

	NFSD_DEBUG(4, "in nfsrv_proxyds\n");
	/*
	 * If not a regular file, not exported or not a pNFS server,
	 * just return ENOENT.
	 */
	NFSDDSLOCK();
	if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 ||
	    TAILQ_EMPTY(&nfsrv_devidhead)) {
		NFSDDSUNLOCK();
		return (ENOENT);
	}
	NFSDDSUNLOCK();

	/* Scratch buffer for the extended attributes read below. */
	buflen = 1024;
	buf = malloc(buflen, M_TEMP, M_WAITOK);
	error = 0;

	/*
	 * For Getattr, get the Change attribute (va_filerev) and size (va_size)
	 * from the MetaData file's extended attribute.
	 */
	if (ioproc == NFSPROC_GETATTR) {
		error = vn_extattr_get(vp, IO_NODELOCKED,
		    EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", &buflen, buf,
		    p);
		/* An attribute of the wrong size is treated as an error. */
		if (error == 0 && buflen != sizeof(dsattr))
			error = ENXIO;
		if (error == 0) {
			NFSBCOPY(buf, &dsattr, buflen);
			nap->na_filerev = dsattr.dsa_filerev;
			nap->na_size = dsattr.dsa_size;
			nap->na_atime = dsattr.dsa_atime;
			nap->na_mtime = dsattr.dsa_mtime;

			/*
			 * If nfsrv_pnfsgetdsattr is 0 or nfsrv_checkdsattr()
			 * returns 0, just return now.  nfsrv_checkdsattr()
			 * returns 0 if there is no Read/Write layout
			 * plus either an Open/Write_access or Write
			 * delegation issued to a client for the file.
			 */
			if (nfsrv_pnfsgetdsattr == 0 ||
			    nfsrv_checkdsattr(nd, vp, p) == 0) {
				free(buf, M_TEMP);
				return (error);
			}
		}

		/*
		 * Clear ENOATTR so the code below will attempt to do a
		 * nfsrv_getattrdsrpc() to get the attributes and (re)create
		 * the extended attribute.
		 */
		if (error == ENOATTR)
			error = 0;
	}

	/*
	 * Get the mount, file handle and locked directory vnode for each
	 * mirror.
	 * NOTE(review): the two messages below look swapped.  The first one
	 * is printed when nfsrv_dsgetsockmnt() fails and the second one when
	 * the "pnfsd.dsattr" get above failed; confirm before changing them.
	 */
	if (error == 0) {
		buflen = 1024;
		error = nfsrv_dsgetsockmnt(vp, LK_SHARED, buf, buflen,
		    &mirrorcnt, p, dvp, nmp, fh, NULL, NULL);
		if (error != 0)
			printf("pNFS: proxy getextattr sockaddr=%d\n", error);
	} else
		printf("pNFS: nfsrv_dsgetsockmnt=%d\n", error);
	if (error == 0) {
		if (ioproc == NFSPROC_READDS)
			error = nfsrv_readdsrpc(fh, off, cnt, cred, p, nmp[0],
			    mpp, mpp2);
		else if (ioproc == NFSPROC_WRITEDS)
			error = nfsrv_writedsrpc(fh, off, cnt, cred, p, vp,
			    &nmp[0], mirrorcnt, mpp, cp);
		else if (ioproc == NFSPROC_SETATTR)
			error = nfsrv_setattrdsrpc(fh, cred, p, vp, &nmp[0],
			    mirrorcnt, nap);
		else if (ioproc == NFSPROC_SETACL)
			error = nfsrv_setacldsrpc(fh, cred, p, vp, &nmp[0],
			    mirrorcnt, aclp);
		else
			error = nfsrv_getattrdsrpc(&fh[mirrorcnt - 1], cred, p,
			    vp, nmp[mirrorcnt - 1], nap);
		/* Unlock the directory vnodes locked by the call above. */
		for (i = 0; i < mirrorcnt; i++)
			NFSVOPUNLOCK(dvp[i], 0);
		NFSD_DEBUG(4, "nfsrv_proxyds: aft RPC=%d\n", error);
	} else {
		/* Return ENOENT for any Extended Attribute error. */
		error = ENOENT;
	}
	free(buf, M_TEMP);
	NFSD_DEBUG(4, "nfsrv_proxyds: error=%d\n", error);
	return (error);
}

/*
 * Get the DS mount point, fh and directory from the "pnfsd.dsfile" extended
 * attribute.
*/
/*
 * buf/buflen is scratch space that receives the extended attribute.
 * A lktype of 0 is treated as LK_SHARED.
 * The following are filled in, one entry per mirror that was found:
 * - dvpp (may be NULL), the DS directory vnodes, locked with lktype
 * - nmpp, the DS mounts (only used when dvpp is not NULL)
 * - fhp (may be NULL), the DS file handles
 * - devid (may be NULL), the device ids, NFSX_V4DEVICEID bytes each
 * fnamep (may be NULL) receives the file name from the first entry.
 * *mirrorcntp is set to the number of mirrors found upon success.
 * Returns 0 or an errno value.  When an error is returned, any directory
 * vnodes that had already been locked via dvpp are unlocked again.
 */
static int
nfsrv_dsgetsockmnt(struct vnode *vp, int lktype, char *buf, int buflen,
    int *mirrorcntp, NFSPROC_T *p, struct vnode **dvpp,
    struct nfsmount **nmpp, fhandle_t *fhp, char *devid, char *fnamep)
{
	struct vnode *dvp, **tdvpp;
	struct nfsmount *nmp;
	struct sockaddr *sad;
	struct nfsdevice *ds, *mds;
	struct pnfsdsfile *pf;
	uint32_t dsdir;
	int done, error, fhiszero, gotone, i, mirrorcnt;

	*mirrorcntp = 1;
	fhiszero = 0;
	/* tdvpp walks the output array; dvpp stays put for the cleanup. */
	tdvpp = dvpp;
	if (lktype == 0)
		lktype = LK_SHARED;
	if (dvpp != NULL) {
		*dvpp = NULL;
		*nmpp = NULL;
	}
	error = vn_extattr_get(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM,
	    "pnfsd.dsfile", &buflen, buf, p);
	/* The attribute must be a whole number of pnfsdsfile entries. */
	mirrorcnt = buflen / sizeof(*pf);
	if (error == 0 && (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS ||
	    buflen != sizeof(*pf) * mirrorcnt))
		error = ENOATTR;

	pf = (struct pnfsdsfile *)buf;
	gotone = 0;
	for (i = 0; i < mirrorcnt && error == 0; i++, pf++) {
		sad = (struct sockaddr *)&pf->dsf_sin;
		dsdir = pf->dsf_dir;
		if (dsdir >= nfsrv_dsdirsize) {
			printf("nfsrv_dsgetsockmnt: dsdir=%d\n", dsdir);
			error = ENOATTR;
		}
		if (error == 0) {
			/*
			 * NOTE(review): fhiszero is never cleared, so once
			 * one entry has an all zeros file handle the later
			 * entries are handled as if they did too; confirm
			 * that this is intended.
			 */
			if (NFSBCMP(&zerofh, &pf->dsf_fh,
			    sizeof(zerofh)) == 0)
				fhiszero = 1;
			/* Use the socket address to find the mount point. */
			done = 0;
			NFSDDSLOCK();
			TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
				/* Check this device's mirrors first... */
				TAILQ_FOREACH(mds, &ds->nfsdev_mirrors,
				    nfsdev_list) {
					dvp = mds->nfsdev_dvp;
					nmp = VFSTONFS(dvp->v_mount);
					if (nfsaddr2_match(sad, nmp->nm_nam)) {
						ds = mds;
						done = 1;
						break;
					}
				}
				if (done != 0)
					break;
				/* ...and then the device itself. */
				dvp = ds->nfsdev_dvp;
				nmp = VFSTONFS(dvp->v_mount);
				if (nfsaddr2_match(sad, nmp->nm_nam))
					break;
			}
			NFSDDSUNLOCK();
			/* ds is NULL here when the loop found no match. */
			if (ds != NULL) {
				if (dvpp != NULL || fhiszero != 0) {
					dvp = ds->nfsdev_dsdir[dsdir];
					error = vn_lock(dvp, lktype);
					/*
					 * If the file handle is all 0's, try to
					 * do a Lookup against the DS to acquire
					 * it.
					 * If dvpp == NULL or the Lookup fails,
					 * unlock dvp after the call.
					 */
					if (error == 0 && fhiszero != 0) {
						error = nfsrv_pnfslookupds(vp,
						    pf, dvp, p);
						if (error != 0 || dvpp == NULL)
							NFSVOPUNLOCK(dvp, 0);
					}
				}
				if (devid != NULL) {
					NFSBCOPY(ds->nfsdev_deviceid, devid,
					    NFSX_V4DEVICEID);
					devid += NFSX_V4DEVICEID;
				}
				if (error == 0) {
					gotone++;
					if (dvpp != NULL) {
						*tdvpp++ = dvp;
						*nmpp++ = nmp;
					}
					if (fhp != NULL)
						NFSBCOPY(&pf->dsf_fh, fhp++,
						    NFSX_MYFH);
					if (fnamep != NULL && i == 0)
						strlcpy(fnamep,
						    pf->dsf_filename,
						    sizeof(pf->dsf_filename));
				} else
					NFSD_DEBUG(4, "nfsrv_dsgetsockmnt "
					    "err=%d\n", error);
			}
		}
	}
	/* Not finding any of the mirrors is an error as well. */
	if (error == 0 && gotone == 0)
		error = ENOENT;

	if (error == 0)
		*mirrorcntp = gotone;
	else if (gotone > 0 && dvpp != NULL) {
		/*
		 * If the error didn't occur on the first one and dvpp != NULL,
		 * the one(s) prior to the failure will have locked dvp's that
		 * need to be unlocked.
		 */
		for (i = 0; i < gotone; i++) {
			NFSVOPUNLOCK(*dvpp, 0);
			*dvpp++ = NULL;
		}
	}
	return (error);
}

/*
 * Set the extended attribute for the Change attribute.
 * The change, size, atime and mtime values in *nap are stored in the
 * "pnfsd.dsattr" extended attribute of vp.
 * Returns 0 or an errno value, which is also reported with printf().
 */
static int
nfsrv_setextattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p)
{
	struct pnfsdsattr dsattr;
	struct mount *mp;
	int error;

	error = vn_start_write(vp, &mp, V_WAIT);
	if (error == 0) {
		dsattr.dsa_filerev = nap->na_filerev;
		dsattr.dsa_size = nap->na_size;
		dsattr.dsa_atime = nap->na_atime;
		dsattr.dsa_mtime = nap->na_mtime;
		error = vn_extattr_set(vp, IO_NODELOCKED,
		    EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr",
		    sizeof(dsattr), (char *)&dsattr, p);
		vn_finished_write(mp);
	}
	if (error != 0)
		printf("pNFS: setextattr=%d\n", error);
	return (error);
}

/*
 * Do a read RPC of len bytes at offset off against the DS mount nmp, for
 * the DS file with file handle *fhp.
 * When data is returned, *mpp is set to the mbuf chain holding just the
 * data plus XDR padding and *mpendp to the last mbuf of that chain.
 * Otherwise *mpp is left as NULL.
 * Returns 0, an errno value or the status from the reply.
 */
static int
nfsrv_readdsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred,
    NFSPROC_T *p, struct nfsmount *nmp, struct mbuf **mpp,
    struct mbuf **mpendp)
{
	uint32_t *tl;
	struct nfsrv_descript *nd;
	nfsv4stateid_t st;
	struct mbuf *m, *m2;
	int error = 0, retlen, tlen, trimlen;

	NFSD_DEBUG(4, "in nfsrv_readdsrpc\n");
	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
	*mpp = NULL;
	/*
	 * Use a stateid where other is an alternating 01010 pattern and
	 * seqid is 0xffffffff.  This value is not defined as special by
	 * the RFC and is used by the FreeBSD NFS server to indicate an
	 * MDS->DS proxy operation.
	 */
	st.other[0] = 0x55555555;
	st.other[1] = 0x55555555;
	st.other[2] = 0x55555555;
	st.seqid = 0xffffffff;
	nfscl_reqstart(nd, NFSPROC_READDS, nmp, (u_int8_t *)fhp, sizeof(*fhp),
	    NULL, NULL, 0, 0);
	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
	/* The arguments are the 64 bit offset followed by the length. */
	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3);
	txdr_hyper(off, tl);
	*(tl + 2) = txdr_unsigned(len);
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	if (error != 0) {
		free(nd, M_TEMP);
		return (error);
	}
	if (nd->nd_repstat == 0) {
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		NFSM_STRSIZ(retlen, len);
		if (retlen > 0) {
			/* Trim off the pre-data XDR from the mbuf chain. */
			m = nd->nd_mrep;
			while (m != NULL && m != nd->nd_md) {
				if (m->m_next == nd->nd_md) {
					m->m_next = NULL;
					m_freem(nd->nd_mrep);
					nd->nd_mrep = m = nd->nd_md;
				} else
					m = m->m_next;
			}
			if (m == NULL) {
				printf("nfsrv_readdsrpc: busted mbuf list\n");
				error = ENOENT;
				goto nfsmout;
			}

			/*
			 * Now, adjust first mbuf so that any XDR before the
			 * read data is skipped over.
			 */
			trimlen = nd->nd_dpos - mtod(m, char *);
			if (trimlen > 0) {
				m->m_len -= trimlen;
				NFSM_DATAP(m, trimlen);
			}

			/*
			 * Truncate the mbuf chain at retlen bytes of data,
			 * plus XDR padding that brings the length up to a
			 * multiple of 4.
			 */
			tlen = NFSM_RNDUP(retlen);
			do {
				if (m->m_len >= tlen) {
					m->m_len = tlen;
					tlen = 0;
					m2 = m->m_next;
					m->m_next = NULL;
					m_freem(m2);
					break;
				}
				tlen -= m->m_len;
				m = m->m_next;
			} while (m != NULL);
			/* The chain ran out before tlen bytes were seen. */
			if (tlen > 0) {
				printf("nfsrv_readdsrpc: busted mbuf list\n");
				error = ENOENT;
				goto nfsmout;
			}
			/* Hand the chain to the caller. */
			*mpp = nd->nd_mrep;
			*mpendp = m;
			nd->nd_mrep = NULL;
		}
	} else
		error = nd->nd_repstat;
nfsmout:
	/* If nd->nd_mrep is already NULL, this is a no-op.
*/ m_freem(nd->nd_mrep); free(nd, M_TEMP); NFSD_DEBUG(4, "nfsrv_readdsrpc error=%d\n", error); return (error); } /* * Do a write RPC on a DS data file, using this structure for the arguments, * so that this function can be executed by a separate kernel process. */ struct nfsrvwritedsdorpc { + int done; + int inprog; + struct task tsk; fhandle_t fh; off_t off; int len; struct nfsmount *nmp; struct ucred *cred; NFSPROC_T *p; struct mbuf *m; - int haskproc; int err; }; static int nfsrv_writedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, int len, struct nfsvattr *nap, struct mbuf *m, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsrv_descript *nd; nfsattrbit_t attrbits; nfsv4stateid_t st; int commit, error, retlen; nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); nfscl_reqstart(nd, NFSPROC_WRITE, nmp, (u_int8_t *)fhp, sizeof(fhandle_t), NULL, NULL, 0, 0); /* * Use a stateid where other is an alternating 01010 pattern and * seqid is 0xffffffff. This value is not defined as special by * the RFC and is used by the FreeBSD NFS server to indicate an * MDS->DS proxy operation. */ st.other[0] = 0x55555555; st.other[1] = 0x55555555; st.other[2] = 0x55555555; st.seqid = 0xffffffff; nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED); txdr_hyper(off, tl); tl += 2; /* * Do all writes FileSync, since the server doesn't hold onto dirty * buffers. Since clients should be accessing the DS servers directly * using the pNFS layouts, this just needs to work correctly as a * fallback. */ *tl++ = txdr_unsigned(NFSWRITE_FILESYNC); *tl = txdr_unsigned(len); NFSD_DEBUG(4, "nfsrv_writedsdorpc: len=%d\n", len); /* Put data in mbuf chain. */ nd->nd_mb->m_next = m; /* Set nd_mb and nd_bpos to end of data. 
*/ while (m->m_next != NULL) m = m->m_next; nd->nd_mb = m; nd->nd_bpos = mtod(m, char *) + m->m_len; NFSD_DEBUG(4, "nfsrv_writedsdorpc: lastmb len=%d\n", m->m_len); /* Do a Getattr for Size, Change and Modify Time. */ NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) { free(nd, M_TEMP); return (error); } NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft writerpc=%d\n", nd->nd_repstat); /* Get rid of weak cache consistency data for now. */ if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == (ND_NFSV4 | ND_V4WCCATTR)) { error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); NFSD_DEBUG(4, "nfsrv_writedsdorpc: wcc attr=%d\n", error); if (error != 0) goto nfsmout; /* * Get rid of Op# and status for next op. */ NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); if (*++tl != 0) nd->nd_flag |= ND_NOMOREDATA; } if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF); retlen = fxdr_unsigned(int, *tl++); commit = fxdr_unsigned(int, *tl); if (commit != NFSWRITE_FILESYNC) error = NFSERR_IO; NFSD_DEBUG(4, "nfsrv_writedsdorpc:retlen=%d commit=%d err=%d\n", retlen, commit, error); } else error = nd->nd_repstat; /* We have no use for the Write Verifier since we use FileSync. */ /* * Get the Change, Size, Access Time and Modify Time attributes and set * on the Metadata file, so its attributes will be what the file's * would be if it had been written. 
*/ if (error == 0) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); } NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft loadattr=%d\n", error); nfsmout: m_freem(nd->nd_mrep); free(nd, M_TEMP); NFSD_DEBUG(4, "nfsrv_writedsdorpc error=%d\n", error); return (error); } /* * Start up the thread that will execute nfsrv_writedsdorpc(). */ static void -start_writedsdorpc(void *arg) +start_writedsdorpc(void *arg, int pending) { struct nfsrvwritedsdorpc *drpc; drpc = (struct nfsrvwritedsdorpc *)arg; drpc->err = nfsrv_writedsdorpc(drpc->nmp, &drpc->fh, drpc->off, drpc->len, NULL, drpc->m, drpc->cred, drpc->p); - NFSDWRPCLOCK(); - drpc->haskproc = 0; - wakeup(drpc); - NFSDWRPCUNLOCK(); - kproc_exit(0); + drpc->done = 1; + NFSD_DEBUG(4, "start_writedsdorpc: err=%d\n", drpc->err); } static int nfsrv_writedsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred, NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, struct mbuf **mpp, char *cp) { struct nfsrvwritedsdorpc *drpc, *tdrpc; struct nfsvattr na; struct mbuf *m; - int error, haskproc, i, offs, ret; + int error, i, offs, ret, timo; NFSD_DEBUG(4, "in nfsrv_writedsrpc\n"); KASSERT(*mpp != NULL, ("nfsrv_writedsrpc: NULL mbuf chain")); drpc = NULL; if (mirrorcnt > 1) tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, M_WAITOK); /* Calculate offset in mbuf chain that data starts. */ offs = cp - mtod(*mpp, char *); NFSD_DEBUG(4, "nfsrv_writedsrpc: mcopy offs=%d len=%d\n", offs, len); /* * Do the write RPC for every DS, using a separate kernel process * for every DS except the last one. 
*/ - haskproc = 0; error = 0; for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { + tdrpc->done = 0; tdrpc->fh = *fhp; tdrpc->off = off; tdrpc->len = len; tdrpc->nmp = *nmpp; tdrpc->cred = cred; tdrpc->p = p; + tdrpc->inprog = 0; + tdrpc->err = 0; tdrpc->m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK); - tdrpc->haskproc = 1; - ret = kproc_create(start_writedsdorpc, (void *)tdrpc, NULL, 0, - 0, "nfsdpw"); - if (ret == 0) - haskproc = 1; - else { - tdrpc->haskproc = 0; + ret = EIO; + if (nfs_pnfsiothreads > 0) { + ret = nfs_pnfsio(start_writedsdorpc, tdrpc); + NFSD_DEBUG(4, "nfsrv_writedsrpc: nfs_pnfsio=%d\n", + ret); + } + if (ret != 0) { ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, NULL, tdrpc->m, cred, p); if (error == 0 && ret != 0) error = ret; } nmpp++; fhp++; } m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK); ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, &na, m, cred, p); if (error == 0 && ret != 0) error = ret; if (error == 0) error = nfsrv_setextattr(vp, &na, p); NFSD_DEBUG(4, "nfsrv_writedsrpc: aft setextat=%d\n", error); - if (haskproc != 0) { - /* Wait for kernel proc(s) to complete. */ - NFSDWRPCLOCK(); - for (tdrpc = drpc, i = 0; i < mirrorcnt - 1; i++, tdrpc++) { - while (tdrpc->haskproc != 0) - mtx_sleep(tdrpc, NFSDWRPCLOCKMUTEXPTR, PVFS, - "nfspw", 0); - if (error == 0 && tdrpc->err != 0) - error = tdrpc->err; - } - NFSDWRPCUNLOCK(); + tdrpc = drpc; + timo = hz / 50; /* Wait for 20msec. */ + if (timo < 1) + timo = 1; + for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { + /* Wait for RPCs on separate threads to complete. 
*/ + while (tdrpc->inprog != 0 && tdrpc->done == 0) + tsleep(&tdrpc->tsk, PVFS, "srvwrds", timo); + if (error == 0 && tdrpc->err != 0) + error = tdrpc->err; } free(drpc, M_TEMP); return (error); } static int nfsrv_setattrdsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap, struct nfsvattr *dsnap) { uint32_t *tl; struct nfsrv_descript *nd; nfsv4stateid_t st; nfsattrbit_t attrbits; int error; NFSD_DEBUG(4, "in nfsrv_setattrdsdorpc\n"); nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); /* * Use a stateid where other is an alternating 01010 pattern and * seqid is 0xffffffff. This value is not defined as special by * the RFC and is used by the FreeBSD NFS server to indicate an * MDS->DS proxy operation. */ st.other[0] = 0x55555555; st.other[1] = 0x55555555; st.other[2] = 0x55555555; st.seqid = 0xffffffff; nfscl_reqstart(nd, NFSPROC_SETATTR, nmp, (u_int8_t *)fhp, sizeof(*fhp), NULL, NULL, 0, 0); nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); nfscl_fillsattr(nd, &nap->na_vattr, vp, NFSSATTR_FULL, 0); /* Do a Getattr for Size, Change, Access Time and Modify Time. */ NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) { free(nd, M_TEMP); return (error); } NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattrrpc=%d\n", nd->nd_repstat); /* Get rid of weak cache consistency data for now. 
*/ if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == (ND_NFSV4 | ND_V4WCCATTR)) { error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: wcc attr=%d\n", error); if (error != 0) goto nfsmout; /* * Get rid of Op# and status for next op. */ NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); if (*++tl != 0) nd->nd_flag |= ND_NOMOREDATA; } error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (error != 0) goto nfsmout; if (nd->nd_repstat != 0) error = nd->nd_repstat; /* * Get the Change, Size, Access Time and Modify Time attributes and set * on the Metadata file, so its attributes will be what the file's * would be if it had been written. */ if (error == 0) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); } NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattr loadattr=%d\n", error); nfsmout: m_freem(nd->nd_mrep); free(nd, M_TEMP); NFSD_DEBUG(4, "nfsrv_setattrdsdorpc error=%d\n", error); return (error); } struct nfsrvsetattrdsdorpc { + int done; + int inprog; + struct task tsk; fhandle_t fh; struct nfsmount *nmp; struct vnode *vp; struct ucred *cred; NFSPROC_T *p; struct nfsvattr na; struct nfsvattr dsna; - int haskproc; int err; }; /* * Start up the thread that will execute nfsrv_setattrdsdorpc(). 
*/ static void -start_setattrdsdorpc(void *arg) +start_setattrdsdorpc(void *arg, int pending) { struct nfsrvsetattrdsdorpc *drpc; drpc = (struct nfsrvsetattrdsdorpc *)arg; drpc->err = nfsrv_setattrdsdorpc(&drpc->fh, drpc->cred, drpc->p, drpc->vp, drpc->nmp, &drpc->na, &drpc->dsna); - NFSDSRPCLOCK(); - drpc->haskproc = 0; - wakeup(drpc); - NFSDSRPCUNLOCK(); - kproc_exit(0); + drpc->done = 1; } static int nfsrv_setattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, struct nfsvattr *nap) { struct nfsrvsetattrdsdorpc *drpc, *tdrpc; struct nfsvattr na; - int error, haskproc, i, ret; + int error, i, ret, timo; NFSD_DEBUG(4, "in nfsrv_setattrdsrpc\n"); drpc = NULL; if (mirrorcnt > 1) tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, M_WAITOK); /* * Do the setattr RPC for every DS, using a separate kernel process * for every DS except the last one. */ - haskproc = 0; error = 0; for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { + tdrpc->done = 0; + tdrpc->inprog = 0; tdrpc->fh = *fhp; tdrpc->nmp = *nmpp; tdrpc->vp = vp; tdrpc->cred = cred; tdrpc->p = p; tdrpc->na = *nap; - tdrpc->haskproc = 1; - ret = kproc_create(start_setattrdsdorpc, (void *)tdrpc, NULL, 0, - 0, "nfsdps"); - if (ret == 0) - haskproc = 1; - else { - tdrpc->haskproc = 0; + tdrpc->err = 0; + ret = EIO; + if (nfs_pnfsiothreads > 0) { + ret = nfs_pnfsio(start_setattrdsdorpc, tdrpc); + NFSD_DEBUG(4, "nfsrv_setattrdsrpc: nfs_pnfsio=%d\n", + ret); + } + if (ret != 0) { ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap, &na); if (error == 0 && ret != 0) error = ret; } nmpp++; fhp++; } ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap, &na); if (error == 0 && ret != 0) error = ret; if (error == 0) error = nfsrv_setextattr(vp, &na, p); NFSD_DEBUG(4, "nfsrv_setattrdsrpc: aft setextat=%d\n", error); - if (haskproc != 0) { - /* Wait for kernel proc(s) to complete. 
*/ - NFSDSRPCLOCK(); - for (tdrpc = drpc, i = 0; i < mirrorcnt - 1; i++, tdrpc++) { - while (tdrpc->haskproc != 0) - mtx_sleep(tdrpc, NFSDSRPCLOCKMUTEXPTR, PVFS, - "nfsps", 0); - if (error == 0 && tdrpc->err != 0) - error = tdrpc->err; - } - NFSDSRPCUNLOCK(); + tdrpc = drpc; + timo = hz / 50; /* Wait for 20msec. */ + if (timo < 1) + timo = 1; + for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { + /* Wait for RPCs on separate threads to complete. */ + while (tdrpc->inprog != 0 && tdrpc->done == 0) + tsleep(&tdrpc->tsk, PVFS, "srvsads", timo); + if (error == 0 && tdrpc->err != 0) + error = tdrpc->err; } free(drpc, M_TEMP); return (error); } /* * Do a Setattr of an NFSv4 ACL on the DS file. */ static int nfsrv_setacldsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, struct vnode *vp, struct nfsmount *nmp, struct acl *aclp) { struct nfsrv_descript *nd; nfsv4stateid_t st; nfsattrbit_t attrbits; int error; NFSD_DEBUG(4, "in nfsrv_setacldsdorpc\n"); nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); /* * Use a stateid where other is an alternating 01010 pattern and * seqid is 0xffffffff. This value is not defined as special by * the RFC and is used by the FreeBSD NFS server to indicate an * MDS->DS proxy operation. */ st.other[0] = 0x55555555; st.other[1] = 0x55555555; st.other[2] = 0x55555555; st.seqid = 0xffffffff; nfscl_reqstart(nd, NFSPROC_SETACL, nmp, (u_int8_t *)fhp, sizeof(*fhp), NULL, NULL, 0, 0); nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL); /* * The "vp" argument to nfsv4_fillattr() is only used for vnode_type(), * so passing in the metadata "vp" will be ok, since it is of * the same type (VREG). 
*/ nfsv4_fillattr(nd, NULL, vp, aclp, NULL, NULL, 0, &attrbits, NULL, NULL, 0, 0, 0, 0, 0); error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) { free(nd, M_TEMP); return (error); } NFSD_DEBUG(4, "nfsrv_setacldsdorpc: aft setaclrpc=%d\n", nd->nd_repstat); error = nd->nd_repstat; m_freem(nd->nd_mrep); free(nd, M_TEMP); return (error); } struct nfsrvsetacldsdorpc { + int done; + int inprog; + struct task tsk; fhandle_t fh; struct nfsmount *nmp; struct vnode *vp; struct ucred *cred; NFSPROC_T *p; struct acl *aclp; - int haskproc; int err; }; /* * Start up the thread that will execute nfsrv_setacldsdorpc(). */ static void -start_setacldsdorpc(void *arg) +start_setacldsdorpc(void *arg, int pending) { struct nfsrvsetacldsdorpc *drpc; drpc = (struct nfsrvsetacldsdorpc *)arg; drpc->err = nfsrv_setacldsdorpc(&drpc->fh, drpc->cred, drpc->p, drpc->vp, drpc->nmp, drpc->aclp); - NFSDARPCLOCK(); - drpc->haskproc = 0; - wakeup(drpc); - NFSDARPCUNLOCK(); - kproc_exit(0); + drpc->done = 1; } static int nfsrv_setacldsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, struct acl *aclp) { struct nfsrvsetacldsdorpc *drpc, *tdrpc; - int error, haskproc, i, ret; + int error, i, ret, timo; NFSD_DEBUG(4, "in nfsrv_setacldsrpc\n"); drpc = NULL; if (mirrorcnt > 1) tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, M_WAITOK); /* * Do the setattr RPC for every DS, using a separate kernel process * for every DS except the last one. 
*/ - haskproc = 0; error = 0; for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { + tdrpc->done = 0; + tdrpc->inprog = 0; tdrpc->fh = *fhp; tdrpc->nmp = *nmpp; tdrpc->vp = vp; tdrpc->cred = cred; tdrpc->p = p; tdrpc->aclp = aclp; - tdrpc->haskproc = 1; - ret = kproc_create(start_setacldsdorpc, (void *)tdrpc, NULL, 0, - 0, "nfsdpa"); - if (ret == 0) - haskproc = 1; - else { - tdrpc->haskproc = 0; + tdrpc->err = 0; + ret = EIO; + if (nfs_pnfsiothreads > 0) { + ret = nfs_pnfsio(start_setacldsdorpc, tdrpc); + NFSD_DEBUG(4, "nfsrv_setacldsrpc: nfs_pnfsio=%d\n", + ret); + } + if (ret != 0) { ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, aclp); if (error == 0 && ret != 0) error = ret; } nmpp++; fhp++; } ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, aclp); if (error == 0 && ret != 0) error = ret; NFSD_DEBUG(4, "nfsrv_setacldsrpc: aft setextat=%d\n", error); - if (haskproc != 0) { - /* Wait for kernel proc(s) to complete. */ - NFSDARPCLOCK(); - for (tdrpc = drpc, i = 0; i < mirrorcnt - 1; i++, tdrpc++) { - while (tdrpc->haskproc != 0) - mtx_sleep(tdrpc, NFSDARPCLOCKMUTEXPTR, PVFS, - "nfspa", 0); - if (error == 0 && tdrpc->err != 0) - error = tdrpc->err; - } - NFSDARPCUNLOCK(); + tdrpc = drpc; + timo = hz / 50; /* Wait for 20msec. */ + if (timo < 1) + timo = 1; + for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { + /* Wait for RPCs on separate threads to complete. */ + while (tdrpc->inprog != 0 && tdrpc->done == 0) + tsleep(&tdrpc->tsk, PVFS, "srvacds", timo); + if (error == 0 && tdrpc->err != 0) + error = tdrpc->err; } free(drpc, M_TEMP); return (error); } /* * Getattr call to the DS for the Modify, Size and Change attributes. 
*/ static int nfsrv_getattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap) { struct nfsrv_descript *nd; int error; nfsattrbit_t attrbits; NFSD_DEBUG(4, "in nfsrv_getattrdsrpc\n"); nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, (u_int8_t *)fhp, sizeof(fhandle_t), NULL, NULL, 0, 0); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); (void) nfsrv_putattrbit(nd, &attrbits); error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) { free(nd, M_TEMP); return (error); } NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft getattrrpc=%d\n", nd->nd_repstat); if (nd->nd_repstat == 0) { error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); if (error == 0) { error = nfsrv_setextattr(vp, nap, p); NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft setextat=%d\n", error); } } else error = nd->nd_repstat; m_freem(nd->nd_mrep); free(nd, M_TEMP); NFSD_DEBUG(4, "nfsrv_getattrdsrpc error=%d\n", error); return (error); } /* * Get the device id and file handle for a DS file. */ int nfsrv_dsgetdevandfh(struct vnode *vp, NFSPROC_T *p, int *mirrorcntp, fhandle_t *fhp, char *devid) { int buflen, error; char *buf; buflen = 1024; buf = malloc(buflen, M_TEMP, M_WAITOK); error = nfsrv_dsgetsockmnt(vp, 0, buf, buflen, mirrorcntp, p, NULL, NULL, fhp, devid, NULL); free(buf, M_TEMP); return (error); } /* * Do a Lookup against the DS for the filename and set the file handle * to the correct one, if successful. 
*/ static int nfsrv_pnfslookupds(struct vnode *vp, struct pnfsdsfile *pf, struct vnode *dvp, NFSPROC_T *p) { struct nameidata named; struct ucred *tcred; struct mount *mp; char *bufp; u_long *hashp; struct vnode *nvp; struct nfsnode *np; int error, ret; tcred = newnfs_getcred(); named.ni_cnd.cn_nameiop = LOOKUP; named.ni_cnd.cn_lkflags = LK_SHARED | LK_RETRY; named.ni_cnd.cn_cred = tcred; named.ni_cnd.cn_thread = p; named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF | SAVENAME; nfsvno_setpathbuf(&named, &bufp, &hashp); named.ni_cnd.cn_nameptr = bufp; named.ni_cnd.cn_namelen = strlen(pf->dsf_filename); strlcpy(bufp, pf->dsf_filename, NAME_MAX); NFSD_DEBUG(4, "nfsrv_pnfslookupds: filename=%s\n", bufp); error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd); NFSD_DEBUG(4, "nfsrv_pnfslookupds: aft LOOKUP=%d\n", error); NFSFREECRED(tcred); nfsvno_relpathbuf(&named); if (error == 0) { np = VTONFS(nvp); NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh, NFSX_MYFH); vput(nvp); /* * We can only do a setextattr for an exclusively * locked vp. Instead of trying to upgrade a shared * lock, just leave dsf_fh zeroed out and it will * keep doing this lookup until it is done with an * exclusively locked vp. */ if (NFSVOPISLOCKED(vp) == LK_EXCLUSIVE) { ret = vn_start_write(vp, &mp, V_WAIT); NFSD_DEBUG(4, "nfsrv_pnfslookupds: vn_start_write=%d\n", ret); if (ret == 0) { ret = vn_extattr_set(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", sizeof(*pf), (char *)pf, p); vn_finished_write(mp); NFSD_DEBUG(4, "nfsrv_pnfslookupds: aft " "vn_extattr_set=%d\n", ret); } } } NFSD_DEBUG(4, "eo nfsrv_pnfslookupds=%d\n", error); return (error); } extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *); /* * Called once to initialize data structures... 
*/ static int nfsd_modevent(module_t mod, int type, void *data) { int error = 0, i; static int loaded = 0; switch (type) { case MOD_LOAD: if (loaded) goto out; newnfs_portinit(); for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { mtx_init(&nfsrchash_table[i].mtx, "nfsrtc", NULL, MTX_DEF); mtx_init(&nfsrcahash_table[i].mtx, "nfsrtca", NULL, MTX_DEF); } mtx_init(&nfsrc_udpmtx, "nfsuc", NULL, MTX_DEF); mtx_init(&nfs_v4root_mutex, "nfs4rt", NULL, MTX_DEF); mtx_init(&nfsv4root_mnt.mnt_mtx, "nfs4mnt", NULL, MTX_DEF); lockinit(&nfsv4root_mnt.mnt_explock, PVFS, "explock", 0, 0); nfsrvd_initcache(); nfsd_init(); NFSD_LOCK(); nfsrvd_init(0); NFSD_UNLOCK(); nfsd_mntinit(); #ifdef VV_DISABLEDELEG vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation; vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation; #endif nfsd_call_servertimer = nfsrv_servertimer; nfsd_call_nfsd = nfssvc_nfsd; loaded = 1; break; case MOD_UNLOAD: if (newnfs_numnfsd != 0) { error = EBUSY; break; } #ifdef VV_DISABLEDELEG vn_deleg_ops.vndeleg_recall = NULL; vn_deleg_ops.vndeleg_disable = NULL; #endif nfsd_call_servertimer = NULL; nfsd_call_nfsd = NULL; /* Clean out all NFSv4 state. */ nfsrv_throwawayallstate(curthread); /* Clean the NFS server reply cache */ nfsrvd_cleancache(); /* Free up the krpc server pool. 
*/ if (nfsrvd_pool != NULL) svcpool_destroy(nfsrvd_pool); /* and get rid of the locks */ for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { mtx_destroy(&nfsrchash_table[i].mtx); mtx_destroy(&nfsrcahash_table[i].mtx); } mtx_destroy(&nfsrc_udpmtx); mtx_destroy(&nfs_v4root_mutex); mtx_destroy(&nfsv4root_mnt.mnt_mtx); for (i = 0; i < nfsrv_sessionhashsize; i++) mtx_destroy(&nfssessionhash[i].mtx); for (i = 0; i < nfsrv_layouthashsize; i++) mtx_destroy(&nfslayouthash[i].mtx); lockdestroy(&nfsv4root_mnt.mnt_explock); free(nfsclienthash, M_NFSDCLIENT); free(nfslockhash, M_NFSDLOCKFILE); free(nfssessionhash, M_NFSDSESSION); free(nfslayouthash, M_NFSDSESSION); loaded = 0; break; default: error = EOPNOTSUPP; break; } out: NFSEXITCODE(error); return (error); } static moduledata_t nfsd_mod = { "nfsd", nfsd_modevent, NULL, }; DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY); /* So that loader and kldload(2) can find us, wherever we are.. */ MODULE_VERSION(nfsd, 1); MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1); MODULE_DEPEND(nfsd, nfslock, 1, 1, 1); MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1); MODULE_DEPEND(nfsd, krpc, 1, 1, 1); MODULE_DEPEND(nfsd, nfssvc, 1, 1, 1); Index: projects/pnfs-planb-server-stable11/sys/fs/nfsserver/nfs_nfsdserv.c =================================================================== --- projects/pnfs-planb-server-stable11/sys/fs/nfsserver/nfs_nfsdserv.c (revision 324529) +++ projects/pnfs-planb-server-stable11/sys/fs/nfsserver/nfs_nfsdserv.c (revision 324530) @@ -1,4673 +1,4673 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. 
* 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * nfs version 2, 3 and 4 server calls to vnode ops * - these routines generally have 3 phases * 1 - break down and validate rpc request in mbuf list * 2 - do the vnode ops for the request, usually by calling a nfsvno_XXX() * function in nfsd_port.c * 3 - build the rpc reply in an mbuf list * For nfsv4, these functions are called for each Op within the Compound RPC. 
*/ #ifndef APPLEKEXT #include /* Global vars */ extern u_int32_t newnfs_false, newnfs_true; extern enum vtype nv34tov_type[8]; extern struct timeval nfsboottime; extern int nfs_rootfhset; extern int nfsrv_enable_crossmntpt; extern int nfsrv_statehashsize; extern int nfsrv_layouthashsize; extern time_t nfsdev_time; extern struct nfsdevicehead nfsrv_devidhead; extern int nfsd_debuglevel; extern u_long sb_max_adj; extern int nfsrv_pnfsatime; extern int nfsrv_maxpnfsmirror; #endif /* !APPLEKEXT */ static int nfs_async = 0; SYSCTL_DECL(_vfs_nfsd); SYSCTL_INT(_vfs_nfsd, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0, "Tell client that writes were synced even though they were not"); extern int nfsrv_doflexfile; SYSCTL_INT(_vfs_nfsd, OID_AUTO, default_flexfile, CTLFLAG_RW, &nfsrv_doflexfile, 0, "Make Flex File Layout the default for pNFS"); /* * This list defines the GSS mechanisms supported. * (Don't ask me how you get these strings from the RFC stuff like * iso(1), org(3)... but someone did it, so I don't need to know.) 
*/ static struct nfsgss_mechlist nfsgss_mechlist[] = { { 9, "\052\206\110\206\367\022\001\002\002", 11 }, { 0, "", 0 }, }; /* local functions */ static void nfsrvd_symlinksub(struct nfsrv_descript *nd, struct nameidata *ndp, struct nfsvattr *nvap, fhandle_t *fhp, vnode_t *vpp, vnode_t dirp, struct nfsvattr *dirforp, struct nfsvattr *diraftp, int *diraft_retp, nfsattrbit_t *attrbitp, NFSACL_T *aclp, NFSPROC_T *p, struct nfsexstuff *exp, char *pathcp, int pathlen); static void nfsrvd_mkdirsub(struct nfsrv_descript *nd, struct nameidata *ndp, struct nfsvattr *nvap, fhandle_t *fhp, vnode_t *vpp, vnode_t dirp, struct nfsvattr *dirforp, struct nfsvattr *diraftp, int *diraft_retp, nfsattrbit_t *attrbitp, NFSACL_T *aclp, NFSPROC_T *p, struct nfsexstuff *exp); /* * nfs access service (not a part of NFS V2) */ APPLESTATIC int nfsrvd_access(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp) { u_int32_t *tl; int getret, error = 0; struct nfsvattr nva; u_int32_t testmode, nfsmode, supported = 0; accmode_t deletebit; if (nd->nd_repstat) { nfsrv_postopattr(nd, 1, &nva); goto out; } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nfsmode = fxdr_unsigned(u_int32_t, *tl); if ((nd->nd_flag & ND_NFSV4) && (nfsmode & ~(NFSACCESS_READ | NFSACCESS_LOOKUP | NFSACCESS_MODIFY | NFSACCESS_EXTEND | NFSACCESS_DELETE | NFSACCESS_EXECUTE))) { nd->nd_repstat = NFSERR_INVAL; vput(vp); goto out; } if (nfsmode & NFSACCESS_READ) { supported |= NFSACCESS_READ; if (nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~NFSACCESS_READ; } if (nfsmode & NFSACCESS_MODIFY) { supported |= NFSACCESS_MODIFY; if (nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~NFSACCESS_MODIFY; } if (nfsmode & NFSACCESS_EXTEND) { supported |= NFSACCESS_EXTEND; if (nfsvno_accchk(vp, VWRITE | VAPPEND, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, 
NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~NFSACCESS_EXTEND; } if (nfsmode & NFSACCESS_DELETE) { supported |= NFSACCESS_DELETE; if (vp->v_type == VDIR) deletebit = VDELETE_CHILD; else deletebit = VDELETE; if (nfsvno_accchk(vp, deletebit, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~NFSACCESS_DELETE; } if (vnode_vtype(vp) == VDIR) testmode = NFSACCESS_LOOKUP; else testmode = NFSACCESS_EXECUTE; if (nfsmode & testmode) { supported |= (nfsmode & testmode); if (nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~testmode; } nfsmode &= supported; if (nd->nd_flag & ND_NFSV3) { getret = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); nfsrv_postopattr(nd, getret, &nva); } vput(vp); if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(supported); } else NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(nfsmode); out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfs getattr service */ APPLESTATIC int nfsrvd_getattr(struct nfsrv_descript *nd, int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { struct nfsvattr nva; fhandle_t fh; int at_root = 0, error = 0, supports_nfsv4acls; struct nfsreferral *refp; nfsattrbit_t attrbits, tmpbits; struct mount *mp; struct vnode *tvp = NULL; struct vattr va; uint64_t mounted_on_fileno = 0; accmode_t accmode; if (nd->nd_repstat) goto out; if (nd->nd_flag & ND_NFSV4) { error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (error) { vput(vp); goto out; } /* * Check for a referral. 
*/ refp = nfsv4root_getreferral(vp, NULL, 0); if (refp != NULL) { (void) nfsrv_putreferralattr(nd, &attrbits, refp, 1, &nd->nd_repstat); vput(vp); goto out; } if (nd->nd_repstat == 0) { accmode = 0; NFSSET_ATTRBIT(&tmpbits, &attrbits); /* * GETATTR with write-only attr time_access_set and time_modify_set * should return NFS4ERR_INVAL. */ if (NFSISSET_ATTRBIT(&tmpbits, NFSATTRBIT_TIMEACCESSSET) || NFSISSET_ATTRBIT(&tmpbits, NFSATTRBIT_TIMEMODIFYSET)){ error = NFSERR_INVAL; vput(vp); goto out; } if (NFSISSET_ATTRBIT(&tmpbits, NFSATTRBIT_ACL)) { NFSCLRBIT_ATTRBIT(&tmpbits, NFSATTRBIT_ACL); accmode |= VREAD_ACL; } if (NFSNONZERO_ATTRBIT(&tmpbits)) accmode |= VREAD_ATTRIBUTES; if (accmode != 0) nd->nd_repstat = nfsvno_accchk(vp, accmode, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); } } if (!nd->nd_repstat) nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, &attrbits); if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV4) { if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_FILEHANDLE)) nd->nd_repstat = nfsvno_getfh(vp, &fh, p); if (!nd->nd_repstat) nd->nd_repstat = nfsrv_checkgetattr(nd, vp, &nva, &attrbits, p); if (nd->nd_repstat == 0) { supports_nfsv4acls = nfs_supportsnfsv4acls(vp); mp = vp->v_mount; if (nfsrv_enable_crossmntpt != 0 && vp->v_type == VDIR && (vp->v_vflag & VV_ROOT) != 0 && vp != rootvnode) { tvp = mp->mnt_vnodecovered; VREF(tvp); at_root = 1; } else at_root = 0; vfs_ref(mp); NFSVOPUNLOCK(vp, 0); if (at_root != 0) { if ((nd->nd_repstat = NFSVOPLOCK(tvp, LK_SHARED)) == 0) { nd->nd_repstat = VOP_GETATTR( tvp, &va, nd->nd_cred); vput(tvp); } else vrele(tvp); if (nd->nd_repstat == 0) mounted_on_fileno = (uint64_t) va.va_fileid; else at_root = 0; } if (nd->nd_repstat == 0) nd->nd_repstat = vfs_busy(mp, 0); vfs_rel(mp); if (nd->nd_repstat == 0) { (void)nfsvno_fillattr(nd, mp, vp, &nva, &fh, 0, &attrbits, nd->nd_cred, p, isdgram, 1, supports_nfsv4acls, at_root, mounted_on_fileno); vfs_unbusy(mp); } vrele(vp); } else vput(vp); } else { 
nfsrv_fillattr(nd, &nva); vput(vp); } } else { vput(vp); } out: NFSEXITCODE2(error, nd); return (error); } /* * nfs setattr service */ APPLESTATIC int nfsrvd_setattr(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp) { struct nfsvattr nva, nva2; u_int32_t *tl; int preat_ret = 1, postat_ret = 1, gcheck = 0, error = 0; int gotproxystateid; struct timespec guard = { 0, 0 }; nfsattrbit_t attrbits, retbits; nfsv4stateid_t stateid; NFSACL_T *aclp = NULL; if (nd->nd_repstat) { nfsrv_wcc(nd, preat_ret, &nva2, postat_ret, &nva); goto out; } #ifdef NFS4_ACL_EXTATTR_NAME aclp = acl_alloc(M_WAITOK); aclp->acl_cnt = 0; #endif gotproxystateid = 0; NFSVNO_ATTRINIT(&nva); if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); stateid.other[0] = *tl++; stateid.other[1] = *tl++; stateid.other[2] = *tl; if (stateid.other[0] == 0x55555555 && stateid.other[1] == 0x55555555 && stateid.other[2] == 0x55555555 && stateid.seqid == 0xffffffff) gotproxystateid = 1; } error = nfsrv_sattr(nd, vp, &nva, &attrbits, aclp, p); if (error) goto nfsmout; /* For NFSv4, only va_uid is used from nva2. 
*/ NFSZERO_ATTRBIT(&retbits); NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_OWNER); preat_ret = nfsvno_getattr(vp, &nva2, nd, p, 1, &retbits); if (!nd->nd_repstat) nd->nd_repstat = preat_ret; NFSZERO_ATTRBIT(&retbits); if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); gcheck = fxdr_unsigned(int, *tl); if (gcheck) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); fxdr_nfsv3time(tl, &guard); } if (!nd->nd_repstat && gcheck && (nva2.na_ctime.tv_sec != guard.tv_sec || nva2.na_ctime.tv_nsec != guard.tv_nsec)) nd->nd_repstat = NFSERR_NOT_SYNC; if (nd->nd_repstat) { vput(vp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif nfsrv_wcc(nd, preat_ret, &nva2, postat_ret, &nva); goto out; } } else if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV4)) nd->nd_repstat = nfsrv_checkuidgid(nd, &nva); /* * Now that we have all the fields, lets do it. * If the size is being changed write access is required, otherwise * just check for a read only file system. */ if (!nd->nd_repstat) { if (NFSVNO_NOTSETSIZE(&nva)) { if (NFSVNO_EXRDONLY(exp) || (vfs_flags(vnode_mount(vp)) & MNT_RDONLY)) nd->nd_repstat = EROFS; } else { if (vnode_vtype(vp) != VREG) nd->nd_repstat = EINVAL; else if (nva2.na_uid != nd->nd_cred->cr_uid || NFSVNO_EXSTRICTACCESS(exp)) nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); } } /* * Proxy operations from the MDS are allowed via the all 0s special * stateid. */ if (nd->nd_repstat == 0 && (nd->nd_flag & ND_NFSV4) != 0 && gotproxystateid == 0) nd->nd_repstat = nfsrv_checksetattr(vp, nd, &stateid, &nva, &attrbits, exp, p); if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV4)) { /* * For V4, try setting the attrbutes in sets, so that the * reply bitmap will be correct for an error case. 
*/ if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_OWNER) || NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_OWNERGROUP)) { NFSVNO_ATTRINIT(&nva2); NFSVNO_SETATTRVAL(&nva2, uid, nva.na_uid); NFSVNO_SETATTRVAL(&nva2, gid, nva.na_gid); nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p, exp); if (!nd->nd_repstat) { if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_OWNER)) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_OWNER); if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_OWNERGROUP)) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_OWNERGROUP); } } if (!nd->nd_repstat && NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_SIZE)) { NFSVNO_ATTRINIT(&nva2); NFSVNO_SETATTRVAL(&nva2, size, nva.na_size); nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p, exp); if (!nd->nd_repstat) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_SIZE); } if (!nd->nd_repstat && (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET) || NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFYSET))) { NFSVNO_ATTRINIT(&nva2); NFSVNO_SETATTRVAL(&nva2, atime, nva.na_atime); NFSVNO_SETATTRVAL(&nva2, mtime, nva.na_mtime); if (nva.na_vaflags & VA_UTIMES_NULL) { nva2.na_vaflags |= VA_UTIMES_NULL; NFSVNO_SETACTIVE(&nva2, vaflags); } nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p, exp); if (!nd->nd_repstat) { if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET)) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_TIMEACCESSSET); if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFYSET)) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_TIMEMODIFYSET); } } if (!nd->nd_repstat && NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_MODE)) { NFSVNO_ATTRINIT(&nva2); NFSVNO_SETATTRVAL(&nva2, mode, nva.na_mode); nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p, exp); if (!nd->nd_repstat) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_MODE); } #ifdef NFS4_ACL_EXTATTR_NAME if (!nd->nd_repstat && aclp->acl_cnt > 0 && NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_ACL)) { nd->nd_repstat = nfsrv_setacl(vp, aclp, nd->nd_cred, p); if (!nd->nd_repstat) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_ACL); } 
#endif
	} else if (!nd->nd_repstat) {
		nd->nd_repstat = nfsvno_setattr(vp, &nva, nd->nd_cred, p,
		    exp);
	}
	if (nd->nd_flag & (ND_NFSV2 | ND_NFSV3)) {
		postat_ret = nfsvno_getattr(vp, &nva, nd, p, 1, NULL);
		if (!nd->nd_repstat)
			nd->nd_repstat = postat_ret;
	}
	vput(vp);
#ifdef NFS4_ACL_EXTATTR_NAME
	acl_free(aclp);
#endif
	if (nd->nd_flag & ND_NFSV3)
		nfsrv_wcc(nd, preat_ret, &nva2, postat_ret, &nva);
	else if (nd->nd_flag & ND_NFSV4)
		(void) nfsrv_putattrbit(nd, &retbits);
	else if (!nd->nd_repstat)
		nfsrv_fillattr(nd, &nva);

out:
	NFSEXITCODE2(0, nd);
	return (0);
nfsmout:
	vput(vp);
#ifdef NFS4_ACL_EXTATTR_NAME
	acl_free(aclp);
#endif
	if (nd->nd_flag & ND_NFSV4) {
		/*
		 * For all nd_repstat, the V4 reply includes a bitmap,
		 * even NFSERR_BADXDR, which is what this will end up
		 * returning.
		 */
		(void) nfsrv_putattrbit(nd, &retbits);
	}
	NFSEXITCODE2(error, nd);
	return (error);
}

/*
 * nfs lookup rpc
 * (Also performs lookup parent for v4)
 *
 * Parses a name from the request and looks it up relative to dp.
 * On success the file handle of the vnode found is stored in *fhp and the
 * vnode itself is stored in *vpp when vpp is not NULL; otherwise the vnode
 * is released with vput().  For V2 and V3 the file handle and attributes
 * are appended to the reply here; nothing is appended for V4.
 * The RPC status is reported through nd->nd_repstat.  The return value is
 * the result of nfsrv_parsename(), and 0 on every other path.
 */
APPLESTATIC int
nfsrvd_lookup(struct nfsrv_descript *nd, __unused int isdgram,
    vnode_t dp, vnode_t *vpp, fhandle_t *fhp, NFSPROC_T *p,
    struct nfsexstuff *exp)
{
	struct nameidata named;
	vnode_t vp, dirp = NULL;
	int error = 0, dattr_ret = 1;
	struct nfsvattr nva, dattr;
	char *bufp;
	u_long *hashp;

	/* An error was already recorded for this request; just reply. */
	if (nd->nd_repstat) {
		nfsrv_postopattr(nd, dattr_ret, &dattr);
		goto out;
	}

	/*
	 * For some reason, if dp is a symlink, the error
	 * returned is supposed to be NFSERR_SYMLINK and not NFSERR_NOTDIR.
	 */
	if (dp->v_type == VLNK && (nd->nd_flag & ND_NFSV4)) {
		nd->nd_repstat = NFSERR_SYMLINK;
		vrele(dp);
		goto out;
	}

	NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, LOOKUP,
	    LOCKLEAF | SAVESTART);
	nfsvno_setpathbuf(&named, &bufp, &hashp);
	error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen);
	if (error) {
		vrele(dp);
		nfsvno_relpathbuf(&named);
		goto out;
	}
	if (!nd->nd_repstat) {
		nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, p, &dirp);
	} else {
		/* Parsing set a reply status: drop dp and the path buffer. */
		vrele(dp);
		nfsvno_relpathbuf(&named);
	}
	if (nd->nd_repstat) {
		if (dirp) {
			if (nd->nd_flag & ND_NFSV3)
				dattr_ret = nfsvno_getattr(dirp, &dattr, nd, p,
				    0, NULL);
			vrele(dirp);
		}
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, dattr_ret, &dattr);
		goto out;
	}
	if (named.ni_startdir)
		vrele(named.ni_startdir);
	nfsvno_relpathbuf(&named);
	vp = named.ni_vp;
	if ((nd->nd_flag & ND_NFSV4) != 0 && !NFSVNO_EXPORTED(exp) &&
	    vp->v_type != VDIR && vp->v_type != VLNK)
		/*
		 * Only allow lookup of VDIR and VLNK for traversal of
		 * non-exported volumes during NFSv4 mounting.
		 */
		nd->nd_repstat = ENOENT;
	if (nd->nd_repstat == 0)
		nd->nd_repstat = nfsvno_getfh(vp, fhp, p);
	if (!(nd->nd_flag & ND_NFSV4) && !nd->nd_repstat)
		nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, NULL);
	/* Hand the vnode to the caller only when everything succeeded. */
	if (vpp != NULL && nd->nd_repstat == 0)
		*vpp = vp;
	else
		vput(vp);
	if (dirp) {
		if (nd->nd_flag & ND_NFSV3)
			dattr_ret = nfsvno_getattr(dirp, &dattr, nd, p, 0,
			    NULL);
		vrele(dirp);
	}
	if (nd->nd_repstat) {
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, dattr_ret, &dattr);
		goto out;
	}
	if (nd->nd_flag & ND_NFSV2) {
		(void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 0);
		nfsrv_fillattr(nd, &nva);
	} else if (nd->nd_flag & ND_NFSV3) {
		(void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 0);
		nfsrv_postopattr(nd, 0, &nva);
		nfsrv_postopattr(nd, dattr_ret, &dattr);
	}

out:
	NFSEXITCODE2(error, nd);
	return (error);
}

/*
 * nfs readlink service
 *
 * Replies with the contents of the symbolic link vp.  A vp that is not a
 * VLNK yields ENXIO for V2 and EINVAL otherwise.  When nd_repstat is already
 * set on entry only nfsrv_postopattr() is called and vp is left untouched;
 * on every other path vp is released with vput() before the reply is built.
 * Always returns 0; the RPC status is reported through nd->nd_repstat.
 */
APPLESTATIC int
nfsrvd_readlink(struct nfsrv_descript *nd, __unused int isdgram,
    vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp)
{
	u_int32_t *tl;
	mbuf_t mp = NULL, mpend = NULL;
	int getret = 1, len;
	struct nfsvattr nva;

	if (nd->nd_repstat) {
		nfsrv_postopattr(nd, getret, &nva);
		goto out;
	}
	if (vnode_vtype(vp) != VLNK) {
		if (nd->nd_flag & ND_NFSV2)
			nd->nd_repstat = ENXIO;
		else
			nd->nd_repstat = EINVAL;
	}
	if (!nd->nd_repstat)
		nd->nd_repstat = nfsvno_readlink(vp, nd->nd_cred, p,
		    &mp, &mpend, &len);
	if (nd->nd_flag & ND_NFSV3)
		getret = nfsvno_getattr(vp, &nva, nd, p, 1, NULL);
	vput(vp);
	if (nd->nd_flag & ND_NFSV3)
		nfsrv_postopattr(nd, getret, &nva);
	if (nd->nd_repstat)
		goto out;
	/* Length word first, then chain the link data mbufs onto the reply. */
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(len);
	mbuf_setnext(nd->nd_mb, mp);
	nd->nd_mb = mpend;
	nd->nd_bpos = NFSMTOD(mpend, caddr_t) + mbuf_len(mpend);

out:
	NFSEXITCODE2(0, nd);
	return (0);
}

/*
 * nfs read service
 */
APPLESTATIC int
nfsrvd_read(struct nfsrv_descript *nd, __unused int isdgram,
    vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp)
{
	u_int32_t *tl;
	int error = 0, cnt, getret = 1, gotproxystateid, reqlen, eof = 0;
	mbuf_t m2, m3;
	struct nfsvattr
nva; off_t off = 0x0; struct nfsstate st, *stp = &st; struct nfslock lo, *lop = &lo; nfsv4stateid_t stateid; nfsquad_t clientid; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &nva); goto out; } if (nd->nd_flag & ND_NFSV2) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); off = (off_t)fxdr_unsigned(u_int32_t, *tl++); reqlen = fxdr_unsigned(int, *tl); } else if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); off = fxdr_hyper(tl); tl += 2; reqlen = fxdr_unsigned(int, *tl); } else { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 3*NFSX_UNSIGNED); reqlen = fxdr_unsigned(int, *(tl + 6)); } if (reqlen > NFS_SRVMAXDATA(nd)) { reqlen = NFS_SRVMAXDATA(nd); } else if (reqlen < 0) { error = EBADRPC; goto nfsmout; } gotproxystateid = 0; if (nd->nd_flag & ND_NFSV4) { stp->ls_flags = (NFSLCK_CHECK | NFSLCK_READACCESS); lop->lo_flags = NFSLCK_READ; stp->ls_ownerlen = 0; stp->ls_op = NULL; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); clientid.lval[0] = stp->ls_stateid.other[0] = *tl++; clientid.lval[1] = stp->ls_stateid.other[1] = *tl++; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK1 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } stp->ls_stateid.other[2] = *tl++; /* * Don't allow the client to use a special stateid for a DS op. */ if ((nd->nd_flag & ND_DSSERVER) != 0 && ((stp->ls_stateid.other[0] == 0x0 && stp->ls_stateid.other[1] == 0x0 && stp->ls_stateid.other[2] == 0x0) || (stp->ls_stateid.other[0] == 0xffffffff && stp->ls_stateid.other[1] == 0xffffffff && stp->ls_stateid.other[2] == 0xffffffff) || stp->ls_stateid.seqid != 0)) nd->nd_repstat = NFSERR_BADSTATEID; /* However, allow the proxy stateid. 
*/ if (stp->ls_stateid.seqid == 0xffffffff && stp->ls_stateid.other[0] == 0x55555555 && stp->ls_stateid.other[1] == 0x55555555 && stp->ls_stateid.other[2] == 0x55555555) gotproxystateid = 1; off = fxdr_hyper(tl); lop->lo_first = off; tl += 2; lop->lo_end = off + reqlen; /* * Paranoia, just in case it wraps around. */ if (lop->lo_end < off) lop->lo_end = NFS64BITSSET; } if (vnode_vtype(vp) != VREG) { if (nd->nd_flag & ND_NFSV3) nd->nd_repstat = EINVAL; else nd->nd_repstat = (vnode_vtype(vp) == VDIR) ? EISDIR : EINVAL; } getret = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); if (!nd->nd_repstat) nd->nd_repstat = getret; if (!nd->nd_repstat && (nva.na_uid != nd->nd_cred->cr_uid || NFSVNO_EXSTRICTACCESS(exp))) { nd->nd_repstat = nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat) nd->nd_repstat = nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); } /* * DS reads are marked by ND_DSSERVER or use the proxy special * stateid. 
*/ if (nd->nd_repstat == 0 && (nd->nd_flag & (ND_NFSV4 | ND_DSSERVER)) == ND_NFSV4 && gotproxystateid == 0) nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid, &stateid, exp, nd, p); if (nd->nd_repstat) { vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &nva); goto out; } if (off >= nva.na_size) { cnt = 0; eof = 1; } else if (reqlen == 0) cnt = 0; else if ((off + reqlen) >= nva.na_size) { cnt = nva.na_size - off; eof = 1; } else cnt = reqlen; m3 = NULL; if (cnt > 0) { nd->nd_repstat = nfsvno_read(vp, off, cnt, nd->nd_cred, p, &m3, &m2); if (!(nd->nd_flag & ND_NFSV4)) { getret = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); if (!nd->nd_repstat) nd->nd_repstat = getret; } if (nd->nd_repstat) { vput(vp); if (m3) mbuf_freem(m3); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &nva); goto out; } } vput(vp); if (nd->nd_flag & ND_NFSV2) { nfsrv_fillattr(nd, &nva); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); } else { if (nd->nd_flag & ND_NFSV3) { nfsrv_postopattr(nd, getret, &nva); NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(cnt); } else NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (eof) *tl++ = newnfs_true; else *tl++ = newnfs_false; } *tl = txdr_unsigned(cnt); if (m3) { mbuf_setnext(nd->nd_mb, m3); nd->nd_mb = m2; nd->nd_bpos = NFSMTOD(m2, caddr_t) + mbuf_len(m2); } out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfs write service */ APPLESTATIC int nfsrvd_write(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp) { int i, cnt; u_int32_t *tl; mbuf_t mp; struct nfsvattr nva, forat; int aftat_ret = 1, retlen, len, error = 0, forat_ret = 1; int gotproxystateid, stable = NFSWRITE_FILESYNC; off_t off; struct nfsstate st, *stp = &st; struct nfslock lo, *lop = &lo; nfsv4stateid_t stateid; nfsquad_t clientid; nfsattrbit_t attrbits; if (nd->nd_repstat) { nfsrv_wcc(nd, forat_ret, &forat, aftat_ret, &nva); goto 
out; } gotproxystateid = 0; if (nd->nd_flag & ND_NFSV2) { NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); off = (off_t)fxdr_unsigned(u_int32_t, *++tl); tl += 2; retlen = len = fxdr_unsigned(int32_t, *tl); } else if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); off = fxdr_hyper(tl); tl += 3; stable = fxdr_unsigned(int, *tl++); retlen = len = fxdr_unsigned(int32_t, *tl); } else { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 4 * NFSX_UNSIGNED); stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS); lop->lo_flags = NFSLCK_WRITE; stp->ls_ownerlen = 0; stp->ls_op = NULL; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); clientid.lval[0] = stp->ls_stateid.other[0] = *tl++; clientid.lval[1] = stp->ls_stateid.other[1] = *tl++; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK2 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } stp->ls_stateid.other[2] = *tl++; /* * Don't allow the client to use a special stateid for a DS op. */ if ((nd->nd_flag & ND_DSSERVER) != 0 && ((stp->ls_stateid.other[0] == 0x0 && stp->ls_stateid.other[1] == 0x0 && stp->ls_stateid.other[2] == 0x0) || (stp->ls_stateid.other[0] == 0xffffffff && stp->ls_stateid.other[1] == 0xffffffff && stp->ls_stateid.other[2] == 0xffffffff) || stp->ls_stateid.seqid != 0)) nd->nd_repstat = NFSERR_BADSTATEID; /* However, allow the proxy stateid. 
*/ if (stp->ls_stateid.seqid == 0xffffffff && stp->ls_stateid.other[0] == 0x55555555 && stp->ls_stateid.other[1] == 0x55555555 && stp->ls_stateid.other[2] == 0x55555555) gotproxystateid = 1; off = fxdr_hyper(tl); lop->lo_first = off; tl += 2; stable = fxdr_unsigned(int, *tl++); retlen = len = fxdr_unsigned(int32_t, *tl); lop->lo_end = off + len; /* * Paranoia, just in case it wraps around, which shouldn't * ever happen anyhow. */ if (lop->lo_end < lop->lo_first) lop->lo_end = NFS64BITSSET; } /* * Loop through the mbuf chain, counting how many mbufs are a * part of this write operation, so the iovec size is known. */ cnt = 0; mp = nd->nd_md; i = NFSMTOD(mp, caddr_t) + mbuf_len(mp) - nd->nd_dpos; while (len > 0) { if (i > 0) { len -= i; cnt++; } mp = mbuf_next(mp); if (!mp) { if (len > 0) { error = EBADRPC; goto nfsmout; } } else i = mbuf_len(mp); } if (retlen > NFS_SRVMAXIO || retlen < 0) nd->nd_repstat = EIO; if (vnode_vtype(vp) != VREG && !nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) nd->nd_repstat = EINVAL; else nd->nd_repstat = (vnode_vtype(vp) == VDIR) ? EISDIR : EINVAL; } NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER); forat_ret = nfsvno_getattr(vp, &forat, nd, p, 1, &attrbits); if (!nd->nd_repstat) nd->nd_repstat = forat_ret; if (!nd->nd_repstat && (forat.na_uid != nd->nd_cred->cr_uid || NFSVNO_EXSTRICTACCESS(exp))) nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); /* * DS reads are marked by ND_DSSERVER or use the proxy special * stateid. 
*/ if (nd->nd_repstat == 0 && (nd->nd_flag & (ND_NFSV4 | ND_DSSERVER)) == ND_NFSV4 && gotproxystateid == 0) nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid, &stateid, exp, nd, p); if (nd->nd_repstat) { vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, forat_ret, &forat, aftat_ret, &nva); goto out; } /* * For NFS Version 2, it is not obvious what a write of zero length * should do, but I might as well be consistent with Version 3, * which is to return ok so long as there are no permission problems. */ if (retlen > 0) { nd->nd_repstat = nfsvno_write(vp, off, retlen, cnt, &stable, nd->nd_md, nd->nd_dpos, nd->nd_cred, p); error = nfsm_advance(nd, NFSM_RNDUP(retlen), -1); if (error) - panic("nfsrv_write mbuf"); + goto nfsmout; } if (nd->nd_flag & ND_NFSV4) aftat_ret = 0; else aftat_ret = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); vput(vp); if (!nd->nd_repstat) nd->nd_repstat = aftat_ret; if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, forat_ret, &forat, aftat_ret, &nva); if (nd->nd_repstat) goto out; NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(retlen); /* * If nfs_async is set, then pretend the write was FILESYNC. * Warning: Doing this violates RFC1813 and runs a risk * of data written by a client being lost when the server * crashes/reboots. */ if (stable == NFSWRITE_UNSTABLE && nfs_async == 0) *tl++ = txdr_unsigned(stable); else *tl++ = txdr_unsigned(NFSWRITE_FILESYNC); /* * Actually, there is no need to txdr these fields, * but it may make the values more human readable, * for debugging purposes. */ *tl++ = txdr_unsigned(nfsboottime.tv_sec); *tl = txdr_unsigned(nfsboottime.tv_usec); } else if (!nd->nd_repstat) nfsrv_fillattr(nd, &nva); out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfs create service (creates regular files for V2 and V3. Spec. files for V2.) * now does a truncate to 0 length via. 
setattr if it already exists
 * The core creation routine has been extracted out into nfsrv_creatsub(),
 * so it can also be used by nfsrv_open() for V4.
 */
/*
 * Parses the name and the creation attributes (an nfsv2_sattr for V2,
 * otherwise the "how" word followed by either settable attributes or the
 * exclusive create verifier), looks the name up in dp and hands the result
 * to nfsvno_createsub().  On success the reply carries the new file handle
 * and attributes; non-V2 replies also carry the directory wcc data.
 * The RPC status is reported through nd->nd_repstat.  Returns 0 unless the
 * request cannot be parsed, in which case dp is released with vput(), the
 * path buffer is freed and the parse error is returned.
 */
APPLESTATIC int
nfsrvd_create(struct nfsrv_descript *nd, __unused int isdgram,
    vnode_t dp, NFSPROC_T *p, struct nfsexstuff *exp)
{
	struct nfsvattr nva, dirfor, diraft;
	struct nfsv2_sattr *sp;
	struct nameidata named;
	u_int32_t *tl;
	int error = 0, tsize, dirfor_ret = 1, diraft_ret = 1;
	int how = NFSCREATE_UNCHECKED, exclusive_flag = 0;
	NFSDEV_T rdev = 0;
	vnode_t vp = NULL, dirp = NULL;
	fhandle_t fh;
	char *bufp;
	u_long *hashp;
	enum vtype vtyp;
	int32_t cverf[2], tverf[2] = { 0, 0 };

	if (nd->nd_repstat) {
		nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
		goto out;
	}
	NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE,
	    LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE);
	nfsvno_setpathbuf(&named, &bufp, &hashp);
	error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen);
	if (error)
		goto nfsmout;
	if (!nd->nd_repstat) {
		NFSVNO_ATTRINIT(&nva);
		if (nd->nd_flag & ND_NFSV2) {
			NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
			vtyp = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode));
			if (vtyp == VNON)
				vtyp = VREG;
			NFSVNO_SETATTRVAL(&nva, type, vtyp);
			NFSVNO_SETATTRVAL(&nva, mode,
			    nfstov_mode(sp->sa_mode));
			switch (nva.na_type) {
			case VREG:
				/* A size of -1 leaves the size unset. */
				tsize = fxdr_unsigned(int32_t, sp->sa_size);
				if (tsize != -1)
					NFSVNO_SETATTRVAL(&nva, size,
					    (u_quad_t)tsize);
				break;
			case VCHR:
			case VBLK:
			case VFIFO:
				/* For these types sa_size is read as rdev. */
				rdev = fxdr_unsigned(NFSDEV_T, sp->sa_size);
				break;
			default:
				break;
			}
		} else {
			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
			how = fxdr_unsigned(int, *tl);
			switch (how) {
			case NFSCREATE_GUARDED:
			case NFSCREATE_UNCHECKED:
				error = nfsrv_sattr(nd, NULL, &nva, NULL, NULL, p);
				if (error)
					goto nfsmout;
				break;
			case NFSCREATE_EXCLUSIVE:
				NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
				cverf[0] = *tl++;
				cverf[1] = *tl;
				exclusive_flag = 1;
				break;
			}
			NFSVNO_SETATTRVAL(&nva, type, VREG);
		}
	}
	if (nd->nd_repstat) {
		nfsvno_relpathbuf(&named);
		if (nd->nd_flag & ND_NFSV3) {
			dirfor_ret = nfsvno_getattr(dp, &dirfor, nd, p, 1,
			    NULL);
			nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret,
			    &diraft);
		}
		vput(dp);
		goto out;
	}

	nd->nd_repstat = nfsvno_namei(nd, &named, dp, 1, exp, p, &dirp);
	if (dirp) {
		/* V2 has no wcc data, so the directory vnode is not needed. */
		if (nd->nd_flag & ND_NFSV2) {
			vrele(dirp);
			dirp = NULL;
		} else {
			dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0,
			    NULL);
		}
	}
	if (nd->nd_repstat) {
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret,
			    &diraft);
		if (dirp)
			vrele(dirp);
		goto out;
	}

	if (!(nd->nd_flag & ND_NFSV2)) {
		switch (how) {
		case NFSCREATE_GUARDED:
			if (named.ni_vp)
				nd->nd_repstat = EEXIST;
			break;
		case NFSCREATE_UNCHECKED:
			break;
		case NFSCREATE_EXCLUSIVE:
			if (named.ni_vp == NULL)
				NFSVNO_SETATTRVAL(&nva, mode, 0);
			break;
		}
	}

	/*
	 * Iff doesn't exist, create it
	 * otherwise just truncate to 0 length
	 *   should I set the mode too ?
	 */
	/*
	 * NOTE(review): nd_repstat (possibly EEXIST from the GUARDED case
	 * above) is overwritten by this assignment; presumably
	 * nfsvno_createsub() checks nd->nd_repstat itself -- TODO confirm.
	 */
	nd->nd_repstat = nfsvno_createsub(nd, &named, &vp, &nva,
	    &exclusive_flag, cverf, rdev, p, exp);

	if (!nd->nd_repstat) {
		nd->nd_repstat = nfsvno_getfh(vp, &fh, p);
		if (!nd->nd_repstat)
			nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1,
			    NULL);
		vput(vp);
		/* Remember the atime for the exclusive verifier compare below. */
		if (!nd->nd_repstat) {
			tverf[0] = nva.na_atime.tv_sec;
			tverf[1] = nva.na_atime.tv_nsec;
		}
	}
	if (nd->nd_flag & ND_NFSV2) {
		if (!nd->nd_repstat) {
			(void) nfsm_fhtom(nd, (u_int8_t *)&fh, 0, 0);
			nfsrv_fillattr(nd, &nva);
		}
	} else {
		if (exclusive_flag && !nd->nd_repstat && (cverf[0] != tverf[0]
		    || cverf[1] != tverf[1]))
			nd->nd_repstat = EEXIST;
		/*
		 * NOTE(review): dirp is used here without a NULL check,
		 * unlike the earlier paths -- confirm nfsvno_namei() always
		 * sets it for non-V2 requests that reach this point.
		 */
		diraft_ret = nfsvno_getattr(dirp, &diraft, nd, p, 0, NULL);
		vrele(dirp);
		if (!nd->nd_repstat) {
			(void) nfsm_fhtom(nd, (u_int8_t *)&fh, 0, 1);
			nfsrv_postopattr(nd, 0, &nva);
		}
		nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
	}

out:
	NFSEXITCODE2(0, nd);
	return (0);
nfsmout:
	vput(dp);
	nfsvno_relpathbuf(&named);
	NFSEXITCODE2(error, nd);
	return (error);
}

/*
 * nfs v3 mknod service (and v4 create)
 */
APPLESTATIC int
nfsrvd_mknod(struct nfsrv_descript *nd, __unused int isdgram,
    vnode_t dp, vnode_t *vpp, fhandle_t *fhp, NFSPROC_T *p,
    struct
nfsexstuff *exp) { struct nfsvattr nva, dirfor, diraft; u_int32_t *tl; struct nameidata named; int error = 0, dirfor_ret = 1, diraft_ret = 1, pathlen; u_int32_t major, minor; enum vtype vtyp = VNON; nfstype nfs4type = NFNON; vnode_t vp, dirp = NULL; nfsattrbit_t attrbits; char *bufp = NULL, *pathcp = NULL; u_long *hashp, cnflags; NFSACL_T *aclp = NULL; NFSVNO_ATTRINIT(&nva); cnflags = (LOCKPARENT | SAVESTART); if (nd->nd_repstat) { nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } #ifdef NFS4_ACL_EXTATTR_NAME aclp = acl_alloc(M_WAITOK); aclp->acl_cnt = 0; #endif /* * For V4, the creation stuff is here, Yuck! */ if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); vtyp = nfsv34tov_type(*tl); nfs4type = fxdr_unsigned(nfstype, *tl); switch (nfs4type) { case NFLNK: error = nfsvno_getsymlink(nd, &nva, p, &pathcp, &pathlen); if (error) goto nfsmout; break; case NFCHR: case NFBLK: NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); major = fxdr_unsigned(u_int32_t, *tl++); minor = fxdr_unsigned(u_int32_t, *tl); nva.na_rdev = NFSMAKEDEV(major, minor); break; case NFSOCK: case NFFIFO: break; case NFDIR: cnflags = (LOCKPARENT | SAVENAME); break; default: nd->nd_repstat = NFSERR_BADTYPE; vrele(dp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif goto out; } } NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, cnflags | NOCACHE); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) goto nfsmout; if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); vtyp = nfsv34tov_type(*tl); } error = nfsrv_sattr(nd, NULL, &nva, &attrbits, aclp, p); if (error) goto nfsmout; nva.na_type = vtyp; if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV3) && (vtyp == VCHR || vtyp == VBLK)) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); major = fxdr_unsigned(u_int32_t, *tl++); minor = fxdr_unsigned(u_int32_t, *tl); nva.na_rdev = NFSMAKEDEV(major, minor); } 
} dirfor_ret = nfsvno_getattr(dp, &dirfor, nd, p, 0, NULL); if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV4)) { if (!dirfor_ret && NFSVNO_ISSETGID(&nva) && dirfor.na_gid == nva.na_gid) NFSVNO_UNSET(&nva, gid); nd->nd_repstat = nfsrv_checkuidgid(nd, &nva); } if (nd->nd_repstat) { vrele(dp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif nfsvno_relpathbuf(&named); if (pathcp) FREE(pathcp, M_TEMP); if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } /* * Yuck! For V4, mkdir and link are here and some V4 clients don't fill * in va_mode, so we'll have to set a default here. */ if (NFSVNO_NOTSETMODE(&nva)) { if (vtyp == VLNK) nva.na_mode = 0755; else nva.na_mode = 0400; } if (vtyp == VDIR) named.ni_cnd.cn_flags |= WILLBEDIR; nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, p, &dirp); if (nd->nd_repstat) { if (dirp) { if (nd->nd_flag & ND_NFSV3) dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, NULL); vrele(dirp); } #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } if (dirp) dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, NULL); if ((nd->nd_flag & ND_NFSV4) && (vtyp == VDIR || vtyp == VLNK)) { if (vtyp == VDIR) { nfsrvd_mkdirsub(nd, &named, &nva, fhp, vpp, dirp, &dirfor, &diraft, &diraft_ret, &attrbits, aclp, p, exp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif goto out; } else if (vtyp == VLNK) { nfsrvd_symlinksub(nd, &named, &nva, fhp, vpp, dirp, &dirfor, &diraft, &diraft_ret, &attrbits, aclp, p, exp, pathcp, pathlen); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif FREE(pathcp, M_TEMP); goto out; } } nd->nd_repstat = nfsvno_mknod(&named, &nva, nd->nd_cred, p); if (!nd->nd_repstat) { vp = named.ni_vp; nfsrv_fixattr(nd, vp, &nva, aclp, p, &attrbits, exp); nd->nd_repstat = nfsvno_getfh(vp, fhp, p); if ((nd->nd_flag & ND_NFSV3) && !nd->nd_repstat) nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, 
NULL); if (vpp != NULL && nd->nd_repstat == 0) { NFSVOPUNLOCK(vp, 0); *vpp = vp; } else vput(vp); } diraft_ret = nfsvno_getattr(dirp, &diraft, nd, p, 0, NULL); vrele(dirp); if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) { (void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 1); nfsrv_postopattr(nd, 0, &nva); } else { NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = newnfs_false; txdr_hyper(dirfor.na_filerev, tl); tl += 2; txdr_hyper(diraft.na_filerev, tl); (void) nfsrv_putattrbit(nd, &attrbits); } } if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif out: NFSEXITCODE2(0, nd); return (0); nfsmout: vrele(dp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif if (bufp) nfsvno_relpathbuf(&named); if (pathcp) FREE(pathcp, M_TEMP); NFSEXITCODE2(error, nd); return (error); } /* * nfs remove service */ APPLESTATIC int nfsrvd_remove(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, NFSPROC_T *p, struct nfsexstuff *exp) { struct nameidata named; u_int32_t *tl; int error = 0, dirfor_ret = 1, diraft_ret = 1; vnode_t dirp = NULL; struct nfsvattr dirfor, diraft; char *bufp; u_long *hashp; if (nd->nd_repstat) { nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, DELETE, LOCKPARENT | LOCKLEAF); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) { vput(dp); nfsvno_relpathbuf(&named); goto out; } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_namei(nd, &named, dp, 1, exp, p, &dirp); } else { vput(dp); nfsvno_relpathbuf(&named); } if (dirp) { if (!(nd->nd_flag & ND_NFSV2)) { dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, NULL); } else { vrele(dirp); dirp = NULL; } } if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV4) { if (vnode_vtype(named.ni_vp) == VDIR) nd->nd_repstat = nfsvno_rmdirsub(&named, 1, nd->nd_cred, p, exp); else nd->nd_repstat = 
nfsvno_removesub(&named, 1, nd->nd_cred, p, exp); } else if (nd->nd_procnum == NFSPROC_RMDIR) { nd->nd_repstat = nfsvno_rmdirsub(&named, 0, nd->nd_cred, p, exp); } else { nd->nd_repstat = nfsvno_removesub(&named, 0, nd->nd_cred, p, exp); } } if (!(nd->nd_flag & ND_NFSV2)) { if (dirp) { diraft_ret = nfsvno_getattr(dirp, &diraft, nd, p, 0, NULL); vrele(dirp); } if (nd->nd_flag & ND_NFSV3) { nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); } else if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = newnfs_false; txdr_hyper(dirfor.na_filerev, tl); tl += 2; txdr_hyper(diraft.na_filerev, tl); } } out: NFSEXITCODE2(error, nd); return (error); } /* * nfs rename service */ APPLESTATIC int nfsrvd_rename(struct nfsrv_descript *nd, int isdgram, vnode_t dp, vnode_t todp, NFSPROC_T *p, struct nfsexstuff *exp, struct nfsexstuff *toexp) { u_int32_t *tl; int error = 0, fdirfor_ret = 1, fdiraft_ret = 1; int tdirfor_ret = 1, tdiraft_ret = 1; struct nameidata fromnd, tond; vnode_t fdirp = NULL, tdirp = NULL, tdp = NULL; struct nfsvattr fdirfor, fdiraft, tdirfor, tdiraft; struct nfsexstuff tnes; struct nfsrvfh tfh; char *bufp, *tbufp = NULL; u_long *hashp; fhandle_t fh; if (nd->nd_repstat) { nfsrv_wcc(nd, fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); nfsrv_wcc(nd, tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); goto out; } if (!(nd->nd_flag & ND_NFSV2)) fdirfor_ret = nfsvno_getattr(dp, &fdirfor, nd, p, 1, NULL); tond.ni_cnd.cn_nameiop = 0; tond.ni_startdir = NULL; NFSNAMEICNDSET(&fromnd.ni_cnd, nd->nd_cred, DELETE, WANTPARENT | SAVESTART); nfsvno_setpathbuf(&fromnd, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &fromnd.ni_pathlen); if (error) { vput(dp); if (todp) vrele(todp); nfsvno_relpathbuf(&fromnd); goto out; } /* * Unlock dp in this code section, so it is unlocked before * tdp gets locked. This avoids a potential LOR if tdp is the * parent directory of dp. 
*/ if (nd->nd_flag & ND_NFSV4) { tdp = todp; tnes = *toexp; if (dp != tdp) { NFSVOPUNLOCK(dp, 0); /* Might lock tdp. */ tdirfor_ret = nfsvno_getattr(tdp, &tdirfor, nd, p, 0, NULL); } else { tdirfor_ret = nfsvno_getattr(tdp, &tdirfor, nd, p, 1, NULL); NFSVOPUNLOCK(dp, 0); } } else { tfh.nfsrvfh_len = 0; error = nfsrv_mtofh(nd, &tfh); if (error == 0) error = nfsvno_getfh(dp, &fh, p); if (error) { vput(dp); /* todp is always NULL except NFSv4 */ nfsvno_relpathbuf(&fromnd); goto out; } /* If this is the same file handle, just VREF() the vnode. */ if (tfh.nfsrvfh_len == NFSX_MYFH && !NFSBCMP(tfh.nfsrvfh_data, &fh, NFSX_MYFH)) { VREF(dp); tdp = dp; tnes = *exp; tdirfor_ret = nfsvno_getattr(tdp, &tdirfor, nd, p, 1, NULL); NFSVOPUNLOCK(dp, 0); } else { NFSVOPUNLOCK(dp, 0); nd->nd_cred->cr_uid = nd->nd_saveduid; nfsd_fhtovp(nd, &tfh, LK_EXCLUSIVE, &tdp, &tnes, NULL, 0, p); /* Locks tdp. */ if (tdp) { tdirfor_ret = nfsvno_getattr(tdp, &tdirfor, nd, p, 1, NULL); NFSVOPUNLOCK(tdp, 0); } } } NFSNAMEICNDSET(&tond.ni_cnd, nd->nd_cred, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART); nfsvno_setpathbuf(&tond, &tbufp, &hashp); if (!nd->nd_repstat) { error = nfsrv_parsename(nd, tbufp, hashp, &tond.ni_pathlen); if (error) { if (tdp) vrele(tdp); vrele(dp); nfsvno_relpathbuf(&fromnd); nfsvno_relpathbuf(&tond); goto out; } } if (nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) { nfsrv_wcc(nd, fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); nfsrv_wcc(nd, tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); } if (tdp) vrele(tdp); vrele(dp); nfsvno_relpathbuf(&fromnd); nfsvno_relpathbuf(&tond); goto out; } /* * Done parsing, now down to business. 
*/
	nd->nd_repstat = nfsvno_namei(nd, &fromnd, dp, 0, exp, p, &fdirp);
	if (nd->nd_repstat) {
		if (nd->nd_flag & ND_NFSV3) {
			nfsrv_wcc(nd, fdirfor_ret, &fdirfor, fdiraft_ret,
			    &fdiraft);
			nfsrv_wcc(nd, tdirfor_ret, &tdirfor, tdiraft_ret,
			    &tdiraft);
		}
		if (fdirp)
			vrele(fdirp);
		if (tdp)
			vrele(tdp);
		nfsvno_relpathbuf(&tond);
		goto out;
	}
	if (vnode_vtype(fromnd.ni_vp) == VDIR)
		tond.ni_cnd.cn_flags |= WILLBEDIR;
	nd->nd_repstat = nfsvno_namei(nd, &tond, tdp, 0, &tnes, p, &tdirp);
	nd->nd_repstat = nfsvno_rename(&fromnd, &tond, nd->nd_repstat,
	    nd->nd_flag, nd->nd_cred, p);
	if (fdirp)
		fdiraft_ret = nfsvno_getattr(fdirp, &fdiraft, nd, p, 0, NULL);
	if (tdirp)
		tdiraft_ret = nfsvno_getattr(tdirp, &tdiraft, nd, p, 0, NULL);
	if (fdirp)
		vrele(fdirp);
	if (tdirp)
		vrele(tdirp);
	if (nd->nd_flag & ND_NFSV3) {
		nfsrv_wcc(nd, fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft);
		nfsrv_wcc(nd, tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft);
	} else if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) {
		NFSM_BUILD(tl, u_int32_t *, 10 * NFSX_UNSIGNED);
		*tl++ = newnfs_false;
		txdr_hyper(fdirfor.na_filerev, tl);
		tl += 2;
		txdr_hyper(fdiraft.na_filerev, tl);
		tl += 2;
		*tl++ = newnfs_false;
		txdr_hyper(tdirfor.na_filerev, tl);
		tl += 2;
		txdr_hyper(tdiraft.na_filerev, tl);
	}

out:
	NFSEXITCODE2(error, nd);
	return (error);
}

/*
 * nfs link service
 *
 * vp is the existing file to be linked to; it arrives locked and is
 * unlocked immediately.  For NFSv4 the target directory is tovp (with
 * export info toexp); for the other versions the directory's file handle
 * is parsed from the request and converted to a vnode here.
 *
 * Returns 0 or a parse error from nfsrv_mtofh()/nfsrv_parsename(); the
 * protocol status is left in nd->nd_repstat.  The reference on vp is
 * released on every path taken once the nd_repstat early-out has passed.
 */
APPLESTATIC int
nfsrvd_link(struct nfsrv_descript *nd, int isdgram,
    vnode_t vp, vnode_t tovp, NFSPROC_T *p, struct nfsexstuff *exp,
    struct nfsexstuff *toexp)
{
	struct nameidata named;
	u_int32_t *tl;
	int error = 0, dirfor_ret = 1, diraft_ret = 1, getret = 1;
	vnode_t dirp = NULL, dp = NULL;
	struct nfsvattr dirfor, diraft, at;
	struct nfsexstuff tnes;
	struct nfsrvfh dfh;
	char *bufp;
	u_long *hashp;

	/* An error was already recorded: emit the "no attributes" reply. */
	if (nd->nd_repstat) {
		nfsrv_postopattr(nd, getret, &at);
		nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
		goto out;
	}
	NFSVOPUNLOCK(vp, 0);
	/* Hard links to directories are refused. */
	if (vnode_vtype(vp) == VDIR) {
		if (nd->nd_flag & ND_NFSV4)
			nd->nd_repstat = NFSERR_ISDIR;
		else
			nd->nd_repstat = NFSERR_INVAL;
		if (tovp)
			vrele(tovp);
	}
	if (!nd->nd_repstat) {
		if (nd->nd_flag & ND_NFSV4) {
			dp = tovp;
			tnes = *toexp;
		} else {
			error = nfsrv_mtofh(nd, &dfh);
			if (error) {
				vrele(vp);
				/* tovp is always NULL unless NFSv4 */
				goto out;
			}
			nfsd_fhtovp(nd, &dfh, LK_EXCLUSIVE, &dp, &tnes, NULL, 0,
			    p);
			if (dp)
				NFSVOPUNLOCK(dp, 0);
		}
	}
	NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE,
	    LOCKPARENT | SAVENAME | NOCACHE);
	if (!nd->nd_repstat) {
		nfsvno_setpathbuf(&named, &bufp, &hashp);
		error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen);
		if (error) {
			vrele(vp);
			if (dp)
				vrele(dp);
			nfsvno_relpathbuf(&named);
			goto out;
		}
		if (!nd->nd_repstat) {
			nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, &tnes,
			    p, &dirp);
		} else {
			/* Name parsing set nd_repstat: undo what we hold. */
			if (dp)
				vrele(dp);
			nfsvno_relpathbuf(&named);
		}
	}
	if (dirp) {
		if (nd->nd_flag & ND_NFSV2) {
			/* NFSv2 replies carry no directory attributes. */
			vrele(dirp);
			dirp = NULL;
		} else {
			dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0,
			    NULL);
		}
	}
	if (!nd->nd_repstat)
		nd->nd_repstat = nfsvno_link(&named, vp, nd->nd_cred, p, exp);
	if (nd->nd_flag & ND_NFSV3)
		getret = nfsvno_getattr(vp, &at, nd, p, 0, NULL);
	if (dirp) {
		diraft_ret = nfsvno_getattr(dirp, &diraft, nd, p, 0, NULL);
		vrele(dirp);
	}
	vrele(vp);
	if (nd->nd_flag & ND_NFSV3) {
		nfsrv_postopattr(nd, getret, &at);
		nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
	} else if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) {
		/* A false flag followed by before/after na_filerev. */
		NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
		*tl++ = newnfs_false;
		txdr_hyper(dirfor.na_filerev, tl);
		tl += 2;
		txdr_hyper(diraft.na_filerev, tl);
	}

out:
	NFSEXITCODE2(error, nd);
	return (error);
}

/*
 * nfs symbolic link service
 *
 * dp is the directory in which the link is created.  The name and the
 * link contents are parsed here; the create itself is done by
 * nfsrvd_symlinksub().  If vpp is non-NULL, *vpp is cleared up front and
 * may be set by nfsrvd_symlinksub().
 *
 * Returns 0 or a parse error; the protocol status is in nd->nd_repstat.
 */
APPLESTATIC int
nfsrvd_symlink(struct nfsrv_descript *nd, __unused int isdgram,
    vnode_t dp, vnode_t *vpp, fhandle_t *fhp, NFSPROC_T *p,
    struct nfsexstuff *exp)
{
	struct nfsvattr nva, dirfor, diraft;
	struct nameidata named;
	int error = 0, dirfor_ret = 1, diraft_ret = 1, pathlen;
	vnode_t dirp = NULL;
	char *bufp, *pathcp = NULL;
	u_long *hashp;

	if (nd->nd_repstat) {
		nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
		goto out;
	}
	if (vpp)
		*vpp = NULL;
	NFSVNO_ATTRINIT(&nva);
	NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE,
	    LOCKPARENT | SAVESTART | NOCACHE);
	nfsvno_setpathbuf(&named, &bufp, &hashp);
	error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen);
	if (!error && !nd->nd_repstat)
		error = nfsvno_getsymlink(nd, &nva, p, &pathcp, &pathlen);
	if (error) {
		vrele(dp);
		nfsvno_relpathbuf(&named);
		goto out;
	}
	if (!nd->nd_repstat) {
		nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, p, &dirp);
	} else {
		vrele(dp);
		nfsvno_relpathbuf(&named);
	}
	/* Only NFSv3 keeps dirp here, for the wcc data in its reply. */
	if (dirp != NULL && !(nd->nd_flag & ND_NFSV3)) {
		vrele(dirp);
		dirp = NULL;
	}

	/*
	 * And call nfsrvd_symlinksub() to do the common code. It returns
	 * nothing; its result is reported through nd->nd_repstat.
	 */
	if (!nd->nd_repstat) {
		if (dirp != NULL)
			dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0,
			    NULL);
		nfsrvd_symlinksub(nd, &named, &nva, fhp, vpp, dirp,
		    &dirfor, &diraft, &diraft_ret, NULL, NULL, p, exp,
		    pathcp, pathlen);
	} else if (dirp != NULL) {
		dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, NULL);
		vrele(dirp);
	}
	if (pathcp)
		FREE(pathcp, M_TEMP);

	if (nd->nd_flag & ND_NFSV3) {
		if (!nd->nd_repstat) {
			(void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 1);
			nfsrv_postopattr(nd, 0, &nva);
		}
		nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
	}

out:
	NFSEXITCODE2(error, nd);
	return (error);
}

/*
 * Common code for creating a symbolic link.
*/
/*
 * nfsrvd_symlinksub: creates the link via nfsvno_symlink() and records
 * the result in nd->nd_repstat.  For versions other than NFSv2 a
 * successful create also applies nfsrv_fixattr(), and for NFSv3 fetches
 * the new file handle into fhp and the attributes into nvap.  The new
 * vnode is handed back unlocked through *vpp when vpp is non-NULL and no
 * error occurred; otherwise it is vput().  dirp, when non-NULL, has its
 * attributes fetched into diraftp (status in *diraft_retp) and is
 * released.  For NFSv4 success, a false flag, the before/after
 * na_filerev of the directory and the attribute bitmap are appended to
 * the reply.
 */
static void
nfsrvd_symlinksub(struct nfsrv_descript *nd, struct nameidata *ndp,
    struct nfsvattr *nvap, fhandle_t *fhp, vnode_t *vpp,
    vnode_t dirp, struct nfsvattr *dirforp, struct nfsvattr *diraftp,
    int *diraft_retp, nfsattrbit_t *attrbitp,
    NFSACL_T *aclp, NFSPROC_T *p, struct nfsexstuff *exp, char *pathcp,
    int pathlen)
{
	u_int32_t *tl;

	nd->nd_repstat = nfsvno_symlink(ndp, nvap, pathcp, pathlen,
	    !(nd->nd_flag & ND_NFSV2), nd->nd_saveduid, nd->nd_cred, p, exp);
	if (!nd->nd_repstat && !(nd->nd_flag & ND_NFSV2)) {
		nfsrv_fixattr(nd, ndp->ni_vp, nvap, aclp, p, attrbitp, exp);
		if (nd->nd_flag & ND_NFSV3) {
			nd->nd_repstat = nfsvno_getfh(ndp->ni_vp, fhp, p);
			if (!nd->nd_repstat)
				nd->nd_repstat = nfsvno_getattr(ndp->ni_vp,
				    nvap, nd, p, 1, NULL);
		}
		if (vpp != NULL && nd->nd_repstat == 0) {
			NFSVOPUNLOCK(ndp->ni_vp, 0);
			*vpp = ndp->ni_vp;
		} else
			vput(ndp->ni_vp);
	}
	if (dirp) {
		*diraft_retp = nfsvno_getattr(dirp, diraftp, nd, p, 0, NULL);
		vrele(dirp);
	}
	if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) {
		NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
		*tl++ = newnfs_false;
		txdr_hyper(dirforp->na_filerev, tl);
		tl += 2;
		txdr_hyper(diraftp->na_filerev, tl);
		(void) nfsrv_putattrbit(nd, attrbitp);
	}

	NFSEXITCODE2(0, nd);
}

/*
 * nfs mkdir service
 *
 * dp is the parent directory.  The name and the requested attributes
 * (a full sattr for NFSv3, only the mode otherwise) are parsed here and
 * the directory is created by nfsrvd_mkdirsub().
 *
 * Returns 0 with the protocol status in nd->nd_repstat, or a parse error
 * via the nfsmout path, which releases dp and the path buffer.
 */
APPLESTATIC int
nfsrvd_mkdir(struct nfsrv_descript *nd, __unused int isdgram,
    vnode_t dp, vnode_t *vpp, fhandle_t *fhp, NFSPROC_T *p,
    struct nfsexstuff *exp)
{
	struct nfsvattr nva, dirfor, diraft;
	struct nameidata named;
	u_int32_t *tl;
	int error = 0, dirfor_ret = 1, diraft_ret = 1;
	vnode_t dirp = NULL;
	char *bufp;
	u_long *hashp;

	if (nd->nd_repstat) {
		nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
		goto out;
	}
	NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE,
	    LOCKPARENT | SAVENAME | NOCACHE);
	nfsvno_setpathbuf(&named, &bufp, &hashp);
	error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen);
	if (error)
		goto nfsmout;
	if (!nd->nd_repstat) {
		NFSVNO_ATTRINIT(&nva);
		if (nd->nd_flag & ND_NFSV3) {
			error = nfsrv_sattr(nd, NULL, &nva, NULL, NULL, p);
			if (error)
				goto nfsmout;
		} else {
			/* NOTE(review): NFSM_DISSECT presumably jumps to nfsmout on failure - confirm. */
			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
			nva.na_mode = nfstov_mode(*tl++);
		}
	}
	if (!nd->nd_repstat) {
		nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, p, &dirp);
	} else {
		vrele(dp);
		nfsvno_relpathbuf(&named);
	}
	/* Only NFSv3 keeps dirp here, for the wcc data in its reply. */
	if (dirp != NULL && !(nd->nd_flag & ND_NFSV3)) {
		vrele(dirp);
		dirp = NULL;
	}
	if (nd->nd_repstat) {
		if (dirp != NULL) {
			dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0,
			    NULL);
			vrele(dirp);
		}
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret,
			    &diraft);
		goto out;
	}
	if (dirp != NULL)
		dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, NULL);

	/*
	 * Call nfsrvd_mkdirsub() for the code common to V4 as well.
	 */
	nfsrvd_mkdirsub(nd, &named, &nva, fhp, vpp, dirp, &dirfor, &diraft,
	    &diraft_ret, NULL, NULL, p, exp);

	if (nd->nd_flag & ND_NFSV3) {
		if (!nd->nd_repstat) {
			(void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 1);
			nfsrv_postopattr(nd, 0, &nva);
		}
		nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
	} else if (!nd->nd_repstat) {
		(void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 0);
		nfsrv_fillattr(nd, &nva);
	}

out:
	NFSEXITCODE2(0, nd);
	return (0);
nfsmout:
	vrele(dp);
	nfsvno_relpathbuf(&named);
	NFSEXITCODE2(error, nd);
	return (error);
}

/*
 * Code common to mkdir for V2,3 and 4.
*/ static void nfsrvd_mkdirsub(struct nfsrv_descript *nd, struct nameidata *ndp, struct nfsvattr *nvap, fhandle_t *fhp, vnode_t *vpp, vnode_t dirp, struct nfsvattr *dirforp, struct nfsvattr *diraftp, int *diraft_retp, nfsattrbit_t *attrbitp, NFSACL_T *aclp, NFSPROC_T *p, struct nfsexstuff *exp) { vnode_t vp; u_int32_t *tl; NFSVNO_SETATTRVAL(nvap, type, VDIR); nd->nd_repstat = nfsvno_mkdir(ndp, nvap, nd->nd_saveduid, nd->nd_cred, p, exp); if (!nd->nd_repstat) { vp = ndp->ni_vp; nfsrv_fixattr(nd, vp, nvap, aclp, p, attrbitp, exp); nd->nd_repstat = nfsvno_getfh(vp, fhp, p); if (!(nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) nd->nd_repstat = nfsvno_getattr(vp, nvap, nd, p, 1, NULL); if (vpp && !nd->nd_repstat) { NFSVOPUNLOCK(vp, 0); *vpp = vp; } else { vput(vp); } } if (dirp) { *diraft_retp = nfsvno_getattr(dirp, diraftp, nd, p, 0, NULL); vrele(dirp); } if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = newnfs_false; txdr_hyper(dirforp->na_filerev, tl); tl += 2; txdr_hyper(diraftp->na_filerev, tl); (void) nfsrv_putattrbit(nd, attrbitp); } NFSEXITCODE2(0, nd); } /* * nfs commit service */ APPLESTATIC int nfsrvd_commit(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { struct nfsvattr bfor, aft; u_int32_t *tl; int error = 0, for_ret = 1, aft_ret = 1, cnt; u_int64_t off; if (nd->nd_repstat) { nfsrv_wcc(nd, for_ret, &bfor, aft_ret, &aft); goto out; } /* Return NFSERR_ISDIR in NFSv4 when commit on a directory. */ if (vp->v_type != VREG) { if (nd->nd_flag & ND_NFSV3) error = NFSERR_NOTSUPP; else error = (vp->v_type == VDIR) ? NFSERR_ISDIR : NFSERR_INVAL; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); /* * XXX At this time VOP_FSYNC() does not accept offset and byte * count parameters, so these arguments are useless (someday maybe). 
*/ off = fxdr_hyper(tl); tl += 2; cnt = fxdr_unsigned(int, *tl); if (nd->nd_flag & ND_NFSV3) for_ret = nfsvno_getattr(vp, &bfor, nd, p, 1, NULL); nd->nd_repstat = nfsvno_fsync(vp, off, cnt, nd->nd_cred, p); if (nd->nd_flag & ND_NFSV3) { aft_ret = nfsvno_getattr(vp, &aft, nd, p, 1, NULL); nfsrv_wcc(nd, for_ret, &bfor, aft_ret, &aft); } vput(vp); if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); *tl++ = txdr_unsigned(nfsboottime.tv_sec); *tl = txdr_unsigned(nfsboottime.tv_usec); } out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfs statfs service */ APPLESTATIC int nfsrvd_statfs(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { struct statfs *sf; u_int32_t *tl; int getret = 1; struct nfsvattr at; u_quad_t tval; sf = NULL; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); goto out; } sf = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); nd->nd_repstat = nfsvno_statfs(vp, sf); getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); if (nd->nd_repstat) goto out; if (nd->nd_flag & ND_NFSV2) { NFSM_BUILD(tl, u_int32_t *, NFSX_V2STATFS); *tl++ = txdr_unsigned(NFS_V2MAXDATA); *tl++ = txdr_unsigned(sf->f_bsize); *tl++ = txdr_unsigned(sf->f_blocks); *tl++ = txdr_unsigned(sf->f_bfree); *tl = txdr_unsigned(sf->f_bavail); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_V3STATFS); tval = (u_quad_t)sf->f_blocks; tval *= (u_quad_t)sf->f_bsize; txdr_hyper(tval, tl); tl += 2; tval = (u_quad_t)sf->f_bfree; tval *= (u_quad_t)sf->f_bsize; txdr_hyper(tval, tl); tl += 2; tval = (u_quad_t)sf->f_bavail; tval *= (u_quad_t)sf->f_bsize; txdr_hyper(tval, tl); tl += 2; tval = (u_quad_t)sf->f_files; txdr_hyper(tval, tl); tl += 2; tval = (u_quad_t)sf->f_ffree; txdr_hyper(tval, tl); tl += 2; tval = (u_quad_t)sf->f_ffree; txdr_hyper(tval, tl); tl += 2; *tl = 0; } out: free(sf, M_STATFS); NFSEXITCODE2(0, 
nd); return (0); } /* * nfs fsinfo service */ APPLESTATIC int nfsrvd_fsinfo(struct nfsrv_descript *nd, int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; struct nfsfsinfo fs; int getret = 1; struct nfsvattr at; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); goto out; } getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); nfsvno_getfs(&fs, isdgram); vput(vp); nfsrv_postopattr(nd, getret, &at); NFSM_BUILD(tl, u_int32_t *, NFSX_V3FSINFO); *tl++ = txdr_unsigned(fs.fs_rtmax); *tl++ = txdr_unsigned(fs.fs_rtpref); *tl++ = txdr_unsigned(fs.fs_rtmult); *tl++ = txdr_unsigned(fs.fs_wtmax); *tl++ = txdr_unsigned(fs.fs_wtpref); *tl++ = txdr_unsigned(fs.fs_wtmult); *tl++ = txdr_unsigned(fs.fs_dtpref); txdr_hyper(fs.fs_maxfilesize, tl); tl += 2; txdr_nfsv3time(&fs.fs_timedelta, tl); tl += 2; *tl = txdr_unsigned(fs.fs_properties); out: NFSEXITCODE2(0, nd); return (0); } /* * nfs pathconf service */ APPLESTATIC int nfsrvd_pathconf(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { struct nfsv3_pathconf *pc; int getret = 1; register_t linkmax, namemax, chownres, notrunc; struct nfsvattr at; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); goto out; } nd->nd_repstat = nfsvno_pathconf(vp, _PC_LINK_MAX, &linkmax, nd->nd_cred, p); if (!nd->nd_repstat) nd->nd_repstat = nfsvno_pathconf(vp, _PC_NAME_MAX, &namemax, nd->nd_cred, p); if (!nd->nd_repstat) nd->nd_repstat=nfsvno_pathconf(vp, _PC_CHOWN_RESTRICTED, &chownres, nd->nd_cred, p); if (!nd->nd_repstat) nd->nd_repstat = nfsvno_pathconf(vp, _PC_NO_TRUNC, ¬runc, nd->nd_cred, p); getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); vput(vp); nfsrv_postopattr(nd, getret, &at); if (!nd->nd_repstat) { NFSM_BUILD(pc, struct nfsv3_pathconf *, NFSX_V3PATHCONF); pc->pc_linkmax = txdr_unsigned(linkmax); pc->pc_namemax = txdr_unsigned(namemax); pc->pc_notrunc = txdr_unsigned(notrunc); pc->pc_chownrestricted = txdr_unsigned(chownres); /* * 
These should probably be supported by VOP_PATHCONF(), but * until msdosfs is exportable (why would you want to?), the * Unix defaults should be ok. */ pc->pc_caseinsensitive = newnfs_false; pc->pc_casepreserving = newnfs_true; } out: NFSEXITCODE2(0, nd); return (0); } /* * nfsv4 lock service */ APPLESTATIC int nfsrvd_lock(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp) { u_int32_t *tl; int i; struct nfsstate *stp = NULL; struct nfslock *lop; struct nfslockconflict cf; int error = 0; u_short flags = NFSLCK_LOCK, lflags; u_int64_t offset, len; nfsv4stateid_t stateid; nfsquad_t clientid; NFSM_DISSECT(tl, u_int32_t *, 7 * NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl++); switch (i) { case NFSV4LOCKT_READW: flags |= NFSLCK_BLOCKING; case NFSV4LOCKT_READ: lflags = NFSLCK_READ; break; case NFSV4LOCKT_WRITEW: flags |= NFSLCK_BLOCKING; case NFSV4LOCKT_WRITE: lflags = NFSLCK_WRITE; break; default: nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } if (*tl++ == newnfs_true) flags |= NFSLCK_RECLAIM; offset = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); tl += 2; if (*tl == newnfs_true) flags |= NFSLCK_OPENTOLOCK; if (flags & NFSLCK_OPENTOLOCK) { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED + NFSX_STATEID); i = fxdr_unsigned(int, *(tl+4+(NFSX_STATEID / NFSX_UNSIGNED))); if (i <= 0 || i > NFSV4_OPAQUELIMIT) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } MALLOC(stp, struct nfsstate *, sizeof (struct nfsstate) + i, M_NFSDSTATE, M_WAITOK); stp->ls_ownerlen = i; stp->ls_op = nd->nd_rp; stp->ls_seq = fxdr_unsigned(int, *tl++); stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); /* * For the special stateid of other all 0s and seqid == 1, set * the stateid to the current stateid, if it is set. 
*/ if ((nd->nd_flag & ND_NFSV41) != 0 && stp->ls_stateid.seqid == 1 && stp->ls_stateid.other[0] == 0 && stp->ls_stateid.other[1] == 0 && stp->ls_stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) { stp->ls_stateid = nd->nd_curstateid; stp->ls_stateid.seqid = 0; } else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } stp->ls_opentolockseq = fxdr_unsigned(int, *tl++); clientid.lval[0] = *tl++; clientid.lval[1] = *tl++; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK3 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } error = nfsrv_mtostr(nd, stp->ls_owner, stp->ls_ownerlen); if (error) goto nfsmout; } else { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); MALLOC(stp, struct nfsstate *, sizeof (struct nfsstate), M_NFSDSTATE, M_WAITOK); stp->ls_ownerlen = 0; stp->ls_op = nd->nd_rp; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); /* * For the special stateid of other all 0s and seqid == 1, set * the stateid to the current stateid, if it is set. 
*/ if ((nd->nd_flag & ND_NFSV41) != 0 && stp->ls_stateid.seqid == 1 && stp->ls_stateid.other[0] == 0 && stp->ls_stateid.other[1] == 0 && stp->ls_stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) { stp->ls_stateid = nd->nd_curstateid; stp->ls_stateid.seqid = 0; } else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } stp->ls_seq = fxdr_unsigned(int, *tl); clientid.lval[0] = stp->ls_stateid.other[0]; clientid.lval[1] = stp->ls_stateid.other[1]; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK4 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } } MALLOC(lop, struct nfslock *, sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK); lop->lo_first = offset; if (len == NFS64BITSSET) { lop->lo_end = NFS64BITSSET; } else { lop->lo_end = offset + len; if (lop->lo_end <= lop->lo_first) nd->nd_repstat = NFSERR_INVAL; } lop->lo_flags = lflags; stp->ls_flags = flags; stp->ls_uid = nd->nd_cred->cr_uid; /* * Do basic access checking. */ if (!nd->nd_repstat && vnode_vtype(vp) != VREG) { if (vnode_vtype(vp) == VDIR) nd->nd_repstat = NFSERR_ISDIR; else nd->nd_repstat = NFSERR_INVAL; } if (!nd->nd_repstat) { if (lflags & NFSLCK_WRITE) { nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); } else { nd->nd_repstat = nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat) nd->nd_repstat = nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); } } /* * We call nfsrv_lockctrl() even if nd_repstat set, so that the * seqid# gets updated. nfsrv_lockctrl() will return the value * of nd_repstat, if it gets that far. 
*/ nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, &cf, clientid, &stateid, exp, nd, p); if (lop) FREE((caddr_t)lop, M_NFSDLOCK); if (stp) FREE((caddr_t)stp, M_NFSDSTATE); if (!nd->nd_repstat) { /* For NFSv4.1, set the Current StateID. */ if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_curstateid = stateid; nd->nd_flag |= ND_CURSTATEID; } NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER); } else if (nd->nd_repstat == NFSERR_DENIED) { NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED); txdr_hyper(cf.cl_first, tl); tl += 2; if (cf.cl_end == NFS64BITSSET) len = NFS64BITSSET; else len = cf.cl_end - cf.cl_first; txdr_hyper(len, tl); tl += 2; if (cf.cl_flags == NFSLCK_WRITE) *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE); else *tl++ = txdr_unsigned(NFSV4LOCKT_READ); *tl++ = stateid.other[0]; *tl = stateid.other[1]; (void) nfsm_strtom(nd, cf.cl_owner, cf.cl_ownerlen); } vput(vp); NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); if (stp) free((caddr_t)stp, M_NFSDSTATE); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 lock test service */ APPLESTATIC int nfsrvd_lockt(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp) { u_int32_t *tl; int i; struct nfsstate *stp = NULL; struct nfslock lo, *lop = &lo; struct nfslockconflict cf; int error = 0; nfsv4stateid_t stateid; nfsquad_t clientid; u_int64_t len; NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED); i = fxdr_unsigned(int, *(tl + 7)); if (i <= 0 || i > NFSV4_OPAQUELIMIT) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } MALLOC(stp, struct nfsstate *, sizeof (struct nfsstate) + i, M_NFSDSTATE, M_WAITOK); stp->ls_ownerlen = i; stp->ls_op = NULL; stp->ls_flags = NFSLCK_TEST; stp->ls_uid = nd->nd_cred->cr_uid; i = fxdr_unsigned(int, *tl++); switch (i) { case NFSV4LOCKT_READW: stp->ls_flags |= NFSLCK_BLOCKING; case NFSV4LOCKT_READ: lo.lo_flags = NFSLCK_READ; break; case NFSV4LOCKT_WRITEW: 
stp->ls_flags |= NFSLCK_BLOCKING; case NFSV4LOCKT_WRITE: lo.lo_flags = NFSLCK_WRITE; break; default: nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } lo.lo_first = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); if (len == NFS64BITSSET) { lo.lo_end = NFS64BITSSET; } else { lo.lo_end = lo.lo_first + len; if (lo.lo_end <= lo.lo_first) nd->nd_repstat = NFSERR_INVAL; } tl += 2; clientid.lval[0] = *tl++; clientid.lval[1] = *tl; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK5 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } error = nfsrv_mtostr(nd, stp->ls_owner, stp->ls_ownerlen); if (error) goto nfsmout; if (!nd->nd_repstat && vnode_vtype(vp) != VREG) { if (vnode_vtype(vp) == VDIR) nd->nd_repstat = NFSERR_ISDIR; else nd->nd_repstat = NFSERR_INVAL; } if (!nd->nd_repstat) nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, &cf, clientid, &stateid, exp, nd, p); if (nd->nd_repstat) { if (nd->nd_repstat == NFSERR_DENIED) { NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED); txdr_hyper(cf.cl_first, tl); tl += 2; if (cf.cl_end == NFS64BITSSET) len = NFS64BITSSET; else len = cf.cl_end - cf.cl_first; txdr_hyper(len, tl); tl += 2; if (cf.cl_flags == NFSLCK_WRITE) *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE); else *tl++ = txdr_unsigned(NFSV4LOCKT_READ); *tl++ = stp->ls_stateid.other[0]; *tl = stp->ls_stateid.other[1]; (void) nfsm_strtom(nd, cf.cl_owner, cf.cl_ownerlen); } } vput(vp); if (stp) FREE((caddr_t)stp, M_NFSDSTATE); NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); if (stp) free((caddr_t)stp, M_NFSDSTATE); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 unlock service */ APPLESTATIC int nfsrvd_locku(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp) { u_int32_t *tl; int i; 
struct nfsstate *stp; struct nfslock *lop; int error = 0; nfsv4stateid_t stateid; nfsquad_t clientid; u_int64_t len; NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED + NFSX_STATEID); MALLOC(stp, struct nfsstate *, sizeof (struct nfsstate), M_NFSDSTATE, M_WAITOK); MALLOC(lop, struct nfslock *, sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK); stp->ls_flags = NFSLCK_UNLOCK; lop->lo_flags = NFSLCK_UNLOCK; stp->ls_op = nd->nd_rp; i = fxdr_unsigned(int, *tl++); switch (i) { case NFSV4LOCKT_READW: stp->ls_flags |= NFSLCK_BLOCKING; case NFSV4LOCKT_READ: break; case NFSV4LOCKT_WRITEW: stp->ls_flags |= NFSLCK_BLOCKING; case NFSV4LOCKT_WRITE: break; default: nd->nd_repstat = NFSERR_BADXDR; free(stp, M_NFSDSTATE); free(lop, M_NFSDLOCK); goto nfsmout; } stp->ls_ownerlen = 0; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_seq = fxdr_unsigned(int, *tl++); stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); /* * For the special stateid of other all 0s and seqid == 1, set the * stateid to the current stateid, if it is set. 
*/ if ((nd->nd_flag & ND_NFSV41) != 0 && stp->ls_stateid.seqid == 1 && stp->ls_stateid.other[0] == 0 && stp->ls_stateid.other[1] == 0 && stp->ls_stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) { stp->ls_stateid = nd->nd_curstateid; stp->ls_stateid.seqid = 0; } else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } lop->lo_first = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); if (len == NFS64BITSSET) { lop->lo_end = NFS64BITSSET; } else { lop->lo_end = lop->lo_first + len; if (lop->lo_end <= lop->lo_first) nd->nd_repstat = NFSERR_INVAL; } clientid.lval[0] = stp->ls_stateid.other[0]; clientid.lval[1] = stp->ls_stateid.other[1]; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK6 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } if (!nd->nd_repstat && vnode_vtype(vp) != VREG) { if (vnode_vtype(vp) == VDIR) nd->nd_repstat = NFSERR_ISDIR; else nd->nd_repstat = NFSERR_INVAL; } /* * Call nfsrv_lockctrl() even if nd_repstat is set, so that the * seqid# gets incremented. nfsrv_lockctrl() will return the * value of nd_repstat, if it gets that far. 
*/ nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid, &stateid, exp, nd, p); if (stp) FREE((caddr_t)stp, M_NFSDSTATE); if (lop) free((caddr_t)lop, M_NFSDLOCK); if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER); } nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 open service */ APPLESTATIC int nfsrvd_open(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, vnode_t *vpp, __unused fhandle_t *fhp, NFSPROC_T *p, struct nfsexstuff *exp) { u_int32_t *tl; int i, retext; struct nfsstate *stp = NULL; int error = 0, create, claim, exclusive_flag = 0; u_int32_t rflags = NFSV4OPEN_LOCKTYPEPOSIX, acemask; int how = NFSCREATE_UNCHECKED; int32_t cverf[2], tverf[2] = { 0, 0 }; vnode_t vp = NULL, dirp = NULL; struct nfsvattr nva, dirfor, diraft; struct nameidata named; nfsv4stateid_t stateid, delegstateid; nfsattrbit_t attrbits; nfsquad_t clientid; char *bufp = NULL; u_long *hashp; NFSACL_T *aclp = NULL; #ifdef NFS4_ACL_EXTATTR_NAME aclp = acl_alloc(M_WAITOK); aclp->acl_cnt = 0; #endif NFSZERO_ATTRBIT(&attrbits); named.ni_startdir = NULL; named.ni_cnd.cn_nameiop = 0; NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED); i = fxdr_unsigned(int, *(tl + 5)); if (i <= 0 || i > NFSV4_OPAQUELIMIT) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } MALLOC(stp, struct nfsstate *, sizeof (struct nfsstate) + i, M_NFSDSTATE, M_WAITOK); stp->ls_ownerlen = i; stp->ls_op = nd->nd_rp; stp->ls_flags = NFSLCK_OPEN; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_seq = fxdr_unsigned(u_int32_t, *tl++); i = fxdr_unsigned(int, *tl++); retext = 0; if ((i & (NFSV4OPEN_WANTDELEGMASK | NFSV4OPEN_WANTSIGNALDELEG | NFSV4OPEN_WANTPUSHDELEG)) != 0 && (nd->nd_flag & ND_NFSV41) != 0) { retext = 1; /* For now, ignore these. 
*/ i &= ~(NFSV4OPEN_WANTPUSHDELEG | NFSV4OPEN_WANTSIGNALDELEG); switch (i & NFSV4OPEN_WANTDELEGMASK) { case NFSV4OPEN_WANTANYDELEG: stp->ls_flags |= (NFSLCK_WANTRDELEG | NFSLCK_WANTWDELEG); i &= ~NFSV4OPEN_WANTDELEGMASK; break; case NFSV4OPEN_WANTREADDELEG: stp->ls_flags |= NFSLCK_WANTRDELEG; i &= ~NFSV4OPEN_WANTDELEGMASK; break; case NFSV4OPEN_WANTWRITEDELEG: stp->ls_flags |= NFSLCK_WANTWDELEG; i &= ~NFSV4OPEN_WANTDELEGMASK; break; case NFSV4OPEN_WANTNODELEG: stp->ls_flags |= NFSLCK_WANTNODELEG; i &= ~NFSV4OPEN_WANTDELEGMASK; break; case NFSV4OPEN_WANTCANCEL: printf("NFSv4: ignore Open WantCancel\n"); i &= ~NFSV4OPEN_WANTDELEGMASK; break; default: /* nd_repstat will be set to NFSERR_INVAL below. */ break; } } switch (i) { case NFSV4OPEN_ACCESSREAD: stp->ls_flags |= NFSLCK_READACCESS; break; case NFSV4OPEN_ACCESSWRITE: stp->ls_flags |= NFSLCK_WRITEACCESS; break; case NFSV4OPEN_ACCESSBOTH: stp->ls_flags |= (NFSLCK_READACCESS | NFSLCK_WRITEACCESS); break; default: nd->nd_repstat = NFSERR_INVAL; } i = fxdr_unsigned(int, *tl++); switch (i) { case NFSV4OPEN_DENYNONE: break; case NFSV4OPEN_DENYREAD: stp->ls_flags |= NFSLCK_READDENY; break; case NFSV4OPEN_DENYWRITE: stp->ls_flags |= NFSLCK_WRITEDENY; break; case NFSV4OPEN_DENYBOTH: stp->ls_flags |= (NFSLCK_READDENY | NFSLCK_WRITEDENY); break; default: nd->nd_repstat = NFSERR_INVAL; } clientid.lval[0] = *tl++; clientid.lval[1] = *tl; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK7 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! 
no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } error = nfsrv_mtostr(nd, stp->ls_owner, stp->ls_ownerlen); if (error) goto nfsmout; NFSVNO_ATTRINIT(&nva); NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); create = fxdr_unsigned(int, *tl); if (!nd->nd_repstat) nd->nd_repstat = nfsvno_getattr(dp, &dirfor, nd, p, 0, NULL); if (create == NFSV4OPEN_CREATE) { nva.na_type = VREG; nva.na_mode = 0; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); how = fxdr_unsigned(int, *tl); switch (how) { case NFSCREATE_UNCHECKED: case NFSCREATE_GUARDED: error = nfsv4_sattr(nd, NULL, &nva, &attrbits, aclp, p); if (error) goto nfsmout; /* * If the na_gid being set is the same as that of * the directory it is going in, clear it, since * that is what will be set by default. This allows * a user that isn't in that group to do the create. */ if (!nd->nd_repstat && NFSVNO_ISSETGID(&nva) && nva.na_gid == dirfor.na_gid) NFSVNO_UNSET(&nva, gid); if (!nd->nd_repstat) nd->nd_repstat = nfsrv_checkuidgid(nd, &nva); break; case NFSCREATE_EXCLUSIVE: NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF); cverf[0] = *tl++; cverf[1] = *tl; break; case NFSCREATE_EXCLUSIVE41: NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF); cverf[0] = *tl++; cverf[1] = *tl; error = nfsv4_sattr(nd, vp, &nva, &attrbits, aclp, p); if (error != 0) goto nfsmout; if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET)) nd->nd_repstat = NFSERR_INVAL; /* * If the na_gid being set is the same as that of * the directory it is going in, clear it, since * that is what will be set by default. This allows * a user that isn't in that group to do the create. 
*/ if (nd->nd_repstat == 0 && NFSVNO_ISSETGID(&nva) && nva.na_gid == dirfor.na_gid) NFSVNO_UNSET(&nva, gid); if (nd->nd_repstat == 0) nd->nd_repstat = nfsrv_checkuidgid(nd, &nva); break; default: nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } } else if (create != NFSV4OPEN_NOCREATE) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } /* * Now, handle the claim, which usually includes looking up a * name in the directory referenced by dp. The exception is * NFSV4OPEN_CLAIMPREVIOUS. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); claim = fxdr_unsigned(int, *tl); if (claim == NFSV4OPEN_CLAIMDELEGATECUR) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl,(caddr_t)stateid.other,NFSX_STATEIDOTHER); stp->ls_flags |= NFSLCK_DELEGCUR; } else if (claim == NFSV4OPEN_CLAIMDELEGATEPREV) { stp->ls_flags |= NFSLCK_DELEGPREV; } if (claim == NFSV4OPEN_CLAIMNULL || claim == NFSV4OPEN_CLAIMDELEGATECUR || claim == NFSV4OPEN_CLAIMDELEGATEPREV) { if (!nd->nd_repstat && create == NFSV4OPEN_CREATE && claim != NFSV4OPEN_CLAIMNULL) nd->nd_repstat = NFSERR_INVAL; if (nd->nd_repstat) { nd->nd_repstat = nfsrv_opencheck(clientid, &stateid, stp, NULL, nd, p, nd->nd_repstat); goto nfsmout; } if (create == NFSV4OPEN_CREATE) NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE); else NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, LOOKUP, LOCKLEAF | SAVESTART); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) { vrele(dp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif FREE((caddr_t)stp, M_NFSDSTATE); nfsvno_relpathbuf(&named); NFSEXITCODE2(error, nd); return (error); } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, p, &dirp); } else { vrele(dp); nfsvno_relpathbuf(&named); } if (create == NFSV4OPEN_CREATE) { switch (how) { case NFSCREATE_UNCHECKED: if (named.ni_vp) { /* * Clear the setable attribute 
bits, except * for Size, if it is being truncated. */ NFSZERO_ATTRBIT(&attrbits); if (NFSVNO_ISSETSIZE(&nva)) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); } break; case NFSCREATE_GUARDED: if (named.ni_vp && !nd->nd_repstat) nd->nd_repstat = EEXIST; break; case NFSCREATE_EXCLUSIVE: exclusive_flag = 1; if (!named.ni_vp) nva.na_mode = 0; break; case NFSCREATE_EXCLUSIVE41: exclusive_flag = 1; break; } } nfsvno_open(nd, &named, clientid, &stateid, stp, &exclusive_flag, &nva, cverf, create, aclp, &attrbits, nd->nd_cred, p, exp, &vp); } else if (claim == NFSV4OPEN_CLAIMPREVIOUS || claim == NFSV4OPEN_CLAIMFH) { if (claim == NFSV4OPEN_CLAIMPREVIOUS) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); switch (i) { case NFSV4OPEN_DELEGATEREAD: stp->ls_flags |= NFSLCK_DELEGREAD; break; case NFSV4OPEN_DELEGATEWRITE: stp->ls_flags |= NFSLCK_DELEGWRITE; case NFSV4OPEN_DELEGATENONE: break; default: nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } stp->ls_flags |= NFSLCK_RECLAIM; } else { /* CLAIM_NULL_FH */ if (nd->nd_repstat == 0 && create == NFSV4OPEN_CREATE) nd->nd_repstat = NFSERR_INVAL; } vp = dp; NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); if ((vp->v_iflag & VI_DOOMED) == 0) nd->nd_repstat = nfsrv_opencheck(clientid, &stateid, stp, vp, nd, p, nd->nd_repstat); else nd->nd_repstat = NFSERR_PERM; } else { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } /* * Do basic access checking. */ if (!nd->nd_repstat && vnode_vtype(vp) != VREG) { /* * The IETF working group decided that this is the correct * error return for all non-regular files. */ nd->nd_repstat = (vp->v_type == VDIR) ? 
NFSERR_ISDIR : NFSERR_SYMLINK; } if (!nd->nd_repstat && (stp->ls_flags & NFSLCK_WRITEACCESS)) nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); if (!nd->nd_repstat && (stp->ls_flags & NFSLCK_READACCESS)) { nd->nd_repstat = nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat) nd->nd_repstat = nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); if (!nd->nd_repstat) { tverf[0] = nva.na_atime.tv_sec; tverf[1] = nva.na_atime.tv_nsec; } } if (!nd->nd_repstat && exclusive_flag && (cverf[0] != tverf[0] || cverf[1] != tverf[1])) nd->nd_repstat = EEXIST; /* * Do the open locking/delegation stuff. */ if (!nd->nd_repstat) nd->nd_repstat = nfsrv_openctrl(nd, vp, &stp, clientid, &stateid, &delegstateid, &rflags, exp, p, nva.na_filerev); /* * vp must be unlocked before the call to nfsvno_getattr(dirp,...) * below, to avoid a deadlock with the lookup in nfsvno_namei() above. * (ie: Leave the NFSVOPUNLOCK() about here.) */ if (vp) NFSVOPUNLOCK(vp, 0); if (stp) FREE((caddr_t)stp, M_NFSDSTATE); if (!nd->nd_repstat && dirp) nd->nd_repstat = nfsvno_getattr(dirp, &diraft, nd, p, 0, NULL); if (!nd->nd_repstat) { /* For NFSv4.1, set the Current StateID. 
*/ if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_curstateid = stateid; nd->nd_flag |= ND_CURSTATEID; } NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); if (claim == NFSV4OPEN_CLAIMPREVIOUS) { *tl++ = newnfs_true; *tl++ = 0; *tl++ = 0; *tl++ = 0; *tl++ = 0; } else { *tl++ = newnfs_false; /* Since dirp is not locked */ txdr_hyper(dirfor.na_filerev, tl); tl += 2; txdr_hyper(diraft.na_filerev, tl); tl += 2; } *tl = txdr_unsigned(rflags & NFSV4OPEN_RFLAGS); (void) nfsrv_putattrbit(nd, &attrbits); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (rflags & NFSV4OPEN_READDELEGATE) *tl = txdr_unsigned(NFSV4OPEN_DELEGATEREAD); else if (rflags & NFSV4OPEN_WRITEDELEGATE) *tl = txdr_unsigned(NFSV4OPEN_DELEGATEWRITE); else if (retext != 0) { *tl = txdr_unsigned(NFSV4OPEN_DELEGATENONEEXT); if ((rflags & NFSV4OPEN_WDCONTENTION) != 0) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_CONTENTION); *tl = newnfs_false; } else if ((rflags & NFSV4OPEN_WDRESOURCE) != 0) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_RESOURCE); *tl = newnfs_false; } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OPEN_NOTWANTED); } } else *tl = txdr_unsigned(NFSV4OPEN_DELEGATENONE); if (rflags & (NFSV4OPEN_READDELEGATE|NFSV4OPEN_WRITEDELEGATE)) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID+NFSX_UNSIGNED); *tl++ = txdr_unsigned(delegstateid.seqid); NFSBCOPY((caddr_t)delegstateid.other, (caddr_t)tl, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); if (rflags & NFSV4OPEN_RECALL) *tl = newnfs_true; else *tl = newnfs_false; if (rflags & NFSV4OPEN_WRITEDELEGATE) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_LIMITSIZE); txdr_hyper(nva.na_size, tl); } NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = 
txdr_unsigned(NFSV4ACE_ALLOWEDTYPE); *tl++ = txdr_unsigned(0x0); acemask = NFSV4ACE_ALLFILESMASK; if (nva.na_mode & S_IRUSR) acemask |= NFSV4ACE_READMASK; if (nva.na_mode & S_IWUSR) acemask |= NFSV4ACE_WRITEMASK; if (nva.na_mode & S_IXUSR) acemask |= NFSV4ACE_EXECUTEMASK; *tl = txdr_unsigned(acemask); (void) nfsm_strtom(nd, "OWNER@", 6); } *vpp = vp; } else if (vp) { vrele(vp); } if (dirp) vrele(dirp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif NFSEXITCODE2(0, nd); return (0); nfsmout: vrele(dp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif if (stp) FREE((caddr_t)stp, M_NFSDSTATE); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 close service */ APPLESTATIC int nfsrvd_close(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; struct nfsstate st, *stp = &st; int error = 0, writeacc; nfsv4stateid_t stateid; nfsquad_t clientid; struct nfsvattr na; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID); stp->ls_seq = fxdr_unsigned(u_int32_t, *tl++); stp->ls_ownerlen = 0; stp->ls_op = nd->nd_rp; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other, NFSX_STATEIDOTHER); /* * For the special stateid of other all 0s and seqid == 1, set the * stateid to the current stateid, if it is set. 
*/ if ((nd->nd_flag & ND_NFSV41) != 0 && stp->ls_stateid.seqid == 1 && stp->ls_stateid.other[0] == 0 && stp->ls_stateid.other[1] == 0 && stp->ls_stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) stp->ls_stateid = nd->nd_curstateid; else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } stp->ls_flags = NFSLCK_CLOSE; clientid.lval[0] = stp->ls_stateid.other[0]; clientid.lval[1] = stp->ls_stateid.other[1]; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK8 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } nd->nd_repstat = nfsrv_openupdate(vp, stp, clientid, &stateid, nd, p, &writeacc); /* For pNFS, update the attributes. */ if (writeacc != 0 || nfsrv_pnfsatime != 0) nfsrv_updatemdsattr(vp, &na, p); vput(vp); if (!nd->nd_repstat) { /* * If the stateid that has been closed is the current stateid, * unset it. 
*/ if ((nd->nd_flag & ND_CURSTATEID) != 0 && stateid.other[0] == nd->nd_curstateid.other[0] && stateid.other[1] == nd->nd_curstateid.other[1] && stateid.other[2] == nd->nd_curstateid.other[2]) nd->nd_flag &= ~ND_CURSTATEID; NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER); } NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 delegpurge service */ APPLESTATIC int nfsrvd_delegpurge(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; int error = 0; nfsquad_t clientid; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); clientid.lval[0] = *tl++; clientid.lval[1] = *tl; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK9 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! 
no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } nd->nd_repstat = nfsrv_delegupdate(nd, clientid, NULL, NULL, NFSV4OP_DELEGPURGE, nd->nd_cred, p, NULL); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 delegreturn service */ APPLESTATIC int nfsrvd_delegreturn(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; int error = 0, writeacc; nfsv4stateid_t stateid; nfsquad_t clientid; struct nfsvattr na; NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stateid.other, NFSX_STATEIDOTHER); clientid.lval[0] = stateid.other[0]; clientid.lval[1] = stateid.other[1]; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK10 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } nd->nd_repstat = nfsrv_delegupdate(nd, clientid, &stateid, vp, NFSV4OP_DELEGRETURN, nd->nd_cred, p, &writeacc); /* For pNFS, update the attributes. 
*/ if (writeacc != 0 || nfsrv_pnfsatime != 0) nfsrv_updatemdsattr(vp, &na, p); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 get file handle service */ APPLESTATIC int nfsrvd_getfh(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { fhandle_t fh; nd->nd_repstat = nfsvno_getfh(vp, &fh, p); vput(vp); if (!nd->nd_repstat) (void) nfsm_fhtom(nd, (u_int8_t *)&fh, 0, 0); NFSEXITCODE2(0, nd); return (0); } /* * nfsv4 open confirm service */ APPLESTATIC int nfsrvd_openconfirm(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; struct nfsstate st, *stp = &st; int error = 0; nfsv4stateid_t stateid; nfsquad_t clientid; if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); stp->ls_ownerlen = 0; stp->ls_op = nd->nd_rp; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); stp->ls_seq = fxdr_unsigned(u_int32_t, *tl); stp->ls_flags = NFSLCK_CONFIRM; clientid.lval[0] = stp->ls_stateid.other[0]; clientid.lval[1] = stp->ls_stateid.other[1]; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK11 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! 
no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } nd->nd_repstat = nfsrv_openupdate(vp, stp, clientid, &stateid, nd, p, NULL); if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER); } nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 open downgrade service */ APPLESTATIC int nfsrvd_opendowngrade(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; int i; struct nfsstate st, *stp = &st; int error = 0; nfsv4stateid_t stateid; nfsquad_t clientid; /* opendowngrade can only work on a file object.*/ if (vp->v_type != VREG) { error = NFSERR_INVAL; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED); stp->ls_ownerlen = 0; stp->ls_op = nd->nd_rp; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); /* * For the special stateid of other all 0s and seqid == 1, set the * stateid to the current stateid, if it is set. 
*/ if ((nd->nd_flag & ND_NFSV41) != 0 && stp->ls_stateid.seqid == 1 && stp->ls_stateid.other[0] == 0 && stp->ls_stateid.other[1] == 0 && stp->ls_stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) stp->ls_stateid = nd->nd_curstateid; else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } stp->ls_seq = fxdr_unsigned(u_int32_t, *tl++); i = fxdr_unsigned(int, *tl++); switch (i) { case NFSV4OPEN_ACCESSREAD: stp->ls_flags = (NFSLCK_READACCESS | NFSLCK_DOWNGRADE); break; case NFSV4OPEN_ACCESSWRITE: stp->ls_flags = (NFSLCK_WRITEACCESS | NFSLCK_DOWNGRADE); break; case NFSV4OPEN_ACCESSBOTH: stp->ls_flags = (NFSLCK_READACCESS | NFSLCK_WRITEACCESS | NFSLCK_DOWNGRADE); break; default: nd->nd_repstat = NFSERR_BADXDR; } i = fxdr_unsigned(int, *tl); switch (i) { case NFSV4OPEN_DENYNONE: break; case NFSV4OPEN_DENYREAD: stp->ls_flags |= NFSLCK_READDENY; break; case NFSV4OPEN_DENYWRITE: stp->ls_flags |= NFSLCK_WRITEDENY; break; case NFSV4OPEN_DENYBOTH: stp->ls_flags |= (NFSLCK_READDENY | NFSLCK_WRITEDENY); break; default: nd->nd_repstat = NFSERR_BADXDR; } clientid.lval[0] = stp->ls_stateid.other[0]; clientid.lval[1] = stp->ls_stateid.other[1]; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK12 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } if (!nd->nd_repstat) nd->nd_repstat = nfsrv_openupdate(vp, stp, clientid, &stateid, nd, p, NULL); if (!nd->nd_repstat) { /* For NFSv4.1, set the Current StateID. 
*/ if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_curstateid = stateid; nd->nd_flag |= ND_CURSTATEID; } NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER); } nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 renew lease service */ APPLESTATIC int nfsrvd_renew(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; int error = 0; nfsquad_t clientid; if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); clientid.lval[0] = *tl++; clientid.lval[1] = *tl; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK13 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } nd->nd_repstat = nfsrv_getclient(clientid, (CLOPS_RENEWOP|CLOPS_RENEW), NULL, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 security info service */ APPLESTATIC int nfsrvd_secinfo(struct nfsrv_descript *nd, int isdgram, vnode_t dp, NFSPROC_T *p, struct nfsexstuff *exp) { u_int32_t *tl; int len; struct nameidata named; vnode_t dirp = NULL, vp; struct nfsrvfh fh; struct nfsexstuff retnes; u_int32_t *sizp; int error = 0, savflag, i; char *bufp; u_long *hashp; /* * All this just to get the export flags for the name. 
*/ NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, LOOKUP, LOCKLEAF | SAVESTART); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) { vput(dp); nfsvno_relpathbuf(&named); goto out; } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_namei(nd, &named, dp, 1, exp, p, &dirp); } else { vput(dp); nfsvno_relpathbuf(&named); } if (dirp) vrele(dirp); if (nd->nd_repstat) goto out; vrele(named.ni_startdir); nfsvno_relpathbuf(&named); fh.nfsrvfh_len = NFSX_MYFH; vp = named.ni_vp; nd->nd_repstat = nfsvno_getfh(vp, (fhandle_t *)fh.nfsrvfh_data, p); vput(vp); savflag = nd->nd_flag; if (!nd->nd_repstat) { nfsd_fhtovp(nd, &fh, LK_SHARED, &vp, &retnes, NULL, 0, p); if (vp) vput(vp); } nd->nd_flag = savflag; if (nd->nd_repstat) goto out; /* * Finally have the export flags for name, so we can create * the security info. */ len = 0; NFSM_BUILD(sizp, u_int32_t *, NFSX_UNSIGNED); for (i = 0; i < retnes.nes_numsecflavor; i++) { if (retnes.nes_secflavors[i] == AUTH_SYS) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(RPCAUTH_UNIX); len++; } else if (retnes.nes_secflavors[i] == RPCSEC_GSS_KRB5) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl++ = txdr_unsigned(RPCAUTH_GSS); (void) nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str, nfsgss_mechlist[KERBV_MECH].len); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(GSS_KERBV_QOP); *tl = txdr_unsigned(RPCAUTHGSS_SVCNONE); len++; } else if (retnes.nes_secflavors[i] == RPCSEC_GSS_KRB5I) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl++ = txdr_unsigned(RPCAUTH_GSS); (void) nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str, nfsgss_mechlist[KERBV_MECH].len); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(GSS_KERBV_QOP); *tl = txdr_unsigned(RPCAUTHGSS_SVCINTEGRITY); len++; } else if (retnes.nes_secflavors[i] == RPCSEC_GSS_KRB5P) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl++ = txdr_unsigned(RPCAUTH_GSS); (void) nfsm_strtom(nd, 
nfsgss_mechlist[KERBV_MECH].str, nfsgss_mechlist[KERBV_MECH].len); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(GSS_KERBV_QOP); *tl = txdr_unsigned(RPCAUTHGSS_SVCPRIVACY); len++; } } *sizp = txdr_unsigned(len); out: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 set client id service */ APPLESTATIC int nfsrvd_setclientid(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; int i; int error = 0, idlen; struct nfsclient *clp = NULL; struct sockaddr_in *rad; u_char *verf, *ucp, *ucp2, addrbuf[24]; nfsquad_t clientid, confirm; if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto out; } NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF + NFSX_UNSIGNED); verf = (u_char *)tl; tl += (NFSX_VERF / NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i > NFSV4_OPAQUELIMIT || i <= 0) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } idlen = i; if (nd->nd_flag & ND_GSS) i += nd->nd_princlen; clp = malloc(sizeof(struct nfsclient) + i, M_NFSDCLIENT, M_WAITOK | M_ZERO); clp->lc_stateid = malloc(sizeof(struct nfsstatehead) * nfsrv_statehashsize, M_NFSDCLIENT, M_WAITOK); NFSINITSOCKMUTEX(&clp->lc_req.nr_mtx); NFSSOCKADDRALLOC(clp->lc_req.nr_nam); NFSSOCKADDRSIZE(clp->lc_req.nr_nam, sizeof (struct sockaddr_in)); clp->lc_req.nr_cred = NULL; NFSBCOPY(verf, clp->lc_verf, NFSX_VERF); clp->lc_idlen = idlen; error = nfsrv_mtostr(nd, clp->lc_id, idlen); if (error) goto nfsmout; if (nd->nd_flag & ND_GSS) { clp->lc_flags = LCL_GSS; if (nd->nd_flag & ND_GSSINTEGRITY) clp->lc_flags |= LCL_GSSINTEGRITY; else if (nd->nd_flag & ND_GSSPRIVACY) clp->lc_flags |= LCL_GSSPRIVACY; } else { clp->lc_flags = 0; } if ((nd->nd_flag & ND_GSS) && nd->nd_princlen > 0) { clp->lc_flags |= LCL_NAME; clp->lc_namelen = nd->nd_princlen; clp->lc_name = &clp->lc_id[idlen]; 
NFSBCOPY(nd->nd_principal, clp->lc_name, clp->lc_namelen); } else { clp->lc_uid = nd->nd_cred->cr_uid; clp->lc_gid = nd->nd_cred->cr_gid; } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); clp->lc_program = fxdr_unsigned(u_int32_t, *tl); error = nfsrv_getclientipaddr(nd, clp); if (error) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); clp->lc_callback = fxdr_unsigned(u_int32_t, *tl); /* * nfsrv_setclient() does the actual work of adding it to the * client list. If there is no error, the structure has been * linked into the client list and clp should no longer be used * here. When an error is returned, it has not been linked in, * so it should be free'd. */ nd->nd_repstat = nfsrv_setclient(nd, &clp, &clientid, &confirm, p); if (nd->nd_repstat == NFSERR_CLIDINUSE) { if (clp->lc_flags & LCL_TCPCALLBACK) (void) nfsm_strtom(nd, "tcp", 3); else (void) nfsm_strtom(nd, "udp", 3); rad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr_in *); ucp = (u_char *)&rad->sin_addr.s_addr; ucp2 = (u_char *)&rad->sin_port; sprintf(addrbuf, "%d.%d.%d.%d.%d.%d", ucp[0] & 0xff, ucp[1] & 0xff, ucp[2] & 0xff, ucp[3] & 0xff, ucp2[0] & 0xff, ucp2[1] & 0xff); (void) nfsm_strtom(nd, addrbuf, strlen(addrbuf)); } if (clp) { NFSSOCKADDRFREE(clp->lc_req.nr_nam); NFSFREEMUTEX(&clp->lc_req.nr_mtx); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); } if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_HYPER); *tl++ = clientid.lval[0]; *tl++ = clientid.lval[1]; *tl++ = confirm.lval[0]; *tl = confirm.lval[1]; } out: NFSEXITCODE2(0, nd); return (0); nfsmout: if (clp) { NFSSOCKADDRFREE(clp->lc_req.nr_nam); NFSFREEMUTEX(&clp->lc_req.nr_mtx); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); } NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 set client id confirm service */ APPLESTATIC int nfsrvd_setclientidcfrm(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; int error = 0; 
nfsquad_t clientid, confirm; if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_HYPER); clientid.lval[0] = *tl++; clientid.lval[1] = *tl++; confirm.lval[0] = *tl++; confirm.lval[1] = *tl; /* * nfsrv_getclient() searches the client list for a match and * returns the appropriate NFSERR status. */ nd->nd_repstat = nfsrv_getclient(clientid, (CLOPS_CONFIRM|CLOPS_RENEW), NULL, NULL, confirm, 0, nd, p); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 verify service */ APPLESTATIC int nfsrvd_verify(struct nfsrv_descript *nd, int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { int error = 0, ret, fhsize = NFSX_MYFH; struct nfsvattr nva; struct statfs *sf; struct nfsfsinfo fs; fhandle_t fh; sf = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); if (!nd->nd_repstat) nd->nd_repstat = nfsvno_statfs(vp, sf); if (!nd->nd_repstat) nd->nd_repstat = nfsvno_getfh(vp, &fh, p); if (!nd->nd_repstat) { nfsvno_getfs(&fs, isdgram); error = nfsv4_loadattr(nd, vp, &nva, NULL, &fh, fhsize, NULL, sf, NULL, &fs, NULL, 1, &ret, NULL, NULL, p, nd->nd_cred); if (!error) { if (nd->nd_procnum == NFSV4OP_NVERIFY) { if (ret == 0) nd->nd_repstat = NFSERR_SAME; else if (ret != NFSERR_NOTSAME) nd->nd_repstat = ret; } else if (ret) nd->nd_repstat = ret; } } vput(vp); free(sf, M_STATFS); NFSEXITCODE2(error, nd); return (error); } /* * nfs openattr rpc */ APPLESTATIC int nfsrvd_openattr(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, __unused vnode_t *vpp, __unused fhandle_t *fhp, __unused NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; int error = 0, createdir; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); createdir = fxdr_unsigned(int, *tl); nd->nd_repstat = NFSERR_NOTSUPP; nfsmout: vrele(dp); 
NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 release lock owner service */ APPLESTATIC int nfsrvd_releaselckown(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; struct nfsstate *stp = NULL; int error = 0, len; nfsquad_t clientid; if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); len = fxdr_unsigned(int, *(tl + 2)); if (len <= 0 || len > NFSV4_OPAQUELIMIT) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } MALLOC(stp, struct nfsstate *, sizeof (struct nfsstate) + len, M_NFSDSTATE, M_WAITOK); stp->ls_ownerlen = len; stp->ls_op = NULL; stp->ls_flags = NFSLCK_RELEASE; stp->ls_uid = nd->nd_cred->cr_uid; clientid.lval[0] = *tl++; clientid.lval[1] = *tl; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK14 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! 
no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } error = nfsrv_mtostr(nd, stp->ls_owner, len); if (error) goto nfsmout; nd->nd_repstat = nfsrv_releaselckown(stp, clientid, p); FREE((caddr_t)stp, M_NFSDSTATE); NFSEXITCODE2(0, nd); return (0); nfsmout: if (stp) free((caddr_t)stp, M_NFSDSTATE); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 exchange_id service */ APPLESTATIC int nfsrvd_exchangeid(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { uint32_t *tl; int error = 0, i, idlen; struct nfsclient *clp = NULL; nfsquad_t clientid, confirm; uint8_t *verf; uint32_t sp4type, v41flags; uint64_t owner_minor; struct timespec verstime; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF + NFSX_UNSIGNED); verf = (uint8_t *)tl; tl += (NFSX_VERF / NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i > NFSV4_OPAQUELIMIT || i <= 0) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } idlen = i; if (nd->nd_flag & ND_GSS) i += nd->nd_princlen; clp = malloc(sizeof(struct nfsclient) + i, M_NFSDCLIENT, M_WAITOK | M_ZERO); clp->lc_stateid = malloc(sizeof(struct nfsstatehead) * nfsrv_statehashsize, M_NFSDCLIENT, M_WAITOK); NFSINITSOCKMUTEX(&clp->lc_req.nr_mtx); NFSSOCKADDRALLOC(clp->lc_req.nr_nam); NFSSOCKADDRSIZE(clp->lc_req.nr_nam, sizeof (struct sockaddr_in)); clp->lc_req.nr_cred = NULL; NFSBCOPY(verf, clp->lc_verf, NFSX_VERF); clp->lc_idlen = idlen; error = nfsrv_mtostr(nd, clp->lc_id, idlen); if (error != 0) goto nfsmout; if ((nd->nd_flag & ND_GSS) != 0) { clp->lc_flags = LCL_GSS | LCL_NFSV41; if ((nd->nd_flag & ND_GSSINTEGRITY) != 0) clp->lc_flags |= LCL_GSSINTEGRITY; else if ((nd->nd_flag & ND_GSSPRIVACY) != 0) clp->lc_flags |= LCL_GSSPRIVACY; } else clp->lc_flags = LCL_NFSV41; if ((nd->nd_flag & ND_GSS) != 0 && nd->nd_princlen > 0) { clp->lc_flags |= LCL_NAME; 
clp->lc_namelen = nd->nd_princlen; clp->lc_name = &clp->lc_id[idlen]; NFSBCOPY(nd->nd_principal, clp->lc_name, clp->lc_namelen); } else { clp->lc_uid = nd->nd_cred->cr_uid; clp->lc_gid = nd->nd_cred->cr_gid; } NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); v41flags = fxdr_unsigned(uint32_t, *tl++); if ((v41flags & ~(NFSV4EXCH_SUPPMOVEDREFER | NFSV4EXCH_SUPPMOVEDMIGR | NFSV4EXCH_BINDPRINCSTATEID | NFSV4EXCH_MASKPNFS | NFSV4EXCH_UPDCONFIRMEDRECA)) != 0) { nd->nd_repstat = NFSERR_INVAL; goto nfsmout; } if ((v41flags & NFSV4EXCH_UPDCONFIRMEDRECA) != 0) confirm.lval[1] = 1; else confirm.lval[1] = 0; if (TAILQ_EMPTY(&nfsrv_devidhead)) v41flags = NFSV4EXCH_USENONPNFS | NFSV4EXCH_USEPNFSDS; else v41flags = NFSV4EXCH_USEPNFSMDS; sp4type = fxdr_unsigned(uint32_t, *tl); if (sp4type != NFSV4EXCH_SP4NONE) { nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } /* * nfsrv_setclient() does the actual work of adding it to the * client list. If there is no error, the structure has been * linked into the client list and clp should no longer be used * here. When an error is returned, it has not been linked in, * so it should be free'd. 
*/ nd->nd_repstat = nfsrv_setclient(nd, &clp, &clientid, &confirm, p); if (clp != NULL) { NFSSOCKADDRFREE(clp->lc_req.nr_nam); NFSFREEMUTEX(&clp->lc_req.nr_mtx); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); } if (nd->nd_repstat == 0) { if (confirm.lval[1] != 0) v41flags |= NFSV4EXCH_CONFIRMEDR; NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + 3 * NFSX_UNSIGNED); *tl++ = clientid.lval[0]; /* ClientID */ *tl++ = clientid.lval[1]; *tl++ = txdr_unsigned(confirm.lval[0]); /* SequenceID */ *tl++ = txdr_unsigned(v41flags); /* Exch flags */ *tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE); /* No SSV */ owner_minor = 0; /* Owner */ txdr_hyper(owner_minor, tl); /* Minor */ (void)nfsm_strtom(nd, nd->nd_cred->cr_prison->pr_hostuuid, strlen(nd->nd_cred->cr_prison->pr_hostuuid)); /* Major */ NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSX_UNSIGNED); *tl++ = time_uptime; /* Make scope a unique value. */ *tl = txdr_unsigned(1); (void)nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org")); (void)nfsm_strtom(nd, version, strlen(version)); NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME); verstime.tv_sec = 1293840000; /* Jan 1, 2011 */ verstime.tv_nsec = 0; txdr_nfsv4time(&verstime, tl); } NFSEXITCODE2(0, nd); return (0); nfsmout: if (clp != NULL) { NFSSOCKADDRFREE(clp->lc_req.nr_nam); NFSFREEMUTEX(&clp->lc_req.nr_mtx); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); } NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 create session service */ APPLESTATIC int nfsrvd_createsession(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { uint32_t *tl; int error = 0; nfsquad_t clientid, confirm; struct nfsdsession *sep = NULL; uint32_t rdmacnt; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } sep = (struct nfsdsession *)malloc(sizeof(struct nfsdsession), M_NFSDSESSION, M_WAITOK | M_ZERO); sep->sess_refcnt = 1; 
mtx_init(&sep->sess_cbsess.nfsess_mtx, "nfscbsession", NULL, MTX_DEF); NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED); clientid.lval[0] = *tl++; clientid.lval[1] = *tl++; confirm.lval[0] = fxdr_unsigned(uint32_t, *tl++); sep->sess_crflags = fxdr_unsigned(uint32_t, *tl); /* Persistent sessions and RDMA are not supported. */ sep->sess_crflags &= NFSV4CRSESS_CONNBACKCHAN; /* Fore channel attributes. */ NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED); tl++; /* Header pad always 0. */ sep->sess_maxreq = fxdr_unsigned(uint32_t, *tl++); if (sep->sess_maxreq > sb_max_adj - NFS_MAXXDR) { sep->sess_maxreq = sb_max_adj - NFS_MAXXDR; printf("Consider increasing kern.ipc.maxsockbuf\n"); } sep->sess_maxresp = fxdr_unsigned(uint32_t, *tl++); if (sep->sess_maxresp > sb_max_adj - NFS_MAXXDR) { sep->sess_maxresp = sb_max_adj - NFS_MAXXDR; printf("Consider increasing kern.ipc.maxsockbuf\n"); } sep->sess_maxrespcached = fxdr_unsigned(uint32_t, *tl++); sep->sess_maxops = fxdr_unsigned(uint32_t, *tl++); sep->sess_maxslots = fxdr_unsigned(uint32_t, *tl++); if (sep->sess_maxslots > NFSV4_SLOTS) sep->sess_maxslots = NFSV4_SLOTS; rdmacnt = fxdr_unsigned(uint32_t, *tl); if (rdmacnt > 1) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } else if (rdmacnt == 1) NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); /* Back channel attributes. */ NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED); tl++; /* Header pad always 0. 
*/ sep->sess_cbmaxreq = fxdr_unsigned(uint32_t, *tl++); sep->sess_cbmaxresp = fxdr_unsigned(uint32_t, *tl++); sep->sess_cbmaxrespcached = fxdr_unsigned(uint32_t, *tl++); sep->sess_cbmaxops = fxdr_unsigned(uint32_t, *tl++); sep->sess_cbsess.nfsess_foreslots = fxdr_unsigned(uint32_t, *tl++); rdmacnt = fxdr_unsigned(uint32_t, *tl); if (rdmacnt > 1) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } else if (rdmacnt == 1) NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); sep->sess_cbprogram = fxdr_unsigned(uint32_t, *tl); /* * nfsrv_getclient() searches the client list for a match and * returns the appropriate NFSERR status. */ nd->nd_repstat = nfsrv_getclient(clientid, CLOPS_CONFIRM | CLOPS_RENEW, NULL, sep, confirm, sep->sess_cbprogram, nd, p); if (nd->nd_repstat == 0) { NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID); NFSBCOPY(sep->sess_sessionid, tl, NFSX_V4SESSIONID); NFSM_BUILD(tl, uint32_t *, 18 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(confirm.lval[0]); /* sequenceid */ *tl++ = txdr_unsigned(sep->sess_crflags); /* Fore channel attributes. */ *tl++ = 0; *tl++ = txdr_unsigned(sep->sess_maxreq); *tl++ = txdr_unsigned(sep->sess_maxresp); *tl++ = txdr_unsigned(sep->sess_maxrespcached); *tl++ = txdr_unsigned(sep->sess_maxops); *tl++ = txdr_unsigned(sep->sess_maxslots); *tl++ = txdr_unsigned(1); *tl++ = txdr_unsigned(0); /* No RDMA. */ /* Back channel attributes. */ *tl++ = 0; *tl++ = txdr_unsigned(sep->sess_cbmaxreq); *tl++ = txdr_unsigned(sep->sess_cbmaxresp); *tl++ = txdr_unsigned(sep->sess_cbmaxrespcached); *tl++ = txdr_unsigned(sep->sess_cbmaxops); *tl++ = txdr_unsigned(sep->sess_cbsess.nfsess_foreslots); *tl++ = txdr_unsigned(1); *tl = txdr_unsigned(0); /* No RDMA. 
*/ } nfsmout: if (nd->nd_repstat != 0 && sep != NULL) free(sep, M_NFSDSESSION); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 sequence service */ APPLESTATIC int nfsrvd_sequence(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused NFSPROC_T *p, __unused struct nfsexstuff *exp) { uint32_t *tl; uint32_t highest_slotid, sequenceid, sflags, target_highest_slotid; int cache_this, error = 0; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID); NFSBCOPY(tl, nd->nd_sessionid, NFSX_V4SESSIONID); NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED); sequenceid = fxdr_unsigned(uint32_t, *tl++); nd->nd_slotid = fxdr_unsigned(uint32_t, *tl++); highest_slotid = fxdr_unsigned(uint32_t, *tl++); if (*tl == newnfs_true) cache_this = 1; else cache_this = 0; nd->nd_flag |= ND_HASSEQUENCE; nd->nd_repstat = nfsrv_checksequence(nd, sequenceid, &highest_slotid, &target_highest_slotid, cache_this, &sflags, p); if (nd->nd_repstat == 0) { NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID); NFSBCOPY(nd->nd_sessionid, tl, NFSX_V4SESSIONID); NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(sequenceid); *tl++ = txdr_unsigned(nd->nd_slotid); *tl++ = txdr_unsigned(highest_slotid); *tl++ = txdr_unsigned(target_highest_slotid); *tl = txdr_unsigned(sflags); } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 reclaim complete service */ APPLESTATIC int nfsrvd_reclaimcomplete(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused NFSPROC_T *p, __unused struct nfsexstuff *exp) { uint32_t *tl; int error = 0; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) nd->nd_repstat = NFSERR_NOTSUPP; else nd->nd_repstat = nfsrv_checkreclaimcomplete(nd); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 
destroy clientid service */ APPLESTATIC int nfsrvd_destroyclientid(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { uint32_t *tl; nfsquad_t clientid; int error = 0; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); clientid.lval[0] = *tl++; clientid.lval[1] = *tl; nd->nd_repstat = nfsrv_destroyclient(clientid, p); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 destroy session service */ APPLESTATIC int nfsrvd_destroysession(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused NFSPROC_T *p, __unused struct nfsexstuff *exp) { uint8_t *cp, sessid[NFSX_V4SESSIONID]; int error = 0; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(cp, uint8_t *, NFSX_V4SESSIONID); NFSBCOPY(cp, sessid, NFSX_V4SESSIONID); nd->nd_repstat = nfsrv_destroysession(nd, sessid); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 free stateid service */ APPLESTATIC int nfsrvd_freestateid(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { uint32_t *tl; nfsv4stateid_t stateid; int error = 0; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID); stateid.seqid = fxdr_unsigned(uint32_t, *tl++); NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); /* * For the special stateid of other all 0s and seqid == 1, set the * stateid to the current stateid, if it is set. 
*/ if (stateid.seqid == 1 && stateid.other[0] == 0 && stateid.other[1] == 0 && stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) { stateid = nd->nd_curstateid; stateid.seqid = 0; } else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } nd->nd_repstat = nfsrv_freestateid(nd, &stateid, p); /* If the current stateid has been free'd, unset it. */ if (nd->nd_repstat == 0 && (nd->nd_flag & ND_CURSTATEID) != 0 && stateid.other[0] == nd->nd_curstateid.other[0] && stateid.other[1] == nd->nd_curstateid.other[1] && stateid.other[2] == nd->nd_curstateid.other[2]) nd->nd_flag &= ~ND_CURSTATEID; nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 layoutget service */ APPLESTATIC int nfsrvd_layoutget(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp) { uint32_t *tl; nfsv4stateid_t stateid; int error = 0, layoutlen, layouttype, iomode, maxcnt, retonclose; uint64_t offset, len, minlen; char *layp; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED + 3 * NFSX_HYPER + NFSX_STATEID); tl++; /* Signal layout available. Ignore for now. */ layouttype = fxdr_unsigned(int, *tl++); iomode = fxdr_unsigned(int, *tl++); offset = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); tl += 2; minlen = fxdr_hyper(tl); tl += 2; stateid.seqid = fxdr_unsigned(uint32_t, *tl++); NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); maxcnt = fxdr_unsigned(int, tl); NFSD_DEBUG(4, "layoutget ltyp=%d iom=%d off=%ju len=%ju mlen=%ju\n", layouttype, iomode, (uintmax_t)offset, (uintmax_t)len, (uintmax_t)minlen); if (len < minlen || (minlen != UINT64_MAX && offset + minlen < offset) || (len != UINT64_MAX && offset + len < offset)) { nd->nd_repstat = NFSERR_INVAL; goto nfsmout; } /* * For the special stateid of other all 0s and seqid == 1, set the * stateid to the current stateid, if it is set. 
*/ if (stateid.seqid == 1 && stateid.other[0] == 0 && stateid.other[1] == 0 && stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) { stateid = nd->nd_curstateid; stateid.seqid = 0; } else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } layp = NULL; if (layouttype == NFSLAYOUT_NFSV4_1_FILES && nfsrv_maxpnfsmirror == 1) layp = malloc(NFSX_V4FILELAYOUT, M_TEMP, M_WAITOK); else if (layouttype == NFSLAYOUT_FLEXFILE) layp = malloc(NFSX_V4FLEXLAYOUT(nfsrv_maxpnfsmirror), M_TEMP, M_WAITOK); else nd->nd_repstat = NFSERR_UNKNLAYOUTTYPE; if (layp != NULL) nd->nd_repstat = nfsrv_layoutget(nd, vp, exp, layouttype, &iomode, &offset, &len, minlen, &stateid, maxcnt, &retonclose, &layoutlen, layp, nd->nd_cred, p); NFSD_DEBUG(4, "nfsrv_layoutget stat=%u layoutlen=%d\n", nd->nd_repstat, layoutlen); if (nd->nd_repstat == 0) { /* For NFSv4.1, set the Current StateID. */ if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_curstateid = stateid; nd->nd_flag |= ND_CURSTATEID; } NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + NFSX_STATEID + 2 * NFSX_HYPER); *tl++ = txdr_unsigned(retonclose); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY(stateid.other, tl, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); *tl++ = txdr_unsigned(1); /* Only returns one layout. 
*/ txdr_hyper(offset, tl); tl += 2; txdr_hyper(len, tl); tl += 2; *tl++ = txdr_unsigned(iomode); *tl = txdr_unsigned(layouttype); nfsm_strtom(nd, layp, layoutlen); } else if (nd->nd_repstat == NFSERR_LAYOUTTRYLATER) { NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } free(layp, M_TEMP); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 layoutcommit service */ APPLESTATIC int nfsrvd_layoutcommit(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp) { uint32_t *tl; nfsv4stateid_t stateid; int error = 0, hasnewoff, hasnewmtime, layouttype, maxcnt, reclaim; int hasnewsize; uint64_t offset, len, newoff, newsize; struct timespec newmtime; char *layp; layp = NULL; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + 2 * NFSX_HYPER + NFSX_STATEID); offset = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); tl += 2; reclaim = fxdr_unsigned(int, *tl++); stateid.seqid = fxdr_unsigned(uint32_t, *tl++); NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); /* * For the special stateid of other all 0s and seqid == 1, set the * stateid to the current stateid, if it is set. 
*/ if (stateid.seqid == 1 && stateid.other[0] == 0 && stateid.other[1] == 0 && stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) { stateid = nd->nd_curstateid; stateid.seqid = 0; } else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } hasnewoff = fxdr_unsigned(int, *tl); if (hasnewoff != 0) { NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED); newoff = fxdr_hyper(tl); tl += 2; } else NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); hasnewmtime = fxdr_unsigned(int, *tl); if (hasnewmtime != 0) { NFSM_DISSECT(tl, uint32_t *, NFSX_V4TIME + 2 * NFSX_UNSIGNED); fxdr_nfsv4time(tl, &newmtime); tl += (NFSX_V4TIME / NFSX_UNSIGNED); } else NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); layouttype = fxdr_unsigned(int, *tl++); maxcnt = fxdr_unsigned(int, *tl); if (maxcnt > 0) { layp = malloc(maxcnt + 1, M_TEMP, M_WAITOK); error = nfsrv_mtostr(nd, layp, maxcnt); if (error != 0) goto nfsmout; } nd->nd_repstat = nfsrv_layoutcommit(nd, vp, layouttype, hasnewoff, newoff, offset, len, hasnewmtime, &newmtime, reclaim, &stateid, maxcnt, layp, &hasnewsize, &newsize, nd->nd_cred, p); NFSD_DEBUG(4, "nfsrv_layoutcommit stat=%u\n", nd->nd_repstat); if (nd->nd_repstat == 0) { if (hasnewsize != 0) { NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED + NFSX_HYPER); *tl++ = newnfs_true; txdr_hyper(newsize, tl); } else { NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } } nfsmout: free(layp, M_TEMP); vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 layoutreturn service */ APPLESTATIC int nfsrvd_layoutreturn(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp) { uint32_t *tl; nfsv4stateid_t stateid; int error = 0, fnd, kind, layouttype, iomode, maxcnt, reclaim; uint64_t offset, len; char *layp; layp = NULL; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED); reclaim = *tl++; layouttype = 
fxdr_unsigned(int, *tl++); iomode = fxdr_unsigned(int, *tl++); kind = fxdr_unsigned(int, *tl); NFSD_DEBUG(4, "layoutreturn recl=%d ltyp=%d iom=%d kind=%d\n", reclaim, layouttype, iomode, kind); if (kind == NFSV4LAYOUTRET_FILE) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID + NFSX_UNSIGNED); offset = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); tl += 2; stateid.seqid = fxdr_unsigned(uint32_t, *tl++); NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); /* * For the special stateid of other all 0s and seqid == 1, set * the stateid to the current stateid, if it is set. */ if (stateid.seqid == 1 && stateid.other[0] == 0 && stateid.other[1] == 0 && stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) { stateid = nd->nd_curstateid; stateid.seqid = 0; } else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } maxcnt = fxdr_unsigned(int, *tl); if (maxcnt > 0) { layp = malloc(maxcnt + 1, M_TEMP, M_WAITOK); error = nfsrv_mtostr(nd, layp, maxcnt); if (error != 0) goto nfsmout; } } else { if (reclaim == newnfs_true) { nd->nd_repstat = NFSERR_INVAL; goto nfsmout; } offset = len = 0; maxcnt = 0; } nd->nd_repstat = nfsrv_layoutreturn(nd, vp, layouttype, iomode, offset, len, reclaim, kind, &stateid, maxcnt, layp, &fnd, nd->nd_cred, p); NFSD_DEBUG(4, "nfsrv_layoutreturn stat=%u fnd=%d\n", nd->nd_repstat, fnd); if (nd->nd_repstat == 0) { NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); if (fnd != 0) { *tl = newnfs_true; NFSM_BUILD(tl, uint32_t *, NFSX_STATEID); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY(stateid.other, tl, NFSX_STATEIDOTHER); } else *tl = newnfs_false; } nfsmout: free(layp, M_TEMP); vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 getdeviceinfo service */ APPLESTATIC int nfsrvd_getdevinfo(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { uint32_t *tl, maxcnt, notify[NFSV4_NOTIFYBITMAP]; int cnt, devaddrlen, error = 0, 
i, layouttype; char devid[NFSX_V4DEVICEID], *devaddr; time_t dev_time; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED + NFSX_V4DEVICEID); NFSBCOPY(tl, devid, NFSX_V4DEVICEID); tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); layouttype = fxdr_unsigned(int, *tl++); maxcnt = fxdr_unsigned(uint32_t, *tl++); cnt = fxdr_unsigned(int, *tl); NFSD_DEBUG(4, "getdevinfo ltyp=%d maxcnt=%u bitcnt=%d\n", layouttype, maxcnt, cnt); if (cnt > NFSV4_NOTIFYBITMAP || cnt < 0) { nd->nd_repstat = NFSERR_INVAL; goto nfsmout; } if (cnt > 0) { NFSM_DISSECT(tl, uint32_t *, cnt * NFSX_UNSIGNED); for (i = 0; i < cnt; i++) notify[i] = fxdr_unsigned(uint32_t, *tl++); } for (i = cnt; i < NFSV4_NOTIFYBITMAP; i++) notify[i] = 0; /* * Check that the device id is not stale. Device ids are recreated * each time the nfsd threads are restarted. */ NFSBCOPY(devid, &dev_time, sizeof(dev_time)); if (dev_time != nfsdev_time) { nd->nd_repstat = NFSERR_NOENT; goto nfsmout; } /* Look for the device id. 
*/ nd->nd_repstat = nfsrv_getdevinfo(devid, layouttype, &maxcnt, notify, &devaddrlen, &devaddr); NFSD_DEBUG(4, "nfsrv_getdevinfo stat=%u\n", nd->nd_repstat); if (nd->nd_repstat == 0) { NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(layouttype); nfsm_strtom(nd, devaddr, devaddrlen); cnt = 0; for (i = 0; i < NFSV4_NOTIFYBITMAP; i++) { if (notify[i] != 0) cnt = i + 1; } NFSM_BUILD(tl, uint32_t *, (cnt + 1) * NFSX_UNSIGNED); *tl++ = txdr_unsigned(cnt); for (i = 0; i < cnt; i++) *tl++ = txdr_unsigned(notify[i]); } else if (nd->nd_repstat == NFSERR_TOOSMALL) { NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(maxcnt); } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 service not supported */ APPLESTATIC int nfsrvd_notsupp(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused NFSPROC_T *p, __unused struct nfsexstuff *exp) { nd->nd_repstat = NFSERR_NOTSUPP; NFSEXITCODE2(0, nd); return (0); }