diff --git a/sys/fs/nfs/nfs_commonkrpc.c b/sys/fs/nfs/nfs_commonkrpc.c index 19896a59718d..daf8082fa1c3 100644 --- a/sys/fs/nfs/nfs_commonkrpc.c +++ b/sys/fs/nfs/nfs_commonkrpc.c @@ -1,1431 +1,1435 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1991, 1993, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * Socket operations for use by nfs */ #include "opt_kgssapi.h" #include "opt_nfs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KDTRACE_HOOKS #include dtrace_nfsclient_nfs23_start_probe_func_t dtrace_nfscl_nfs234_start_probe; dtrace_nfsclient_nfs23_done_probe_func_t dtrace_nfscl_nfs234_done_probe; /* * Registered probes by RPC type. */ uint32_t nfscl_nfs2_start_probes[NFSV41_NPROCS + 1]; uint32_t nfscl_nfs2_done_probes[NFSV41_NPROCS + 1]; uint32_t nfscl_nfs3_start_probes[NFSV41_NPROCS + 1]; uint32_t nfscl_nfs3_done_probes[NFSV41_NPROCS + 1]; uint32_t nfscl_nfs4_start_probes[NFSV41_NPROCS + 1]; uint32_t nfscl_nfs4_done_probes[NFSV41_NPROCS + 1]; #endif NFSSTATESPINLOCK; NFSREQSPINLOCK; NFSDLOCKMUTEX; NFSCLSTATEMUTEX; extern struct nfsstatsv1 nfsstatsv1; extern struct nfsreqhead nfsd_reqq; extern int nfscl_ticks; extern void (*ncl_call_invalcaches)(struct vnode *); extern int nfs_numnfscbd; extern int nfscl_debuglevel; extern int nfsrv_lease; SVCPOOL *nfscbd_pool; static int nfsrv_gsscallbackson = 0; static int nfs_bufpackets = 4; static int nfs_reconnects; static int nfs3_jukebox_delay = 10; static int nfs_skip_wcc_data_onerr = 1; static int nfs_dsretries = 2; SYSCTL_DECL(_vfs_nfs); SYSCTL_INT(_vfs_nfs, OID_AUTO, bufpackets, CTLFLAG_RW, &nfs_bufpackets, 0, "Buffer reservation size 2 < x < 64"); SYSCTL_INT(_vfs_nfs, OID_AUTO, reconnects, CTLFLAG_RD, &nfs_reconnects, 0, "Number of times the nfs client has had to reconnect"); SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs3_jukebox_delay, CTLFLAG_RW, &nfs3_jukebox_delay, 0, "Number of seconds to delay a retry after receiving EJUKEBOX"); SYSCTL_INT(_vfs_nfs, OID_AUTO, skip_wcc_data_onerr, CTLFLAG_RW, &nfs_skip_wcc_data_onerr, 0, "Disable weak cache consistency checking when server returns an error"); SYSCTL_INT(_vfs_nfs, OID_AUTO, dsretries, CTLFLAG_RW, &nfs_dsretries, 0, "Number of retries for a DS RPC before failure"); static void nfs_down(struct nfsmount *, struct thread *, const char *, int, int); static void nfs_up(struct nfsmount *, struct thread *, const char *, int, int); static int nfs_msg(struct thread *, const char *, const char *, int); struct nfs_cached_auth { int ca_refs; /* refcount, including 1 from the cache */ uid_t ca_uid; /* uid that corresponds to this auth */ AUTH *ca_auth; /* RPC auth handle */ }; static int nfsv2_procid[NFS_V3NPROCS] = { NFSV2PROC_NULL, NFSV2PROC_GETATTR, NFSV2PROC_SETATTR, NFSV2PROC_LOOKUP, NFSV2PROC_NOOP, NFSV2PROC_READLINK, NFSV2PROC_READ, NFSV2PROC_WRITE, NFSV2PROC_CREATE, NFSV2PROC_MKDIR, NFSV2PROC_SYMLINK, NFSV2PROC_CREATE, NFSV2PROC_REMOVE, NFSV2PROC_RMDIR, NFSV2PROC_RENAME, NFSV2PROC_LINK, NFSV2PROC_READDIR, NFSV2PROC_NOOP, NFSV2PROC_STATFS, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, }; /* * Initialize sockets and congestion for a new NFS connection. * We do not free the sockaddr if error. * Which arguments are set to NULL indicate what kind of call it is. * cred == NULL --> a call to connect to a pNFS DS * nmp == NULL --> indicates an upcall to userland or a NFSv4.0 callback */ int newnfs_connect(struct nfsmount *nmp, struct nfssockreq *nrp, struct ucred *cred, NFSPROC_T *p, int callback_retry_mult, bool dotls) { int rcvreserve, sndreserve; int pktscale, pktscalesav; struct sockaddr *saddr; struct ucred *origcred; CLIENT *client; struct netconfig *nconf; struct socket *so; int one = 1, retries, error = 0; struct thread *td = curthread; SVCXPRT *xprt; struct timeval timo; /* * We need to establish the socket using the credentials of * the mountpoint. Some parts of this process (such as * sobind() and soconnect()) will use the curent thread's * credential instead of the socket credential. To work * around this, temporarily change the current thread's * credential to that of the mountpoint. * * XXX: It would be better to explicitly pass the correct * credential to sobind() and soconnect(). */ origcred = td->td_ucred; /* * Use the credential in nr_cred, if not NULL. */ if (nrp->nr_cred != NULL) td->td_ucred = nrp->nr_cred; else td->td_ucred = cred; saddr = nrp->nr_nam; if (saddr->sa_family == AF_INET) if (nrp->nr_sotype == SOCK_DGRAM) nconf = getnetconfigent("udp"); else nconf = getnetconfigent("tcp"); else if (nrp->nr_sotype == SOCK_DGRAM) nconf = getnetconfigent("udp6"); else nconf = getnetconfigent("tcp6"); pktscale = nfs_bufpackets; if (pktscale < 2) pktscale = 2; if (pktscale > 64) pktscale = 64; pktscalesav = pktscale; /* * soreserve() can fail if sb_max is too small, so shrink pktscale * and try again if there is an error. * Print a log message suggesting increasing sb_max. * Creating a socket and doing this is necessary since, if the * reservation sizes are too large and will make soreserve() fail, * the connection will work until a large send is attempted and * then it will loop in the krpc code. */ so = NULL; saddr = NFSSOCKADDR(nrp->nr_nam, struct sockaddr *); error = socreate(saddr->sa_family, &so, nrp->nr_sotype, nrp->nr_soproto, td->td_ucred, td); if (error != 0) goto out; do { if (error != 0 && pktscale > 2) { if (nmp != NULL && nrp->nr_sotype == SOCK_STREAM && pktscale == pktscalesav) printf("Consider increasing kern.ipc.maxsockbuf\n"); pktscale--; } if (nrp->nr_sotype == SOCK_DGRAM) { if (nmp != NULL) { sndreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) * pktscale; rcvreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) * pktscale; } else { sndreserve = rcvreserve = 1024 * pktscale; } } else { if (nrp->nr_sotype != SOCK_STREAM) panic("nfscon sotype"); if (nmp != NULL) { sndreserve = (NFS_MAXBSIZE + NFS_MAXXDR + sizeof (u_int32_t)) * pktscale; rcvreserve = (NFS_MAXBSIZE + NFS_MAXXDR + sizeof (u_int32_t)) * pktscale; } else { sndreserve = rcvreserve = 1024 * pktscale; } } error = soreserve(so, sndreserve, rcvreserve); if (error != 0 && nmp != NULL && nrp->nr_sotype == SOCK_STREAM && pktscale <= 2) printf("Must increase kern.ipc.maxsockbuf or reduce" " rsize, wsize\n"); } while (error != 0 && pktscale > 2); soclose(so); if (error != 0) goto out; client = clnt_reconnect_create(nconf, saddr, nrp->nr_prog, nrp->nr_vers, sndreserve, rcvreserve); CLNT_CONTROL(client, CLSET_WAITCHAN, "nfsreq"); if (nmp != NULL) { if ((nmp->nm_flag & NFSMNT_INT)) CLNT_CONTROL(client, CLSET_INTERRUPTIBLE, &one); if ((nmp->nm_flag & NFSMNT_RESVPORT)) CLNT_CONTROL(client, CLSET_PRIVPORT, &one); - if (NFSHASTLS(nmp)) + if (NFSHASTLS(nmp)) { CLNT_CONTROL(client, CLSET_TLS, &one); + if (nmp->nm_tlscertname != NULL) + CLNT_CONTROL(client, CLSET_TLSCERTNAME, + nmp->nm_tlscertname); + } if (NFSHASSOFT(nmp)) { if (nmp->nm_sotype == SOCK_DGRAM) /* * For UDP, the large timeout for a reconnect * will be set to "nm_retry * nm_timeo / 2", so * we only want to do 2 reconnect timeout * retries. */ retries = 2; else retries = nmp->nm_retry; } else retries = INT_MAX; if (NFSHASNFSV4N(nmp)) { if (cred != NULL) { if (NFSHASSOFT(nmp)) { /* * This should be a DS mount. * Use CLSET_TIMEOUT to set the timeout * for connections to DSs instead of * specifying a timeout on each RPC. * This is done so that SO_SNDTIMEO * is set on the TCP socket as well * as specifying a time limit when * waiting for an RPC reply. Useful * if the send queue for the TCP * connection has become constipated, * due to a failed DS. * The choice of lease_duration / 4 is * fairly arbitrary, but seems to work * ok, with a lower bound of 10sec. */ timo.tv_sec = nfsrv_lease / 4; if (timo.tv_sec < 10) timo.tv_sec = 10; timo.tv_usec = 0; CLNT_CONTROL(client, CLSET_TIMEOUT, &timo); } /* * Make sure the nfscbd_pool doesn't get * destroyed while doing this. */ NFSD_LOCK(); if (nfs_numnfscbd > 0) { nfs_numnfscbd++; NFSD_UNLOCK(); xprt = svc_vc_create_backchannel( nfscbd_pool); CLNT_CONTROL(client, CLSET_BACKCHANNEL, xprt); NFSD_LOCK(); nfs_numnfscbd--; if (nfs_numnfscbd == 0) wakeup(&nfs_numnfscbd); } NFSD_UNLOCK(); } else { /* * cred == NULL for a DS connect. * For connects to a DS, set a retry limit * so that failed DSs will be detected. * This is ok for NFSv4.1, since a DS does * not maintain open/lock state and is the * only case where using a "soft" mount is * recommended for NFSv4. * For mounts from the MDS to DS, this is done * via mount options, but that is not the case * here. The retry limit here can be adjusted * via the sysctl vfs.nfs.dsretries. * See the comment above w.r.t. timeout. */ timo.tv_sec = nfsrv_lease / 4; if (timo.tv_sec < 10) timo.tv_sec = 10; timo.tv_usec = 0; CLNT_CONTROL(client, CLSET_TIMEOUT, &timo); retries = nfs_dsretries; } } } else { /* * Three cases: * - Null RPC callback to client * - Non-Null RPC callback to client, wait a little longer * - upcalls to nfsuserd and gssd (clp == NULL) */ if (callback_retry_mult == 0) { retries = NFSV4_UPCALLRETRY; CLNT_CONTROL(client, CLSET_PRIVPORT, &one); } else { retries = NFSV4_CALLBACKRETRY * callback_retry_mult; } if (dotls) CLNT_CONTROL(client, CLSET_TLS, &one); } CLNT_CONTROL(client, CLSET_RETRIES, &retries); if (nmp != NULL) { /* * For UDP, there are 2 timeouts: * - CLSET_RETRY_TIMEOUT sets the initial timeout for the timer * that does a retransmit of an RPC request using the same * socket and xid. This is what you normally want to do, * since NFS servers depend on "same xid" for their * Duplicate Request Cache. * - timeout specified in CLNT_CALL_MBUF(), which specifies when * retransmits on the same socket should fail and a fresh * socket created. Each of these timeouts counts as one * CLSET_RETRIES as set above. * Set the initial retransmit timeout for UDP. This timeout * doesn't exist for TCP and the following call just fails, * which is ok. */ timo.tv_sec = nmp->nm_timeo / NFS_HZ; timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * 1000000 / NFS_HZ; CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, &timo); } mtx_lock(&nrp->nr_mtx); if (nrp->nr_client != NULL) { mtx_unlock(&nrp->nr_mtx); /* * Someone else already connected. */ CLNT_RELEASE(client); } else { nrp->nr_client = client; /* * Protocols that do not require connections may be optionally * left unconnected for servers that reply from a port other * than NFS_PORT. */ if (nmp == NULL || (nmp->nm_flag & NFSMNT_NOCONN) == 0) { mtx_unlock(&nrp->nr_mtx); CLNT_CONTROL(client, CLSET_CONNECT, &one); } else mtx_unlock(&nrp->nr_mtx); } out: /* Restore current thread's credentials. */ td->td_ucred = origcred; NFSEXITCODE(error); return (error); } /* * NFS disconnect. Clean up and unlink. */ void newnfs_disconnect(struct nfssockreq *nrp) { CLIENT *client; mtx_lock(&nrp->nr_mtx); if (nrp->nr_client != NULL) { client = nrp->nr_client; nrp->nr_client = NULL; mtx_unlock(&nrp->nr_mtx); rpc_gss_secpurge_call(client); CLNT_CLOSE(client); CLNT_RELEASE(client); } else { mtx_unlock(&nrp->nr_mtx); } } static AUTH * nfs_getauth(struct nfssockreq *nrp, int secflavour, char *clnt_principal, char *srv_principal, gss_OID mech_oid, struct ucred *cred) { rpc_gss_service_t svc; AUTH *auth; switch (secflavour) { case RPCSEC_GSS_KRB5: case RPCSEC_GSS_KRB5I: case RPCSEC_GSS_KRB5P: if (!mech_oid) { if (!rpc_gss_mech_to_oid_call("kerberosv5", &mech_oid)) return (NULL); } if (secflavour == RPCSEC_GSS_KRB5) svc = rpc_gss_svc_none; else if (secflavour == RPCSEC_GSS_KRB5I) svc = rpc_gss_svc_integrity; else svc = rpc_gss_svc_privacy; if (clnt_principal == NULL) auth = rpc_gss_secfind_call(nrp->nr_client, cred, srv_principal, mech_oid, svc); else { auth = rpc_gss_seccreate_call(nrp->nr_client, cred, clnt_principal, srv_principal, "kerberosv5", svc, NULL, NULL, NULL); return (auth); } if (auth != NULL) return (auth); /* fallthrough */ case AUTH_SYS: default: return (authunix_create(cred)); } } /* * Callback from the RPC code to generate up/down notifications. */ struct nfs_feedback_arg { struct nfsmount *nf_mount; int nf_lastmsg; /* last tprintf */ int nf_tprintfmsg; struct thread *nf_td; }; static void nfs_feedback(int type, int proc, void *arg) { struct nfs_feedback_arg *nf = (struct nfs_feedback_arg *) arg; struct nfsmount *nmp = nf->nf_mount; time_t now; switch (type) { case FEEDBACK_REXMIT2: case FEEDBACK_RECONNECT: now = NFSD_MONOSEC; if (nf->nf_lastmsg + nmp->nm_tprintf_delay < now) { nfs_down(nmp, nf->nf_td, "not responding", 0, NFSSTA_TIMEO); nf->nf_tprintfmsg = TRUE; nf->nf_lastmsg = now; } break; case FEEDBACK_OK: nfs_up(nf->nf_mount, nf->nf_td, "is alive again", NFSSTA_TIMEO, nf->nf_tprintfmsg); break; } } /* * newnfs_request - goes something like this * - does the rpc by calling the krpc layer * - break down rpc header and return with nfs reply * nb: always frees up nd_mreq mbuf list */ int newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp, struct nfsclient *clp, struct nfssockreq *nrp, vnode_t vp, struct thread *td, struct ucred *cred, u_int32_t prog, u_int32_t vers, u_char *retsum, int toplevel, u_int64_t *xidp, struct nfsclsession *dssep) { uint32_t retseq, retval, slotseq, *tl; time_t waituntil; int i = 0, j = 0, opcnt, set_sigset = 0, slot; int error = 0, usegssname = 0, secflavour = AUTH_SYS; int freeslot, maxslot, reterr, slotpos, timeo; u_int16_t procnum; u_int trylater_delay = 1; struct nfs_feedback_arg nf; struct timeval timo; AUTH *auth; struct rpc_callextra ext; enum clnt_stat stat; struct nfsreq *rep = NULL; char *srv_principal = NULL, *clnt_principal = NULL; sigset_t oldset; struct ucred *authcred; struct nfsclsession *sep; uint8_t sessionid[NFSX_V4SESSIONID]; sep = dssep; if (xidp != NULL) *xidp = 0; /* Reject requests while attempting a forced unmount. */ if (nmp != NULL && NFSCL_FORCEDISM(nmp->nm_mountp)) { m_freem(nd->nd_mreq); return (ESTALE); } /* * Set authcred, which is used to acquire RPC credentials to * the cred argument, by default. The crhold() should not be * necessary, but will ensure that some future code change * doesn't result in the credential being free'd prematurely. */ authcred = crhold(cred); /* For client side interruptible mounts, mask off the signals. */ if (nmp != NULL && td != NULL && NFSHASINT(nmp)) { newnfs_set_sigmask(td, &oldset); set_sigset = 1; } /* * XXX if not already connected call nfs_connect now. Longer * term, change nfs_mount to call nfs_connect unconditionally * and let clnt_reconnect_create handle reconnects. */ if (nrp->nr_client == NULL) newnfs_connect(nmp, nrp, cred, td, 0, false); /* * For a client side mount, nmp is != NULL and clp == NULL. For * server calls (callbacks or upcalls), nmp == NULL. */ if (clp != NULL) { NFSLOCKSTATE(); if ((clp->lc_flags & LCL_GSS) && nfsrv_gsscallbackson) { secflavour = RPCSEC_GSS_KRB5; if (nd->nd_procnum != NFSPROC_NULL) { if (clp->lc_flags & LCL_GSSINTEGRITY) secflavour = RPCSEC_GSS_KRB5I; else if (clp->lc_flags & LCL_GSSPRIVACY) secflavour = RPCSEC_GSS_KRB5P; } } NFSUNLOCKSTATE(); } else if (nmp != NULL && NFSHASKERB(nmp) && nd->nd_procnum != NFSPROC_NULL) { if (NFSHASALLGSSNAME(nmp) && nmp->nm_krbnamelen > 0) nd->nd_flag |= ND_USEGSSNAME; if ((nd->nd_flag & ND_USEGSSNAME) != 0) { /* * If there is a client side host based credential, * use that, otherwise use the system uid, if set. * The system uid is in the nmp->nm_sockreq.nr_cred * credentials. */ if (nmp->nm_krbnamelen > 0) { usegssname = 1; clnt_principal = nmp->nm_krbname; } else if (nmp->nm_uid != (uid_t)-1) { KASSERT(nmp->nm_sockreq.nr_cred != NULL, ("newnfs_request: NULL nr_cred")); crfree(authcred); authcred = crhold(nmp->nm_sockreq.nr_cred); } } else if (nmp->nm_krbnamelen == 0 && nmp->nm_uid != (uid_t)-1 && cred->cr_uid == (uid_t)0) { /* * If there is no host based principal name and * the system uid is set and this is root, use the * system uid, since root won't have user * credentials in a credentials cache file. * The system uid is in the nmp->nm_sockreq.nr_cred * credentials. */ KASSERT(nmp->nm_sockreq.nr_cred != NULL, ("newnfs_request: NULL nr_cred")); crfree(authcred); authcred = crhold(nmp->nm_sockreq.nr_cred); } if (NFSHASINTEGRITY(nmp)) secflavour = RPCSEC_GSS_KRB5I; else if (NFSHASPRIVACY(nmp)) secflavour = RPCSEC_GSS_KRB5P; else secflavour = RPCSEC_GSS_KRB5; srv_principal = NFSMNT_SRVKRBNAME(nmp); } else if (nmp != NULL && !NFSHASKERB(nmp) && nd->nd_procnum != NFSPROC_NULL && (nd->nd_flag & ND_USEGSSNAME) != 0) { /* * Use the uid that did the mount when the RPC is doing * NFSv4 system operations, as indicated by the * ND_USEGSSNAME flag, for the AUTH_SYS case. * The credentials in nm_sockreq.nr_cred were used for the * mount. */ KASSERT(nmp->nm_sockreq.nr_cred != NULL, ("newnfs_request: NULL nr_cred")); crfree(authcred); authcred = crhold(nmp->nm_sockreq.nr_cred); } if (nmp != NULL) { bzero(&nf, sizeof(struct nfs_feedback_arg)); nf.nf_mount = nmp; nf.nf_td = td; nf.nf_lastmsg = NFSD_MONOSEC - ((nmp->nm_tprintf_delay)-(nmp->nm_tprintf_initial_delay)); } if (nd->nd_procnum == NFSPROC_NULL) auth = authnone_create(); else if (usegssname) { /* * For this case, the authenticator is held in the * nfssockreq structure, so don't release the reference count * held on it. --> Don't AUTH_DESTROY() it in this function. */ if (nrp->nr_auth == NULL) nrp->nr_auth = nfs_getauth(nrp, secflavour, clnt_principal, srv_principal, NULL, authcred); else rpc_gss_refresh_auth_call(nrp->nr_auth); auth = nrp->nr_auth; } else auth = nfs_getauth(nrp, secflavour, NULL, srv_principal, NULL, authcred); crfree(authcred); if (auth == NULL) { m_freem(nd->nd_mreq); if (set_sigset) newnfs_restore_sigmask(td, &oldset); return (EACCES); } bzero(&ext, sizeof(ext)); ext.rc_auth = auth; if (nmp != NULL) { ext.rc_feedback = nfs_feedback; ext.rc_feedback_arg = &nf; } procnum = nd->nd_procnum; if ((nd->nd_flag & ND_NFSV4) && nd->nd_procnum != NFSPROC_NULL && nd->nd_procnum != NFSV4PROC_CBCOMPOUND) procnum = NFSV4PROC_COMPOUND; if (nmp != NULL) { NFSINCRGLOBAL(nfsstatsv1.rpcrequests); /* Map the procnum to the old NFSv2 one, as required. */ if ((nd->nd_flag & ND_NFSV2) != 0) { if (nd->nd_procnum < NFS_V3NPROCS) procnum = nfsv2_procid[nd->nd_procnum]; else procnum = NFSV2PROC_NOOP; } /* * Now only used for the R_DONTRECOVER case, but until that is * supported within the krpc code, I need to keep a queue of * outstanding RPCs for nfsv4 client requests. */ if ((nd->nd_flag & ND_NFSV4) && procnum == NFSV4PROC_COMPOUND) rep = malloc(sizeof(struct nfsreq), M_NFSDREQ, M_WAITOK); #ifdef KDTRACE_HOOKS if (dtrace_nfscl_nfs234_start_probe != NULL) { uint32_t probe_id; int probe_procnum; if (nd->nd_flag & ND_NFSV4) { probe_id = nfscl_nfs4_start_probes[nd->nd_procnum]; probe_procnum = nd->nd_procnum; } else if (nd->nd_flag & ND_NFSV3) { probe_id = nfscl_nfs3_start_probes[procnum]; probe_procnum = procnum; } else { probe_id = nfscl_nfs2_start_probes[nd->nd_procnum]; probe_procnum = procnum; } if (probe_id != 0) (dtrace_nfscl_nfs234_start_probe) (probe_id, vp, nd->nd_mreq, cred, probe_procnum); } #endif } freeslot = -1; /* Set to slot that needs to be free'd */ tryagain: slot = -1; /* Slot that needs a sequence# increment. */ /* * This timeout specifies when a new socket should be created, * along with new xid values. For UDP, this should be done * infrequently, since retransmits of RPC requests should normally * use the same xid. */ if (nmp == NULL) { timo.tv_usec = 0; if (clp == NULL) timo.tv_sec = NFSV4_UPCALLTIMEO; else timo.tv_sec = NFSV4_CALLBACKTIMEO; } else { if (nrp->nr_sotype != SOCK_DGRAM) { timo.tv_usec = 0; if ((nmp->nm_flag & NFSMNT_NFSV4)) timo.tv_sec = INT_MAX; else timo.tv_sec = NFS_TCPTIMEO; } else { if (NFSHASSOFT(nmp)) { /* * CLSET_RETRIES is set to 2, so this should be * half of the total timeout required. */ timeo = nmp->nm_retry * nmp->nm_timeo / 2; if (timeo < 1) timeo = 1; timo.tv_sec = timeo / NFS_HZ; timo.tv_usec = (timeo % NFS_HZ) * 1000000 / NFS_HZ; } else { /* For UDP hard mounts, use a large value. */ timo.tv_sec = NFS_MAXTIMEO / NFS_HZ; timo.tv_usec = 0; } } if (rep != NULL) { rep->r_flags = 0; rep->r_nmp = nmp; /* * Chain request into list of outstanding requests. */ NFSLOCKREQ(); TAILQ_INSERT_TAIL(&nfsd_reqq, rep, r_chain); NFSUNLOCKREQ(); } } nd->nd_mrep = NULL; if (clp != NULL && sep != NULL) stat = clnt_bck_call(nrp->nr_client, &ext, procnum, nd->nd_mreq, &nd->nd_mrep, timo, sep->nfsess_xprt); else stat = CLNT_CALL_MBUF(nrp->nr_client, &ext, procnum, nd->nd_mreq, &nd->nd_mrep, timo); NFSCL_DEBUG(2, "clnt call=%d\n", stat); if (rep != NULL) { /* * RPC done, unlink the request. */ NFSLOCKREQ(); TAILQ_REMOVE(&nfsd_reqq, rep, r_chain); NFSUNLOCKREQ(); } /* * If there was a successful reply and a tprintf msg. * tprintf a response. */ if (stat == RPC_SUCCESS) { error = 0; } else if (stat == RPC_TIMEDOUT) { NFSINCRGLOBAL(nfsstatsv1.rpctimeouts); error = ETIMEDOUT; } else if (stat == RPC_VERSMISMATCH) { NFSINCRGLOBAL(nfsstatsv1.rpcinvalid); error = EOPNOTSUPP; } else if (stat == RPC_PROGVERSMISMATCH) { NFSINCRGLOBAL(nfsstatsv1.rpcinvalid); error = EPROTONOSUPPORT; } else if (stat == RPC_INTR) { error = EINTR; } else if (stat == RPC_CANTSEND || stat == RPC_CANTRECV || stat == RPC_SYSTEMERROR) { /* Check for a session slot that needs to be free'd. */ if ((nd->nd_flag & (ND_NFSV41 | ND_HASSLOTID)) == (ND_NFSV41 | ND_HASSLOTID) && nmp != NULL && nd->nd_procnum != NFSPROC_NULL) { /* * This should only occur when either the MDS or * a client has an RPC against a DS fail. * This happens because these cases use "soft" * connections that can time out and fail. * The slot used for this RPC is now in a * non-deterministic state, but if the slot isn't * free'd, threads can get stuck waiting for a slot. */ if (sep == NULL) sep = nfsmnt_mdssession(nmp); /* * Bump the sequence# out of range, so that reuse of * this slot will result in an NFSERR_SEQMISORDERED * error and not a bogus cached RPC reply. */ mtx_lock(&sep->nfsess_mtx); sep->nfsess_slotseq[nd->nd_slotid] += 10; mtx_unlock(&sep->nfsess_mtx); /* And free the slot. */ nfsv4_freeslot(sep, nd->nd_slotid); } NFSINCRGLOBAL(nfsstatsv1.rpcinvalid); error = ENXIO; } else { NFSINCRGLOBAL(nfsstatsv1.rpcinvalid); error = EACCES; } if (error) { m_freem(nd->nd_mreq); if (usegssname == 0) AUTH_DESTROY(auth); if (rep != NULL) free(rep, M_NFSDREQ); if (set_sigset) newnfs_restore_sigmask(td, &oldset); return (error); } KASSERT(nd->nd_mrep != NULL, ("mrep shouldn't be NULL if no error\n")); /* * Search for any mbufs that are not a multiple of 4 bytes long * or with m_data not longword aligned. * These could cause pointer alignment problems, so copy them to * well aligned mbufs. */ newnfs_realign(&nd->nd_mrep, M_WAITOK); nd->nd_md = nd->nd_mrep; nd->nd_dpos = mtod(nd->nd_md, caddr_t); nd->nd_repstat = 0; if (nd->nd_procnum != NFSPROC_NULL && nd->nd_procnum != NFSV4PROC_CBNULL) { /* If sep == NULL, set it to the default in nmp. */ if (sep == NULL && nmp != NULL) sep = nfsmnt_mdssession(nmp); /* * and now the actual NFS xdr. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nd->nd_repstat = fxdr_unsigned(u_int32_t, *tl); if (nd->nd_repstat >= 10000) NFSCL_DEBUG(1, "proc=%d reps=%d\n", (int)nd->nd_procnum, (int)nd->nd_repstat); /* * Get rid of the tag, return count and SEQUENCE result for * NFSv4. */ if ((nd->nd_flag & ND_NFSV4) != 0 && nd->nd_repstat != NFSERR_MINORVERMISMATCH) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); opcnt = fxdr_unsigned(int, *tl++); i = fxdr_unsigned(int, *tl++); j = fxdr_unsigned(int, *tl); if (j >= 10000) NFSCL_DEBUG(1, "fop=%d fst=%d\n", i, j); /* * If the first op is Sequence, free up the slot. */ if ((nmp != NULL && i == NFSV4OP_SEQUENCE && j != 0) || (clp != NULL && i == NFSV4OP_CBSEQUENCE && j != 0)) NFSCL_DEBUG(1, "failed seq=%d\n", j); if (((nmp != NULL && i == NFSV4OP_SEQUENCE && j == 0) || (clp != NULL && i == NFSV4OP_CBSEQUENCE && j == 0)) && sep != NULL) { if (i == NFSV4OP_SEQUENCE) NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID + 5 * NFSX_UNSIGNED); else NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID + 4 * NFSX_UNSIGNED); mtx_lock(&sep->nfsess_mtx); if (bcmp(tl, sep->nfsess_sessionid, NFSX_V4SESSIONID) == 0) { tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; retseq = fxdr_unsigned(uint32_t, *tl++); slot = fxdr_unsigned(int, *tl++); freeslot = slot; if (retseq != sep->nfsess_slotseq[slot]) printf("retseq diff 0x%x\n", retseq); retval = fxdr_unsigned(uint32_t, *++tl); if ((retval + 1) < sep->nfsess_foreslots ) sep->nfsess_foreslots = (retval + 1); else if ((retval + 1) > sep->nfsess_foreslots) sep->nfsess_foreslots = (retval < 64) ? (retval + 1) : 64; } mtx_unlock(&sep->nfsess_mtx); /* Grab the op and status for the next one. */ if (opcnt > 1) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl++); j = fxdr_unsigned(int, *tl); } } } if (nd->nd_repstat != 0) { if (nd->nd_repstat == NFSERR_BADSESSION && nmp != NULL && dssep == NULL && (nd->nd_flag & ND_NFSV41) != 0) { /* * If this is a client side MDS RPC, mark * the MDS session defunct and initiate * recovery, as required. * The nfsess_defunct field is protected by * the NFSLOCKMNT()/nm_mtx lock and not the * nfsess_mtx lock to simplify its handling, * for the MDS session. This lock is also * sufficient for nfsess_sessionid, since it * never changes in the structure. */ NFSCL_DEBUG(1, "Got badsession\n"); NFSLOCKCLSTATE(); NFSLOCKMNT(nmp); sep = NFSMNT_MDSSESSION(nmp); if (bcmp(sep->nfsess_sessionid, nd->nd_sequence, NFSX_V4SESSIONID) == 0) { /* Initiate recovery. */ sep->nfsess_defunct = 1; NFSCL_DEBUG(1, "Marked defunct\n"); if (nmp->nm_clp != NULL) { nmp->nm_clp->nfsc_flags |= NFSCLFLAGS_RECOVER; wakeup(nmp->nm_clp); } } NFSUNLOCKCLSTATE(); /* * Sleep for up to 1sec waiting for a new * session. */ mtx_sleep(&nmp->nm_sess, &nmp->nm_mtx, PZERO, "nfsbadsess", hz); /* * Get the session again, in case a new one * has been created during the sleep. */ sep = NFSMNT_MDSSESSION(nmp); NFSUNLOCKMNT(nmp); if ((nd->nd_flag & ND_LOOPBADSESS) != 0) { reterr = nfsv4_sequencelookup(nmp, sep, &slotpos, &maxslot, &slotseq, sessionid); if (reterr == 0) { /* Fill in new session info. */ NFSCL_DEBUG(1, "Filling in new sequence\n"); tl = nd->nd_sequence; bcopy(sessionid, tl, NFSX_V4SESSIONID); tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; *tl++ = txdr_unsigned(slotseq); *tl++ = txdr_unsigned(slotpos); *tl = txdr_unsigned(maxslot); } if (reterr == NFSERR_BADSESSION || reterr == 0) { NFSCL_DEBUG(1, "Badsession looping\n"); m_freem(nd->nd_mrep); nd->nd_mrep = NULL; goto tryagain; } nd->nd_repstat = reterr; NFSCL_DEBUG(1, "Got err=%d\n", reterr); } } /* * When clp != NULL, it is a callback and all * callback operations can be retried for NFSERR_DELAY. */ if (((nd->nd_repstat == NFSERR_DELAY || nd->nd_repstat == NFSERR_GRACE) && (nd->nd_flag & ND_NFSV4) && (clp != NULL || (nd->nd_procnum != NFSPROC_DELEGRETURN && nd->nd_procnum != NFSPROC_SETATTR && nd->nd_procnum != NFSPROC_READ && nd->nd_procnum != NFSPROC_READDS && nd->nd_procnum != NFSPROC_WRITE && nd->nd_procnum != NFSPROC_WRITEDS && nd->nd_procnum != NFSPROC_OPEN && nd->nd_procnum != NFSPROC_CREATE && nd->nd_procnum != NFSPROC_OPENCONFIRM && nd->nd_procnum != NFSPROC_OPENDOWNGRADE && nd->nd_procnum != NFSPROC_CLOSE && nd->nd_procnum != NFSPROC_LOCK && nd->nd_procnum != NFSPROC_LOCKU))) || (nd->nd_repstat == NFSERR_DELAY && (nd->nd_flag & ND_NFSV4) == 0) || nd->nd_repstat == NFSERR_RESOURCE) { if (trylater_delay > NFS_TRYLATERDEL) trylater_delay = NFS_TRYLATERDEL; waituntil = NFSD_MONOSEC + trylater_delay; while (NFSD_MONOSEC < waituntil) (void) nfs_catnap(PZERO, 0, "nfstry"); trylater_delay *= 2; if (slot != -1) { mtx_lock(&sep->nfsess_mtx); sep->nfsess_slotseq[slot]++; *nd->nd_slotseq = txdr_unsigned( sep->nfsess_slotseq[slot]); mtx_unlock(&sep->nfsess_mtx); } m_freem(nd->nd_mrep); nd->nd_mrep = NULL; goto tryagain; } /* * If the File Handle was stale, invalidate the * lookup cache, just in case. * (vp != NULL implies a client side call) */ if (nd->nd_repstat == ESTALE && vp != NULL) { cache_purge(vp); if (ncl_call_invalcaches != NULL) (*ncl_call_invalcaches)(vp); } } if ((nd->nd_flag & ND_NFSV4) != 0) { /* Free the slot, as required. */ if (freeslot != -1) nfsv4_freeslot(sep, freeslot); /* * If this op is Putfh, throw its results away. */ if (j >= 10000) NFSCL_DEBUG(1, "nop=%d nst=%d\n", i, j); if (nmp != NULL && i == NFSV4OP_PUTFH && j == 0) { NFSM_DISSECT(tl,u_int32_t *,2 * NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl++); j = fxdr_unsigned(int, *tl); if (j >= 10000) NFSCL_DEBUG(1, "n2op=%d n2st=%d\n", i, j); /* * All Compounds that do an Op that must * be in sequence consist of NFSV4OP_PUTFH * followed by one of these. As such, we * can determine if the seqid# should be * incremented, here. */ if ((i == NFSV4OP_OPEN || i == NFSV4OP_OPENCONFIRM || i == NFSV4OP_OPENDOWNGRADE || i == NFSV4OP_CLOSE || i == NFSV4OP_LOCK || i == NFSV4OP_LOCKU) && (j == 0 || (j != NFSERR_STALECLIENTID && j != NFSERR_STALESTATEID && j != NFSERR_BADSTATEID && j != NFSERR_BADSEQID && j != NFSERR_BADXDR && j != NFSERR_RESOURCE && j != NFSERR_NOFILEHANDLE))) nd->nd_flag |= ND_INCRSEQID; } /* * If this op's status is non-zero, mark * that there is no more data to process. * The exception is Setattr, which always has xdr * when it has failed. */ if (j != 0 && i != NFSV4OP_SETATTR) nd->nd_flag |= ND_NOMOREDATA; /* * If R_DONTRECOVER is set, replace the stale error * reply, so that recovery isn't initiated. */ if ((nd->nd_repstat == NFSERR_STALECLIENTID || nd->nd_repstat == NFSERR_BADSESSION || nd->nd_repstat == NFSERR_STALESTATEID) && rep != NULL && (rep->r_flags & R_DONTRECOVER)) nd->nd_repstat = NFSERR_STALEDONTRECOVER; } } #ifdef KDTRACE_HOOKS if (nmp != NULL && dtrace_nfscl_nfs234_done_probe != NULL) { uint32_t probe_id; int probe_procnum; if (nd->nd_flag & ND_NFSV4) { probe_id = nfscl_nfs4_done_probes[nd->nd_procnum]; probe_procnum = nd->nd_procnum; } else if (nd->nd_flag & ND_NFSV3) { probe_id = nfscl_nfs3_done_probes[procnum]; probe_procnum = procnum; } else { probe_id = nfscl_nfs2_done_probes[nd->nd_procnum]; probe_procnum = procnum; } if (probe_id != 0) (dtrace_nfscl_nfs234_done_probe)(probe_id, vp, nd->nd_mreq, cred, probe_procnum, 0); } #endif m_freem(nd->nd_mreq); if (usegssname == 0) AUTH_DESTROY(auth); if (rep != NULL) free(rep, M_NFSDREQ); if (set_sigset) newnfs_restore_sigmask(td, &oldset); return (0); nfsmout: m_freem(nd->nd_mrep); m_freem(nd->nd_mreq); if (usegssname == 0) AUTH_DESTROY(auth); if (rep != NULL) free(rep, M_NFSDREQ); if (set_sigset) newnfs_restore_sigmask(td, &oldset); return (error); } /* * Mark all of an nfs mount's outstanding requests with R_SOFTTERM and * wait for all requests to complete. This is used by forced unmounts * to terminate any outstanding RPCs. */ int newnfs_nmcancelreqs(struct nfsmount *nmp) { struct nfsclds *dsp; struct __rpc_client *cl; if (nmp->nm_sockreq.nr_client != NULL) CLNT_CLOSE(nmp->nm_sockreq.nr_client); lookformore: NFSLOCKMNT(nmp); TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) { NFSLOCKDS(dsp); if (dsp != TAILQ_FIRST(&nmp->nm_sess) && (dsp->nfsclds_flags & NFSCLDS_CLOSED) == 0 && dsp->nfsclds_sockp != NULL && dsp->nfsclds_sockp->nr_client != NULL) { dsp->nfsclds_flags |= NFSCLDS_CLOSED; cl = dsp->nfsclds_sockp->nr_client; NFSUNLOCKDS(dsp); NFSUNLOCKMNT(nmp); CLNT_CLOSE(cl); goto lookformore; } NFSUNLOCKDS(dsp); } NFSUNLOCKMNT(nmp); return (0); } /* * Any signal that can interrupt an NFS operation in an intr mount * should be added to this set. SIGSTOP and SIGKILL cannot be masked. */ int newnfs_sig_set[] = { SIGINT, SIGTERM, SIGHUP, SIGKILL, SIGQUIT }; /* * Check to see if one of the signals in our subset is pending on * the process (in an intr mount). */ static int nfs_sig_pending(sigset_t set) { int i; for (i = 0 ; i < nitems(newnfs_sig_set); i++) if (SIGISMEMBER(set, newnfs_sig_set[i])) return (1); return (0); } /* * The set/restore sigmask functions are used to (temporarily) overwrite * the thread td_sigmask during an RPC call (for example). These are also * used in other places in the NFS client that might tsleep(). */ void newnfs_set_sigmask(struct thread *td, sigset_t *oldset) { sigset_t newset; int i; struct proc *p; SIGFILLSET(newset); if (td == NULL) td = curthread; /* XXX */ p = td->td_proc; /* Remove the NFS set of signals from newset */ PROC_LOCK(p); mtx_lock(&p->p_sigacts->ps_mtx); for (i = 0 ; i < nitems(newnfs_sig_set); i++) { /* * But make sure we leave the ones already masked * by the process, ie. remove the signal from the * temporary signalmask only if it wasn't already * in p_sigmask. */ if (!SIGISMEMBER(td->td_sigmask, newnfs_sig_set[i]) && !SIGISMEMBER(p->p_sigacts->ps_sigignore, newnfs_sig_set[i])) SIGDELSET(newset, newnfs_sig_set[i]); } mtx_unlock(&p->p_sigacts->ps_mtx); kern_sigprocmask(td, SIG_SETMASK, &newset, oldset, SIGPROCMASK_PROC_LOCKED); PROC_UNLOCK(p); } void newnfs_restore_sigmask(struct thread *td, sigset_t *set) { if (td == NULL) td = curthread; /* XXX */ kern_sigprocmask(td, SIG_SETMASK, set, NULL, 0); } /* * NFS wrapper to msleep(), that shoves a new p_sigmask and restores the * old one after msleep() returns. */ int newnfs_msleep(struct thread *td, void *ident, struct mtx *mtx, int priority, char *wmesg, int timo) { sigset_t oldset; int error; if ((priority & PCATCH) == 0) return msleep(ident, mtx, priority, wmesg, timo); if (td == NULL) td = curthread; /* XXX */ newnfs_set_sigmask(td, &oldset); error = msleep(ident, mtx, priority, wmesg, timo); newnfs_restore_sigmask(td, &oldset); return (error); } /* * Test for a termination condition pending on the process. * This is used for NFSMNT_INT mounts. */ int newnfs_sigintr(struct nfsmount *nmp, struct thread *td) { struct proc *p; sigset_t tmpset; /* Terminate all requests while attempting a forced unmount. */ if (NFSCL_FORCEDISM(nmp->nm_mountp)) return (EIO); if (!(nmp->nm_flag & NFSMNT_INT)) return (0); if (td == NULL) return (0); p = td->td_proc; PROC_LOCK(p); tmpset = p->p_siglist; SIGSETOR(tmpset, td->td_siglist); SIGSETNAND(tmpset, td->td_sigmask); mtx_lock(&p->p_sigacts->ps_mtx); SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore); mtx_unlock(&p->p_sigacts->ps_mtx); if ((SIGNOTEMPTY(p->p_siglist) || SIGNOTEMPTY(td->td_siglist)) && nfs_sig_pending(tmpset)) { PROC_UNLOCK(p); return (EINTR); } PROC_UNLOCK(p); return (0); } static int nfs_msg(struct thread *td, const char *server, const char *msg, int error) { struct proc *p; p = td ? td->td_proc : NULL; if (error) { tprintf(p, LOG_INFO, "nfs server %s: %s, error %d\n", server, msg, error); } else { tprintf(p, LOG_INFO, "nfs server %s: %s\n", server, msg); } return (0); } static void nfs_down(struct nfsmount *nmp, struct thread *td, const char *msg, int error, int flags) { if (nmp == NULL) return; mtx_lock(&nmp->nm_mtx); if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) { nmp->nm_state |= NFSSTA_TIMEO; mtx_unlock(&nmp->nm_mtx); vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, VQ_NOTRESP, 0); } else mtx_unlock(&nmp->nm_mtx); mtx_lock(&nmp->nm_mtx); if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO)) { nmp->nm_state |= NFSSTA_LOCKTIMEO; mtx_unlock(&nmp->nm_mtx); vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, VQ_NOTRESPLOCK, 0); } else mtx_unlock(&nmp->nm_mtx); nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error); } static void nfs_up(struct nfsmount *nmp, struct thread *td, const char *msg, int flags, int tprintfmsg) { if (nmp == NULL) return; if (tprintfmsg) { nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0); } mtx_lock(&nmp->nm_mtx); if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) { nmp->nm_state &= ~NFSSTA_TIMEO; mtx_unlock(&nmp->nm_mtx); vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, VQ_NOTRESP, 1); } else mtx_unlock(&nmp->nm_mtx); mtx_lock(&nmp->nm_mtx); if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) { nmp->nm_state &= ~NFSSTA_LOCKTIMEO; mtx_unlock(&nmp->nm_mtx); vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, VQ_NOTRESPLOCK, 1); } else mtx_unlock(&nmp->nm_mtx); } diff --git a/sys/fs/nfsclient/nfs_clvfsops.c b/sys/fs/nfsclient/nfs_clvfsops.c index 8b059ef1be9f..365b1c387cc2 100644 --- a/sys/fs/nfsclient/nfs_clvfsops.c +++ b/sys/fs/nfsclient/nfs_clvfsops.c @@ -1,2083 +1,2105 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from nfs_vfsops.c 8.12 (Berkeley) 5/20/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_bootp.h" #include "opt_nfsroot.h" #include "opt_kern_tls.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(nfscl, "NFSv4 client"); extern int nfscl_ticks; extern struct timeval nfsboottime; extern int nfsrv_useacl; extern int nfscl_debuglevel; extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON]; extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON]; extern struct mtx ncl_iod_mutex; NFSCLSTATEMUTEX; extern struct mtx nfsrv_dslock_mtx; MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "NFS request header"); MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "NFS mount struct"); SYSCTL_DECL(_vfs_nfs); static int nfs_ip_paranoia = 1; SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW, &nfs_ip_paranoia, 0, ""); static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY; SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY, downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, ""); /* how long between console messages "nfs server foo not responding" */ static int nfs_tprintf_delay = NFS_TPRINTF_DELAY; SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY, downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, ""); #ifdef NFS_DEBUG int nfs_debug; SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0, "Toggle debug flag"); #endif static int nfs_mountroot(struct mount *); static void nfs_sec_name(char *, int *); static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp, const char *, struct ucred *, struct thread *); static int mountnfs(struct nfs_args *, struct mount *, struct sockaddr *, char *, u_char *, int, u_char *, int, u_char *, int, struct vnode **, struct ucred *, - struct thread *, int, int, int, uint32_t); + struct thread *, int, int, int, uint32_t, char *); static void nfs_getnlminfo(struct vnode *, uint8_t *, size_t *, struct sockaddr_storage *, int *, off_t *, struct timeval *); static vfs_mount_t nfs_mount; static vfs_cmount_t nfs_cmount; static vfs_unmount_t nfs_unmount; static vfs_root_t nfs_root; static vfs_statfs_t nfs_statfs; static vfs_sync_t nfs_sync; static vfs_sysctl_t nfs_sysctl; static vfs_purge_t nfs_purge; /* * nfs vfs operations. */ static struct vfsops nfs_vfsops = { .vfs_init = ncl_init, .vfs_mount = nfs_mount, .vfs_cmount = nfs_cmount, .vfs_root = vfs_cache_root, .vfs_cachedroot = nfs_root, .vfs_statfs = nfs_statfs, .vfs_sync = nfs_sync, .vfs_uninit = ncl_uninit, .vfs_unmount = nfs_unmount, .vfs_sysctl = nfs_sysctl, .vfs_purge = nfs_purge, }; VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY); /* So that loader and kldload(2) can find us, wherever we are.. */ MODULE_VERSION(nfs, 1); MODULE_DEPEND(nfs, nfscommon, 1, 1, 1); MODULE_DEPEND(nfs, krpc, 1, 1, 1); MODULE_DEPEND(nfs, nfssvc, 1, 1, 1); /* * This structure is now defined in sys/nfs/nfs_diskless.c so that it * can be shared by both NFS clients. It is declared here so that it * will be defined for kernels built without NFS_ROOT, although it * isn't used in that case. */ #if !defined(NFS_ROOT) struct nfs_diskless nfs_diskless = { { { 0 } } }; struct nfsv3_diskless nfsv3_diskless = { { { 0 } } }; int nfs_diskless_valid = 0; #endif SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD, &nfs_diskless_valid, 0, "Has the diskless struct been filled correctly"); SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD, nfsv3_diskless.root_hostnam, 0, "Path to nfs root"); SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD, &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr), "%Ssockaddr_in", "Diskless root nfs address"); void newnfsargs_ntoh(struct nfs_args *); static int nfs_mountdiskless(char *, struct sockaddr_in *, struct nfs_args *, struct thread *, struct vnode **, struct mount *); static void nfs_convert_diskless(void); static void nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs); int newnfs_iosize(struct nfsmount *nmp) { int iosize, maxio; /* First, set the upper limit for iosize */ if (nmp->nm_flag & NFSMNT_NFSV4) { maxio = NFS_MAXBSIZE; } else if (nmp->nm_flag & NFSMNT_NFSV3) { if (nmp->nm_sotype == SOCK_DGRAM) maxio = NFS_MAXDGRAMDATA; else maxio = NFS_MAXBSIZE; } else { maxio = NFS_V2MAXDATA; } if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0) nmp->nm_rsize = maxio; if (nmp->nm_rsize > NFS_MAXBSIZE) nmp->nm_rsize = NFS_MAXBSIZE; if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0) nmp->nm_readdirsize = maxio; if (nmp->nm_readdirsize > nmp->nm_rsize) nmp->nm_readdirsize = nmp->nm_rsize; if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0) nmp->nm_wsize = maxio; if (nmp->nm_wsize > NFS_MAXBSIZE) nmp->nm_wsize = NFS_MAXBSIZE; /* * Calculate the size used for io buffers. Use the larger * of the two sizes to minimise nfs requests but make sure * that it is at least one VM page to avoid wasting buffer * space. It must also be at least NFS_DIRBLKSIZ, since * that is the buffer size used for directories. */ iosize = imax(nmp->nm_rsize, nmp->nm_wsize); iosize = imax(iosize, PAGE_SIZE); iosize = imax(iosize, NFS_DIRBLKSIZ); nmp->nm_mountp->mnt_stat.f_iosize = iosize; return (iosize); } static void nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs) { args->version = NFS_ARGSVERSION; args->addr = oargs->addr; args->addrlen = oargs->addrlen; args->sotype = oargs->sotype; args->proto = oargs->proto; args->fh = oargs->fh; args->fhsize = oargs->fhsize; args->flags = oargs->flags; args->wsize = oargs->wsize; args->rsize = oargs->rsize; args->readdirsize = oargs->readdirsize; args->timeo = oargs->timeo; args->retrans = oargs->retrans; args->readahead = oargs->readahead; args->hostname = oargs->hostname; } static void nfs_convert_diskless(void) { bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif, sizeof(struct ifaliasreq)); bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway, sizeof(struct sockaddr_in)); nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args); if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) { nfsv3_diskless.root_fhsize = NFSX_MYFH; bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH); } else { nfsv3_diskless.root_fhsize = NFSX_V2FH; bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH); } bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr, sizeof(struct sockaddr_in)); bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN); nfsv3_diskless.root_time = nfs_diskless.root_time; bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam, MAXHOSTNAMELEN); nfs_diskless_valid = 3; } /* * nfs statfs call */ static int nfs_statfs(struct mount *mp, struct statfs *sbp) { struct vnode *vp; struct thread *td; struct nfsmount *nmp = VFSTONFS(mp); struct nfsvattr nfsva; struct nfsfsinfo fs; struct nfsstatfs sb; int error = 0, attrflag, gotfsinfo = 0, ret; struct nfsnode *np; td = curthread; error = vfs_busy(mp, MBF_NOWAIT); if (error) return (error); error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE); if (error) { vfs_unbusy(mp); return (error); } vp = NFSTOV(np); mtx_lock(&nmp->nm_mtx); if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) { mtx_unlock(&nmp->nm_mtx); error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva, &attrflag, NULL); if (!error) gotfsinfo = 1; } else mtx_unlock(&nmp->nm_mtx); if (!error) error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva, &attrflag, NULL); if (error != 0) NFSCL_DEBUG(2, "statfs=%d\n", error); if (attrflag == 0) { ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1, td->td_ucred, td, &nfsva, NULL, NULL); if (ret) { /* * Just set default values to get things going. */ NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr)); nfsva.na_vattr.va_type = VDIR; nfsva.na_vattr.va_mode = 0777; nfsva.na_vattr.va_nlink = 100; nfsva.na_vattr.va_uid = (uid_t)0; nfsva.na_vattr.va_gid = (gid_t)0; nfsva.na_vattr.va_fileid = 2; nfsva.na_vattr.va_gen = 1; nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE; nfsva.na_vattr.va_size = 512 * 1024; } } (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (!error) { mtx_lock(&nmp->nm_mtx); if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4)) nfscl_loadfsinfo(nmp, &fs); nfscl_loadsbinfo(nmp, &sb, sbp); sbp->f_iosize = newnfs_iosize(nmp); mtx_unlock(&nmp->nm_mtx); if (sbp != &mp->mnt_stat) { bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN); } else if (NFS_ISV4(vp)) { error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); } vput(vp); vfs_unbusy(mp); return (error); } /* * nfs version 3 fsinfo rpc call */ int ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred, struct thread *td) { struct nfsfsinfo fs; struct nfsvattr nfsva; int error, attrflag; error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL); if (!error) { if (attrflag) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); mtx_lock(&nmp->nm_mtx); nfscl_loadfsinfo(nmp, &fs); mtx_unlock(&nmp->nm_mtx); } return (error); } /* * Mount a remote root fs via. nfs. This depends on the info in the * nfs_diskless structure that has been filled in properly by some primary * bootstrap. * It goes something like this: * - do enough of "ifconfig" by calling ifioctl() so that the system * can talk to the server * - If nfs_diskless.mygateway is filled in, use that address as * a default gateway. * - build the rootfs mount point and call mountnfs() to do the rest. * * It is assumed to be safe to read, modify, and write the nfsv3_diskless * structure, as well as other global NFS client variables here, as * nfs_mountroot() will be called once in the boot before any other NFS * client activity occurs. */ static int nfs_mountroot(struct mount *mp) { struct thread *td = curthread; struct nfsv3_diskless *nd = &nfsv3_diskless; struct socket *so; struct vnode *vp; struct ifreq ir; int error; u_long l; char buf[128]; char *cp; #if defined(BOOTP_NFSROOT) && defined(BOOTP) bootpc_init(); /* use bootp to get nfs_diskless filled in */ #elif defined(NFS_ROOT) nfs_setup_diskless(); #endif if (nfs_diskless_valid == 0) return (-1); if (nfs_diskless_valid == 1) nfs_convert_diskless(); /* * Do enough of ifconfig(8) so that the critical net interface can * talk to the server. */ error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0, td->td_ucred, td); if (error) panic("nfs_mountroot: socreate(%04x): %d", nd->myif.ifra_addr.sa_family, error); #if 0 /* XXX Bad idea */ /* * We might not have been told the right interface, so we pass * over the first ten interfaces of the same kind, until we get * one of them configured. */ for (i = strlen(nd->myif.ifra_name) - 1; nd->myif.ifra_name[i] >= '0' && nd->myif.ifra_name[i] <= '9'; nd->myif.ifra_name[i] ++) { error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td); if(!error) break; } #endif error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td); if (error) panic("nfs_mountroot: SIOCAIFADDR: %d", error); if ((cp = kern_getenv("boot.netif.mtu")) != NULL) { ir.ifr_mtu = strtol(cp, NULL, 10); bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ); freeenv(cp); error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td); if (error) printf("nfs_mountroot: SIOCSIFMTU: %d", error); } soclose(so); /* * If the gateway field is filled in, set it as the default route. * Note that pxeboot will set a default route of 0 if the route * is not set by the DHCP server. Check also for a value of 0 * to avoid panicking inappropriately in that situation. */ if (nd->mygateway.sin_len != 0 && nd->mygateway.sin_addr.s_addr != 0) { struct sockaddr_in mask, sin; struct epoch_tracker et; struct rt_addrinfo info; struct rib_cmd_info rc; bzero((caddr_t)&mask, sizeof(mask)); sin = mask; sin.sin_family = AF_INET; sin.sin_len = sizeof(sin); /* XXX MRT use table 0 for this sort of thing */ NET_EPOCH_ENTER(et); CURVNET_SET(TD_TO_VNET(td)); bzero((caddr_t)&info, sizeof(info)); info.rti_flags = RTF_UP | RTF_GATEWAY; info.rti_info[RTAX_DST] = (struct sockaddr *)&sin; info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&nd->mygateway; info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&mask; error = rib_action(RT_DEFAULT_FIB, RTM_ADD, &info, &rc); CURVNET_RESTORE(); NET_EPOCH_EXIT(et); if (error) panic("nfs_mountroot: RTM_ADD: %d", error); } /* * Create the rootfs mount point. */ nd->root_args.fh = nd->root_fh; nd->root_args.fhsize = nd->root_fhsize; l = ntohl(nd->root_saddr.sin_addr.s_addr); snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s", (l >> 24) & 0xff, (l >> 16) & 0xff, (l >> 8) & 0xff, (l >> 0) & 0xff, nd->root_hostnam); printf("NFS ROOT: %s\n", buf); nd->root_args.hostname = buf; if ((error = nfs_mountdiskless(buf, &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) { return (error); } /* * This is not really an nfs issue, but it is much easier to * set hostname here and then let the "/etc/rc.xxx" files * mount the right /var based upon its preset value. */ mtx_lock(&prison0.pr_mtx); strlcpy(prison0.pr_hostname, nd->my_hostnam, sizeof(prison0.pr_hostname)); mtx_unlock(&prison0.pr_mtx); inittodr(ntohl(nd->root_time)); return (0); } /* * Internal version of mount system call for diskless setup. */ static int nfs_mountdiskless(char *path, struct sockaddr_in *sin, struct nfs_args *args, struct thread *td, struct vnode **vpp, struct mount *mp) { struct sockaddr *nam; int dirlen, error; char *dirpath; /* * Find the directory path in "path", which also has the server's * name/ip address in it. */ dirpath = strchr(path, ':'); if (dirpath != NULL) dirlen = strlen(++dirpath); else dirlen = 0; nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK); if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen, NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO, - NFS_DEFAULT_NEGNAMETIMEO, 0, 0)) != 0) { + NFS_DEFAULT_NEGNAMETIMEO, 0, 0, NULL)) != 0) { printf("nfs_mountroot: mount %s on /: %d\n", path, error); return (error); } return (0); } static void nfs_sec_name(char *sec, int *flagsp) { if (!strcmp(sec, "krb5")) *flagsp |= NFSMNT_KERB; else if (!strcmp(sec, "krb5i")) *flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY); else if (!strcmp(sec, "krb5p")) *flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY); } static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp, const char *hostname, struct ucred *cred, struct thread *td) { int adjsock; char *p; /* * Set read-only flag if requested; otherwise, clear it if this is * an update. If this is not an update, then either the read-only * flag is already clear, or this is a root mount and it was set * intentionally at some previous point. */ if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) { MNT_ILOCK(mp); mp->mnt_flag |= MNT_RDONLY; MNT_IUNLOCK(mp); } else if (mp->mnt_flag & MNT_UPDATE) { MNT_ILOCK(mp); mp->mnt_flag &= ~MNT_RDONLY; MNT_IUNLOCK(mp); } /* * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes * no sense in that context. Also, set up appropriate retransmit * and soft timeout behavior. */ if (argp->sotype == SOCK_STREAM) { nmp->nm_flag &= ~NFSMNT_NOCONN; nmp->nm_timeo = NFS_MAXTIMEO; if ((argp->flags & NFSMNT_NFSV4) != 0) nmp->nm_retry = INT_MAX; else nmp->nm_retry = NFS_RETRANS_TCP; } /* Also clear RDIRPLUS if NFSv2, it crashes some servers */ if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) { argp->flags &= ~NFSMNT_RDIRPLUS; nmp->nm_flag &= ~NFSMNT_RDIRPLUS; } /* Clear ONEOPENOWN for NFSv2, 3 and 4.0. */ if (nmp->nm_minorvers == 0) { argp->flags &= ~NFSMNT_ONEOPENOWN; nmp->nm_flag &= ~NFSMNT_ONEOPENOWN; } /* Re-bind if rsrvd port requested and wasn't on one */ adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT) && (argp->flags & NFSMNT_RESVPORT); /* Also re-bind if we're switching to/from a connected UDP socket */ adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) != (argp->flags & NFSMNT_NOCONN)); /* Update flags atomically. Don't change the lock bits. */ nmp->nm_flag = argp->flags | nmp->nm_flag; if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) { nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10; if (nmp->nm_timeo < NFS_MINTIMEO) nmp->nm_timeo = NFS_MINTIMEO; else if (nmp->nm_timeo > NFS_MAXTIMEO) nmp->nm_timeo = NFS_MAXTIMEO; } if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) { nmp->nm_retry = argp->retrans; if (nmp->nm_retry > NFS_MAXREXMIT) nmp->nm_retry = NFS_MAXREXMIT; } if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) { nmp->nm_wsize = argp->wsize; /* * Clip at the power of 2 below the size. There is an * issue (not isolated) that causes intermittent page * faults if this is not done. */ if (nmp->nm_wsize > NFS_FABLKSIZE) nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1); else nmp->nm_wsize = NFS_FABLKSIZE; } if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) { nmp->nm_rsize = argp->rsize; /* * Clip at the power of 2 below the size. There is an * issue (not isolated) that causes intermittent page * faults if this is not done. */ if (nmp->nm_rsize > NFS_FABLKSIZE) nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1); else nmp->nm_rsize = NFS_FABLKSIZE; } if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) { nmp->nm_readdirsize = argp->readdirsize; } if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0) nmp->nm_acregmin = argp->acregmin; else nmp->nm_acregmin = NFS_MINATTRTIMO; if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0) nmp->nm_acregmax = argp->acregmax; else nmp->nm_acregmax = NFS_MAXATTRTIMO; if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0) nmp->nm_acdirmin = argp->acdirmin; else nmp->nm_acdirmin = NFS_MINDIRATTRTIMO; if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0) nmp->nm_acdirmax = argp->acdirmax; else nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO; if (nmp->nm_acdirmin > nmp->nm_acdirmax) nmp->nm_acdirmin = nmp->nm_acdirmax; if (nmp->nm_acregmin > nmp->nm_acregmax) nmp->nm_acregmin = nmp->nm_acregmax; if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) { if (argp->readahead <= NFS_MAXRAHEAD) nmp->nm_readahead = argp->readahead; else nmp->nm_readahead = NFS_MAXRAHEAD; } if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) { if (argp->wcommitsize < nmp->nm_wsize) nmp->nm_wcommitsize = nmp->nm_wsize; else nmp->nm_wcommitsize = argp->wcommitsize; } adjsock |= ((nmp->nm_sotype != argp->sotype) || (nmp->nm_soproto != argp->proto)); if (nmp->nm_client != NULL && adjsock) { int haslock = 0, error = 0; if (nmp->nm_sotype == SOCK_STREAM) { error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock); if (!error) haslock = 1; } if (!error) { newnfs_disconnect(&nmp->nm_sockreq); if (haslock) newnfs_sndunlock(&nmp->nm_sockreq.nr_lock); nmp->nm_sotype = argp->sotype; nmp->nm_soproto = argp->proto; if (nmp->nm_sotype == SOCK_DGRAM) while (newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0, false)) { printf("newnfs_args: retrying connect\n"); (void) nfs_catnap(PSOCK, 0, "nfscon"); } } } else { nmp->nm_sotype = argp->sotype; nmp->nm_soproto = argp->proto; } if (hostname != NULL) { strlcpy(nmp->nm_hostname, hostname, sizeof(nmp->nm_hostname)); p = strchr(nmp->nm_hostname, ':'); if (p != NULL) *p = '\0'; } } static const char *nfs_opts[] = { "from", "nfs_args", "noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union", "noclusterr", "noclusterw", "multilabel", "acls", "force", "update", "async", "noconn", "nolockd", "conn", "lockd", "intr", "rdirplus", "readdirsize", "soft", "hard", "mntudp", "tcp", "udp", "wsize", "rsize", "retrans", "actimeo", "acregmin", "acregmax", "acdirmin", "acdirmax", "resvport", "readahead", "hostname", "timeo", "timeout", "addr", "fh", "nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname", "dirpath", "minorversion", "nametimeo", "negnametimeo", "nocto", "noncontigwr", - "pnfs", "wcommitsize", "oneopenown", "tls", + "pnfs", "wcommitsize", "oneopenown", "tls", "tlscertname", NULL }; /* * Parse the "from" mountarg, passed by the generic mount(8) program * or the mountroot code. This is used when rerooting into NFS. * * Note that the "hostname" is actually a "hostname:/share/path" string. */ static int nfs_mount_parse_from(struct vfsoptlist *opts, char **hostnamep, struct sockaddr_in **sinp, char *dirpath, size_t dirpathsize, int *dirlenp) { char *nam, *delimp, *hostp, *spec; int error, have_bracket = 0, offset, rv, speclen; struct sockaddr_in *sin; size_t len; error = vfs_getopt(opts, "from", (void **)&spec, &speclen); if (error != 0) return (error); nam = malloc(MNAMELEN + 1, M_TEMP, M_WAITOK); /* * This part comes from sbin/mount_nfs/mount_nfs.c:getnfsargs(). */ if (*spec == '[' && (delimp = strchr(spec + 1, ']')) != NULL && *(delimp + 1) == ':') { hostp = spec + 1; spec = delimp + 2; have_bracket = 1; } else if ((delimp = strrchr(spec, ':')) != NULL) { hostp = spec; spec = delimp + 1; } else if ((delimp = strrchr(spec, '@')) != NULL) { printf("%s: path@server syntax is deprecated, " "use server:path\n", __func__); hostp = delimp + 1; } else { printf("%s: no : nfs-name\n", __func__); free(nam, M_TEMP); return (EINVAL); } *delimp = '\0'; /* * If there has been a trailing slash at mounttime it seems * that some mountd implementations fail to remove the mount * entries from their mountlist while unmounting. */ for (speclen = strlen(spec); speclen > 1 && spec[speclen - 1] == '/'; speclen--) spec[speclen - 1] = '\0'; if (strlen(hostp) + strlen(spec) + 1 > MNAMELEN) { printf("%s: %s:%s: name too long", __func__, hostp, spec); free(nam, M_TEMP); return (EINVAL); } /* Make both '@' and ':' notations equal */ if (*hostp != '\0') { len = strlen(hostp); offset = 0; if (have_bracket) nam[offset++] = '['; memmove(nam + offset, hostp, len); if (have_bracket) nam[len + offset++] = ']'; nam[len + offset++] = ':'; memmove(nam + len + offset, spec, speclen); nam[len + speclen + offset] = '\0'; } else nam[0] = '\0'; /* * XXX: IPv6 */ sin = malloc(sizeof(*sin), M_SONAME, M_WAITOK); rv = inet_pton(AF_INET, hostp, &sin->sin_addr); if (rv != 1) { printf("%s: cannot parse '%s', inet_pton() returned %d\n", __func__, hostp, rv); free(nam, M_TEMP); free(sin, M_SONAME); return (EINVAL); } sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; /* * XXX: hardcoded port number. */ sin->sin_port = htons(2049); *hostnamep = strdup(nam, M_NEWNFSMNT); *sinp = sin; strlcpy(dirpath, spec, dirpathsize); *dirlenp = strlen(dirpath); free(nam, M_TEMP); return (0); } /* * VFS Operations. * * mount system call * It seems a bit dumb to copyinstr() the host and path here and then * bcopy() them in mountnfs(), but I wanted to detect errors before * doing the getsockaddr() call because getsockaddr() allocates an mbuf and * an error after that means that I have to release the mbuf. */ /* ARGSUSED */ static int nfs_mount(struct mount *mp) { struct nfs_args args = { .version = NFS_ARGSVERSION, .addr = NULL, .addrlen = sizeof (struct sockaddr_in), .sotype = SOCK_STREAM, .proto = 0, .fh = NULL, .fhsize = 0, .flags = NFSMNT_RESVPORT, .wsize = NFS_WSIZE, .rsize = NFS_RSIZE, .readdirsize = NFS_READDIRSIZE, .timeo = 10, .retrans = NFS_RETRANS, .readahead = NFS_DEFRAHEAD, .wcommitsize = 0, /* was: NQ_DEFLEASE */ .hostname = NULL, .acregmin = NFS_MINATTRTIMO, .acregmax = NFS_MAXATTRTIMO, .acdirmin = NFS_MINDIRATTRTIMO, .acdirmax = NFS_MAXDIRATTRTIMO, }; int error = 0, ret, len; struct sockaddr *nam = NULL; struct vnode *vp; struct thread *td; char *hst; u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100]; - char *cp, *opt, *name, *secname; + char *cp, *opt, *name, *secname, *tlscertname; int nametimeo = NFS_DEFAULT_NAMETIMEO; int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO; int minvers = 0; int dirlen, has_nfs_args_opt, has_nfs_from_opt, krbnamelen, srvkrbnamelen; size_t hstlen; uint32_t newflag; has_nfs_args_opt = 0; has_nfs_from_opt = 0; newflag = 0; + tlscertname = NULL; hst = malloc(MNAMELEN, M_TEMP, M_WAITOK); if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) { error = EINVAL; goto out; } td = curthread; if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS && nfs_diskless_valid != 0) { error = nfs_mountroot(mp); goto out; } nfscl_init(); /* * The old mount_nfs program passed the struct nfs_args * from userspace to kernel. The new mount_nfs program * passes string options via nmount() from userspace to kernel * and we populate the struct nfs_args in the kernel. */ if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) { error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args, sizeof(args)); if (error != 0) goto out; if (args.version != NFS_ARGSVERSION) { error = EPROGMISMATCH; goto out; } has_nfs_args_opt = 1; } /* Handle the new style options. */ if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) { args.acdirmin = args.acdirmax = args.acregmin = args.acregmax = 0; args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX | NFSMNT_ACREGMIN | NFSMNT_ACREGMAX; } if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0) args.flags |= NFSMNT_NOCONN; if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0) args.flags &= ~NFSMNT_NOCONN; if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0) args.flags |= NFSMNT_NOLOCKD; if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0) args.flags &= ~NFSMNT_NOLOCKD; if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0) args.flags |= NFSMNT_INT; if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0) args.flags |= NFSMNT_RDIRPLUS; if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0) args.flags |= NFSMNT_RESVPORT; if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0) args.flags &= ~NFSMNT_RESVPORT; if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0) args.flags |= NFSMNT_SOFT; if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0) args.flags &= ~NFSMNT_SOFT; if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0) args.sotype = SOCK_DGRAM; if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0) args.sotype = SOCK_DGRAM; if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0) args.sotype = SOCK_STREAM; if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0) args.flags |= NFSMNT_NFSV3; if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) { args.flags |= NFSMNT_NFSV4; args.sotype = SOCK_STREAM; } if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0) args.flags |= NFSMNT_ALLGSSNAME; if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0) args.flags |= NFSMNT_NOCTO; if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0) args.flags |= NFSMNT_NONCONTIGWR; if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0) args.flags |= NFSMNT_PNFS; if (vfs_getopt(mp->mnt_optnew, "oneopenown", NULL, NULL) == 0) args.flags |= NFSMNT_ONEOPENOWN; if (vfs_getopt(mp->mnt_optnew, "tls", NULL, NULL) == 0) newflag |= NFSMNT_TLS; + if (vfs_getopt(mp->mnt_optnew, "tlscertname", (void **)&opt, &len) == + 0) { + /* + * tlscertname with "key.pem" appended to it forms a file + * name. As such, the maximum allowable strlen(tlscertname) is + * NAME_MAX - 7. However, "len" includes the nul termination + * byte so it can be up to NAME_MAX - 6. + */ + if (opt == NULL || len <= 1 || len > NAME_MAX - 6) { + vfs_mount_error(mp, "invalid tlscertname"); + error = EINVAL; + goto out; + } + tlscertname = malloc(len, M_NEWNFSMNT, M_WAITOK); + strlcpy(tlscertname, opt, len); + } if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal readdirsize"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.readdirsize); if (ret != 1 || args.readdirsize <= 0) { vfs_mount_error(mp, "illegal readdirsize: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_READDIRSIZE; } if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal readahead"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.readahead); if (ret != 1 || args.readahead <= 0) { vfs_mount_error(mp, "illegal readahead: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_READAHEAD; } if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal wsize"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.wsize); if (ret != 1 || args.wsize <= 0) { vfs_mount_error(mp, "illegal wsize: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_WSIZE; } if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal rsize"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.rsize); if (ret != 1 || args.rsize <= 0) { vfs_mount_error(mp, "illegal wsize: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_RSIZE; } if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal retrans"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.retrans); if (ret != 1 || args.retrans <= 0) { vfs_mount_error(mp, "illegal retrans: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_RETRANS; } if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acregmin); if (ret != 1 || args.acregmin < 0) { vfs_mount_error(mp, "illegal actimeo: %s", opt); error = EINVAL; goto out; } args.acdirmin = args.acdirmax = args.acregmax = args.acregmin; args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX | NFSMNT_ACREGMIN | NFSMNT_ACREGMAX; } if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acregmin); if (ret != 1 || args.acregmin < 0) { vfs_mount_error(mp, "illegal acregmin: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_ACREGMIN; } if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acregmax); if (ret != 1 || args.acregmax < 0) { vfs_mount_error(mp, "illegal acregmax: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_ACREGMAX; } if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acdirmin); if (ret != 1 || args.acdirmin < 0) { vfs_mount_error(mp, "illegal acdirmin: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_ACDIRMIN; } if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acdirmax); if (ret != 1 || args.acdirmax < 0) { vfs_mount_error(mp, "illegal acdirmax: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_ACDIRMAX; } if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.wcommitsize); if (ret != 1 || args.wcommitsize < 0) { vfs_mount_error(mp, "illegal wcommitsize: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_WCOMMITSIZE; } if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.timeo); if (ret != 1 || args.timeo <= 0) { vfs_mount_error(mp, "illegal timeo: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_TIMEO; } if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.timeo); if (ret != 1 || args.timeo <= 0) { vfs_mount_error(mp, "illegal timeout: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_TIMEO; } if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &nametimeo); if (ret != 1 || nametimeo < 0) { vfs_mount_error(mp, "illegal nametimeo: %s", opt); error = EINVAL; goto out; } } if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &negnametimeo); if (ret != 1 || negnametimeo < 0) { vfs_mount_error(mp, "illegal negnametimeo: %s", opt); error = EINVAL; goto out; } } if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &minvers); if (ret != 1 || minvers < 0 || minvers > 2 || (args.flags & NFSMNT_NFSV4) == 0) { vfs_mount_error(mp, "illegal minorversion: %s", opt); error = EINVAL; goto out; } } if (vfs_getopt(mp->mnt_optnew, "sec", (void **) &secname, NULL) == 0) nfs_sec_name(secname, &args.flags); if (mp->mnt_flag & MNT_UPDATE) { struct nfsmount *nmp = VFSTONFS(mp); if (nmp == NULL) { error = EIO; goto out; } /* * If a change from TCP->UDP is done and there are thread(s) * that have I/O RPC(s) in progress with a transfer size * greater than NFS_MAXDGRAMDATA, those thread(s) will be * hung, retrying the RPC(s) forever. Usually these threads * will be seen doing an uninterruptible sleep on wait channel * "nfsreq". */ if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM) tprintf(td->td_proc, LOG_WARNING, "Warning: mount -u that changes TCP->UDP can result in hung threads\n"); /* * When doing an update, we can't change version, * security, switch lockd strategies, change cookie * translation or switch oneopenown. */ args.flags = (args.flags & ~(NFSMNT_NFSV3 | NFSMNT_NFSV4 | NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY | NFSMNT_ONEOPENOWN | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) | (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4 | NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY | NFSMNT_ONEOPENOWN | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)); nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td); goto out; } /* * Make the nfs_ip_paranoia sysctl serve as the default connection * or no-connection mode for those protocols that support * no-connection mode (the flag will be cleared later for protocols * that do not support no-connection mode). This will allow a client * to receive replies from a different IP then the request was * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid), * not 0. */ if (nfs_ip_paranoia == 0) args.flags |= NFSMNT_NOCONN; if (has_nfs_args_opt != 0) { /* * In the 'nfs_args' case, the pointers in the args * structure are in userland - we copy them in here. */ if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) { vfs_mount_error(mp, "Bad file handle"); error = EINVAL; goto out; } error = copyin((caddr_t)args.fh, (caddr_t)nfh, args.fhsize); if (error != 0) goto out; error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen); if (error != 0) goto out; bzero(&hst[hstlen], MNAMELEN - hstlen); args.hostname = hst; /* getsockaddr() call must be after above copyin() calls */ error = getsockaddr(&nam, args.addr, args.addrlen); if (error != 0) goto out; } else if (nfs_mount_parse_from(mp->mnt_optnew, &args.hostname, (struct sockaddr_in **)&nam, dirpath, sizeof(dirpath), &dirlen) == 0) { has_nfs_from_opt = 1; bcopy(args.hostname, hst, MNAMELEN); hst[MNAMELEN - 1] = '\0'; /* * This only works with NFSv4 for now. */ args.fhsize = 0; args.flags |= NFSMNT_NFSV4; args.sotype = SOCK_STREAM; } else { if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh, &args.fhsize) == 0) { if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) { vfs_mount_error(mp, "Bad file handle"); error = EINVAL; goto out; } bcopy(args.fh, nfh, args.fhsize); } else { args.fhsize = 0; } (void) vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname, &len); if (args.hostname == NULL) { vfs_mount_error(mp, "Invalid hostname"); error = EINVAL; goto out; } if (len >= MNAMELEN) { vfs_mount_error(mp, "Hostname too long"); error = EINVAL; goto out; } bcopy(args.hostname, hst, len); hst[len] = '\0'; } if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0) strlcpy(srvkrbname, name, sizeof (srvkrbname)); else { snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst); cp = strchr(srvkrbname, ':'); if (cp != NULL) *cp = '\0'; } srvkrbnamelen = strlen(srvkrbname); if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0) strlcpy(krbname, name, sizeof (krbname)); else krbname[0] = '\0'; krbnamelen = strlen(krbname); if (has_nfs_from_opt == 0) { if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0) strlcpy(dirpath, name, sizeof (dirpath)); else dirpath[0] = '\0'; dirlen = strlen(dirpath); } if (has_nfs_args_opt == 0 && has_nfs_from_opt == 0) { if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr, &args.addrlen) == 0) { if (args.addrlen > SOCK_MAXADDRLEN) { error = ENAMETOOLONG; goto out; } nam = malloc(args.addrlen, M_SONAME, M_WAITOK); bcopy(args.addr, nam, args.addrlen); nam->sa_len = args.addrlen; } else { vfs_mount_error(mp, "No server address"); error = EINVAL; goto out; } } args.fh = nfh; error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath, dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td, - nametimeo, negnametimeo, minvers, newflag); + nametimeo, negnametimeo, minvers, newflag, tlscertname); out: if (!error) { MNT_ILOCK(mp); mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF | MNTK_USES_BCACHE; if ((VFSTONFS(mp)->nm_flag & NFSMNT_NFSV4) != 0) mp->mnt_kern_flag |= MNTK_NULL_NOCACHE; MNT_IUNLOCK(mp); } free(hst, M_TEMP); return (error); } /* * VFS Operations. * * mount system call * It seems a bit dumb to copyinstr() the host and path here and then * bcopy() them in mountnfs(), but I wanted to detect errors before * doing the getsockaddr() call because getsockaddr() allocates an mbuf and * an error after that means that I have to release the mbuf. */ /* ARGSUSED */ static int nfs_cmount(struct mntarg *ma, void *data, uint64_t flags) { int error; struct nfs_args args; error = copyin(data, &args, sizeof (struct nfs_args)); if (error) return error; ma = mount_arg(ma, "nfs_args", &args, sizeof args); error = kernel_mount(ma, flags); return (error); } /* * Common code for mount and mountroot */ static int mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen, u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp, struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo, - int minvers, uint32_t newflag) + int minvers, uint32_t newflag, char *tlscertname) { struct nfsmount *nmp; struct nfsnode *np; int error, trycnt, ret; struct nfsvattr nfsva; struct nfsclclient *clp; struct nfsclds *dsp, *tdsp; uint32_t lease; static u_int64_t clval = 0; #ifdef KERN_TLS u_int maxlen; #endif NFSCL_DEBUG(3, "in mnt\n"); clp = NULL; if (mp->mnt_flag & MNT_UPDATE) { nmp = VFSTONFS(mp); printf("%s: MNT_UPDATE is no longer handled here\n", __func__); free(nam, M_SONAME); + free(tlscertname, M_NEWNFSMNT); return (0); } else { /* NFS-over-TLS requires that rpctls be functioning. */ if ((newflag & NFSMNT_TLS) != 0) { error = EINVAL; #ifdef KERN_TLS /* KERN_TLS is only supported for TCP. */ if (argp->sotype == SOCK_STREAM && rpctls_getinfo(&maxlen, true, false)) error = 0; #endif if (error != 0) { free(nam, M_SONAME); + free(tlscertname, M_NEWNFSMNT); return (error); } } nmp = malloc(sizeof (struct nfsmount) + krbnamelen + dirlen + srvkrbnamelen + 2, M_NEWNFSMNT, M_WAITOK | M_ZERO); + nmp->nm_tlscertname = tlscertname; nmp->nm_newflag = newflag; TAILQ_INIT(&nmp->nm_bufq); TAILQ_INIT(&nmp->nm_sess); if (clval == 0) clval = (u_int64_t)nfsboottime.tv_sec; nmp->nm_clval = clval++; nmp->nm_krbnamelen = krbnamelen; nmp->nm_dirpathlen = dirlen; nmp->nm_srvkrbnamelen = srvkrbnamelen; if (td->td_ucred->cr_uid != (uid_t)0) { /* * nm_uid is used to get KerberosV credentials for * the nfsv4 state handling operations if there is * no host based principal set. Use the uid of * this user if not root, since they are doing the * mount. I don't think setting this for root will * work, since root normally does not have user * credentials in a credentials cache. */ nmp->nm_uid = td->td_ucred->cr_uid; } else { /* * Just set to -1, so it won't be used. */ nmp->nm_uid = (uid_t)-1; } /* Copy and null terminate all the names */ if (nmp->nm_krbnamelen > 0) { bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen); nmp->nm_name[nmp->nm_krbnamelen] = '\0'; } if (nmp->nm_dirpathlen > 0) { bcopy(dirpath, NFSMNT_DIRPATH(nmp), nmp->nm_dirpathlen); nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen + 1] = '\0'; } if (nmp->nm_srvkrbnamelen > 0) { bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp), nmp->nm_srvkrbnamelen); nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen + nmp->nm_srvkrbnamelen + 2] = '\0'; } nmp->nm_sockreq.nr_cred = crhold(cred); mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF); mp->mnt_data = nmp; nmp->nm_getinfo = nfs_getnlminfo; nmp->nm_vinvalbuf = ncl_vinvalbuf; } vfs_getnewfsid(mp); nmp->nm_mountp = mp; mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK); /* * Since nfs_decode_args() might optionally set them, these * need to be set to defaults before the call, so that the * optional settings aren't overwritten. */ nmp->nm_nametimeo = nametimeo; nmp->nm_negnametimeo = negnametimeo; nmp->nm_timeo = NFS_TIMEO; nmp->nm_retry = NFS_RETRANS; nmp->nm_readahead = NFS_DEFRAHEAD; /* This is empirical approximation of sqrt(hibufspace) * 256. */ nmp->nm_wcommitsize = NFS_MAXBSIZE / 256; while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace) nmp->nm_wcommitsize *= 2; nmp->nm_wcommitsize *= 256; if ((argp->flags & NFSMNT_NFSV4) != 0) nmp->nm_minorvers = minvers; else nmp->nm_minorvers = 0; nfs_decode_args(mp, nmp, argp, hst, cred, td); /* * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too * high, depending on whether we end up with negative offsets in * the client or server somewhere. 2GB-1 may be safer. * * For V3, ncl_fsinfo will adjust this as necessary. Assume maximum * that we can handle until we find out otherwise. */ if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) nmp->nm_maxfilesize = 0xffffffffLL; else nmp->nm_maxfilesize = OFF_MAX; if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) { nmp->nm_wsize = NFS_WSIZE; nmp->nm_rsize = NFS_RSIZE; nmp->nm_readdirsize = NFS_READDIRSIZE; } nmp->nm_numgrps = NFS_MAXGRPS; nmp->nm_tprintf_delay = nfs_tprintf_delay; if (nmp->nm_tprintf_delay < 0) nmp->nm_tprintf_delay = 0; nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay; if (nmp->nm_tprintf_initial_delay < 0) nmp->nm_tprintf_initial_delay = 0; nmp->nm_fhsize = argp->fhsize; if (nmp->nm_fhsize > 0) bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize); bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN); nmp->nm_nam = nam; /* Set up the sockets and per-host congestion */ nmp->nm_sotype = argp->sotype; nmp->nm_soproto = argp->proto; nmp->nm_sockreq.nr_prog = NFS_PROG; if ((argp->flags & NFSMNT_NFSV4)) nmp->nm_sockreq.nr_vers = NFS_VER4; else if ((argp->flags & NFSMNT_NFSV3)) nmp->nm_sockreq.nr_vers = NFS_VER3; else nmp->nm_sockreq.nr_vers = NFS_VER2; if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0, false))) goto bad; /* For NFSv4.1, get the clientid now. */ if (nmp->nm_minorvers > 0) { NFSCL_DEBUG(3, "at getcl\n"); error = nfscl_getcl(mp, cred, td, 0, &clp); NFSCL_DEBUG(3, "aft getcl=%d\n", error); if (error != 0) goto bad; } if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) && nmp->nm_dirpathlen > 0) { NFSCL_DEBUG(3, "in dirp\n"); /* * If the fhsize on the mount point == 0 for V4, the mount * path needs to be looked up. */ trycnt = 3; do { error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp), cred, td); NFSCL_DEBUG(3, "aft dirp=%d\n", error); if (error) (void) nfs_catnap(PZERO, error, "nfsgetdirp"); } while (error && --trycnt > 0); if (error) goto bad; } /* * A reference count is needed on the nfsnode representing the * remote root. If this object is not persistent, then backward * traversals of the mount point (i.e. "..") will not work if * the nfsnode gets flushed out of the cache. Ufs does not have * this problem, because one can identify root inodes by their * number == UFS_ROOTINO (2). */ if (nmp->nm_fhsize > 0) { /* * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set * non-zero for the root vnode. f_iosize will be set correctly * by nfs_statfs() before any I/O occurs. */ mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ; error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE); if (error) goto bad; *vpp = NFSTOV(np); /* * Get file attributes and transfer parameters for the * mountpoint. This has the side effect of filling in * (*vpp)->v_type with the correct value. */ ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1, cred, td, &nfsva, NULL, &lease); if (ret) { /* * Just set default values to get things going. */ NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr)); nfsva.na_vattr.va_type = VDIR; nfsva.na_vattr.va_mode = 0777; nfsva.na_vattr.va_nlink = 100; nfsva.na_vattr.va_uid = (uid_t)0; nfsva.na_vattr.va_gid = (gid_t)0; nfsva.na_vattr.va_fileid = 2; nfsva.na_vattr.va_gen = 1; nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE; nfsva.na_vattr.va_size = 512 * 1024; lease = 60; } (void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1); if (nmp->nm_minorvers > 0) { NFSCL_DEBUG(3, "lease=%d\n", (int)lease); NFSLOCKCLSTATE(); clp->nfsc_renew = NFSCL_RENEW(lease); clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew; clp->nfsc_clientidrev++; if (clp->nfsc_clientidrev == 0) clp->nfsc_clientidrev++; NFSUNLOCKCLSTATE(); /* * Mount will succeed, so the renew thread can be * started now. */ nfscl_start_renewthread(clp); nfscl_clientrelease(clp); } if (argp->flags & NFSMNT_NFSV3) ncl_fsinfo(nmp, *vpp, cred, td); /* Mark if the mount point supports NFSv4 ACLs. */ if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 && ret == 0 && NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) { MNT_ILOCK(mp); mp->mnt_flag |= MNT_NFS4ACLS; MNT_IUNLOCK(mp); } /* * Lose the lock but keep the ref. */ NFSVOPUNLOCK(*vpp); vfs_cache_root_set(mp, *vpp); return (0); } error = EIO; bad: if (clp != NULL) nfscl_clientrelease(clp); newnfs_disconnect(&nmp->nm_sockreq); crfree(nmp->nm_sockreq.nr_cred); if (nmp->nm_sockreq.nr_auth != NULL) AUTH_DESTROY(nmp->nm_sockreq.nr_auth); mtx_destroy(&nmp->nm_sockreq.nr_mtx); mtx_destroy(&nmp->nm_mtx); if (nmp->nm_clp != NULL) { NFSLOCKCLSTATE(); LIST_REMOVE(nmp->nm_clp, nfsc_list); NFSUNLOCKCLSTATE(); free(nmp->nm_clp, M_NFSCLCLIENT); } TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) { if (dsp != TAILQ_FIRST(&nmp->nm_sess) && dsp->nfsclds_sockp != NULL) newnfs_disconnect(dsp->nfsclds_sockp); nfscl_freenfsclds(dsp); } + free(nmp->nm_tlscertname, M_NEWNFSMNT); free(nmp, M_NEWNFSMNT); free(nam, M_SONAME); return (error); } /* * unmount system call */ static int nfs_unmount(struct mount *mp, int mntflags) { struct thread *td; struct nfsmount *nmp; int error, flags = 0, i, trycnt = 0; struct nfsclds *dsp, *tdsp; td = curthread; if (mntflags & MNT_FORCE) flags |= FORCECLOSE; nmp = VFSTONFS(mp); error = 0; /* * Goes something like this.. * - Call vflush() to clear out vnodes for this filesystem * - Close the socket * - Free up the data structures */ /* In the forced case, cancel any outstanding requests. */ if (mntflags & MNT_FORCE) { NFSDDSLOCK(); if (nfsv4_findmirror(nmp) != NULL) error = ENXIO; NFSDDSUNLOCK(); if (error) goto out; error = newnfs_nmcancelreqs(nmp); if (error) goto out; /* For a forced close, get rid of the renew thread now */ nfscl_umount(nmp, td); } /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */ do { error = vflush(mp, 1, flags, td); if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30) (void) nfs_catnap(PSOCK, error, "newndm"); } while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30); if (error) goto out; /* * We are now committed to the unmount. */ if ((mntflags & MNT_FORCE) == 0) nfscl_umount(nmp, td); else { mtx_lock(&nmp->nm_mtx); nmp->nm_privflag |= NFSMNTP_FORCEDISM; mtx_unlock(&nmp->nm_mtx); } /* Make sure no nfsiods are assigned to this mount. */ NFSLOCKIOD(); for (i = 0; i < NFS_MAXASYNCDAEMON; i++) if (ncl_iodmount[i] == nmp) { ncl_iodwant[i] = NFSIOD_AVAILABLE; ncl_iodmount[i] = NULL; } NFSUNLOCKIOD(); /* * We can now set mnt_data to NULL and wait for * nfssvc(NFSSVC_FORCEDISM) to complete. */ mtx_lock(&mountlist_mtx); mtx_lock(&nmp->nm_mtx); mp->mnt_data = NULL; mtx_unlock(&mountlist_mtx); while ((nmp->nm_privflag & NFSMNTP_CANCELRPCS) != 0) msleep(nmp, &nmp->nm_mtx, PVFS, "nfsfdism", 0); mtx_unlock(&nmp->nm_mtx); newnfs_disconnect(&nmp->nm_sockreq); crfree(nmp->nm_sockreq.nr_cred); free(nmp->nm_nam, M_SONAME); if (nmp->nm_sockreq.nr_auth != NULL) AUTH_DESTROY(nmp->nm_sockreq.nr_auth); mtx_destroy(&nmp->nm_sockreq.nr_mtx); mtx_destroy(&nmp->nm_mtx); TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) { if (dsp != TAILQ_FIRST(&nmp->nm_sess) && dsp->nfsclds_sockp != NULL) newnfs_disconnect(dsp->nfsclds_sockp); nfscl_freenfsclds(dsp); } + free(nmp->nm_tlscertname, M_NEWNFSMNT); free(nmp, M_NEWNFSMNT); out: return (error); } /* * Return root of a filesystem */ static int nfs_root(struct mount *mp, int flags, struct vnode **vpp) { struct vnode *vp; struct nfsmount *nmp; struct nfsnode *np; int error; nmp = VFSTONFS(mp); error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags); if (error) return error; vp = NFSTOV(np); /* * Get transfer parameters and attributes for root vnode once. */ mtx_lock(&nmp->nm_mtx); if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) { mtx_unlock(&nmp->nm_mtx); ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread); } else mtx_unlock(&nmp->nm_mtx); if (vp->v_type == VNON) vp->v_type = VDIR; vp->v_vflag |= VV_ROOT; *vpp = vp; return (0); } /* * Flush out the buffer cache */ /* ARGSUSED */ static int nfs_sync(struct mount *mp, int waitfor) { struct vnode *vp, *mvp; struct thread *td; int error, allerror = 0; td = curthread; MNT_ILOCK(mp); /* * If a forced dismount is in progress, return from here so that * the umount(2) syscall doesn't get stuck in VFS_SYNC() before * calling VFS_UNMOUNT(). */ if (NFSCL_FORCEDISM(mp)) { MNT_IUNLOCK(mp); return (EBADF); } MNT_IUNLOCK(mp); /* * Force stale buffer cache information to be flushed. */ loop: MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { /* XXX Racy bv_cnt check. */ if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 || waitfor == MNT_LAZY) { VI_UNLOCK(vp); continue; } if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) { MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto loop; } error = VOP_FSYNC(vp, waitfor, td); if (error) allerror = error; NFSVOPUNLOCK(vp); vrele(vp); } return (allerror); } static int nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req) { struct nfsmount *nmp = VFSTONFS(mp); struct vfsquery vq; int error; bzero(&vq, sizeof(vq)); switch (op) { #if 0 case VFS_CTL_NOLOCKS: val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0; if (req->oldptr != NULL) { error = SYSCTL_OUT(req, &val, sizeof(val)); if (error) return (error); } if (req->newptr != NULL) { error = SYSCTL_IN(req, &val, sizeof(val)); if (error) return (error); if (val) nmp->nm_flag |= NFSMNT_NOLOCKS; else nmp->nm_flag &= ~NFSMNT_NOLOCKS; } break; #endif case VFS_CTL_QUERY: mtx_lock(&nmp->nm_mtx); if (nmp->nm_state & NFSSTA_TIMEO) vq.vq_flags |= VQ_NOTRESP; mtx_unlock(&nmp->nm_mtx); #if 0 if (!(nmp->nm_flag & NFSMNT_NOLOCKS) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) vq.vq_flags |= VQ_NOTRESPLOCK; #endif error = SYSCTL_OUT(req, &vq, sizeof(vq)); break; case VFS_CTL_TIMEO: if (req->oldptr != NULL) { error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay, sizeof(nmp->nm_tprintf_initial_delay)); if (error) return (error); } if (req->newptr != NULL) { error = vfs_suser(mp, req->td); if (error) return (error); error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay, sizeof(nmp->nm_tprintf_initial_delay)); if (error) return (error); if (nmp->nm_tprintf_initial_delay < 0) nmp->nm_tprintf_initial_delay = 0; } break; default: return (ENOTSUP); } return (0); } /* * Purge any RPCs in progress, so that they will all return errors. * This allows dounmount() to continue as far as VFS_UNMOUNT() for a * forced dismount. */ static void nfs_purge(struct mount *mp) { struct nfsmount *nmp = VFSTONFS(mp); newnfs_nmcancelreqs(nmp); } /* * Extract the information needed by the nlm from the nfs vnode. */ static void nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp, struct sockaddr_storage *sp, int *is_v3p, off_t *sizep, struct timeval *timeop) { struct nfsmount *nmp; struct nfsnode *np = VTONFS(vp); nmp = VFSTONFS(vp->v_mount); if (fhlenp != NULL) *fhlenp = (size_t)np->n_fhp->nfh_len; if (fhp != NULL) bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len); if (sp != NULL) bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp))); if (is_v3p != NULL) *is_v3p = NFS_ISV3(vp); if (sizep != NULL) *sizep = np->n_size; if (timeop != NULL) { timeop->tv_sec = nmp->nm_timeo / NFS_HZ; timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ); } } /* * This function prints out an option name, based on the conditional * argument. */ static __inline void nfscl_printopt(struct nfsmount *nmp, int testval, char *opt, char **buf, size_t *blen) { int len; if (testval != 0 && *blen > strlen(opt)) { len = snprintf(*buf, *blen, "%s", opt); if (len != strlen(opt)) printf("EEK!!\n"); *buf += len; *blen -= len; } } /* * This function printf out an options integer value. */ static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval, char *opt, char **buf, size_t *blen) { int len; if (*blen > strlen(opt) + 1) { /* Could result in truncated output string. */ len = snprintf(*buf, *blen, "%s=%d", opt, optval); if (len < *blen) { *buf += len; *blen -= len; } } } /* * Load the option flags and values into the buffer. */ void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen) { char *buf; size_t blen; buf = buffer; blen = buflen; nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf, &blen); if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) { nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_ONEOPENOWN) != 0 && nmp->nm_minorvers > 0, ",oneopenown", &buf, &blen); } nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0, "nfsv2", &buf, &blen); nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen); nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_newflag & NFSMNT_TLS) != 0, ",tls", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0, ",noncontigwr", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) == 0, ",lockd", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) == NFSMNT_NOLOCKD, ",nolockd", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen); } diff --git a/sys/fs/nfsclient/nfsmount.h b/sys/fs/nfsclient/nfsmount.h index 063926eceaf5..57adcd8f2fca 100644 --- a/sys/fs/nfsclient/nfsmount.h +++ b/sys/fs/nfsclient/nfsmount.h @@ -1,141 +1,142 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NFSCLIENT_NFSMOUNT_H_ #define _NFSCLIENT_NFSMOUNT_H_ #include /* * Mount structure. * One allocated on every NFS mount. * Holds NFS specific information for mount. */ struct nfsmount { struct nfsmount_common nm_com; /* Common fields for nlm */ uint32_t nm_privflag; /* Private flags */ uint32_t nm_newflag; /* New mount flags */ int nm_numgrps; /* Max. size of groupslist */ u_char nm_fh[NFSX_FHMAX]; /* File handle of root dir */ int nm_fhsize; /* Size of root file handle */ struct nfssockreq nm_sockreq; /* Socket Info */ int nm_timeouts; /* Request timeouts */ int nm_rsize; /* Max size of read rpc */ int nm_wsize; /* Max size of write rpc */ int nm_readdirsize; /* Size of a readdir rpc */ int nm_readahead; /* Num. of blocks to readahead */ int nm_wcommitsize; /* Max size of commit for write */ int nm_acdirmin; /* Directory attr cache min lifetime */ int nm_acdirmax; /* Directory attr cache max lifetime */ int nm_acregmin; /* Reg file attr cache min lifetime */ int nm_acregmax; /* Reg file attr cache max lifetime */ u_char nm_verf[NFSX_VERF]; /* write verifier */ TAILQ_HEAD(, buf) nm_bufq; /* async io buffer queue */ short nm_bufqlen; /* number of buffers in queue */ short nm_bufqwant; /* process wants to add to the queue */ int nm_bufqiods; /* number of iods processing queue */ u_int64_t nm_maxfilesize; /* maximum file size */ int nm_tprintf_initial_delay; /* initial delay */ int nm_tprintf_delay; /* interval for messages */ int nm_nametimeo; /* timeout for +ve entries (sec) */ int nm_negnametimeo; /* timeout for -ve entries (sec) */ /* Newnfs additions */ TAILQ_HEAD(, nfsclds) nm_sess; /* Session(s) for NFSv4.1. */ struct nfsclclient *nm_clp; + char *nm_tlscertname; /* TLS certificate file name */ uid_t nm_uid; /* Uid for SetClientID etc. */ u_int64_t nm_clval; /* identifies which clientid */ u_int64_t nm_fsid[2]; /* NFSv4 fsid */ int nm_minorvers; /* Minor version # for NFSv4 */ u_int16_t nm_krbnamelen; /* Krb5 host principal, if any */ u_int16_t nm_dirpathlen; /* and mount dirpath, for V4 */ u_int16_t nm_srvkrbnamelen; /* and the server's target name */ u_char nm_name[1]; /* malloc'd actual len of krbname + dirpath */ }; #define nm_nam nm_sockreq.nr_nam #define nm_sotype nm_sockreq.nr_sotype #define nm_so nm_sockreq.nr_so #define nm_soflags nm_sockreq.nr_soflags #define nm_soproto nm_sockreq.nr_soproto #define nm_client nm_sockreq.nr_client #define nm_krbname nm_name #define nm_mtx nm_com.nmcom_mtx #define nm_flag nm_com.nmcom_flag #define nm_state nm_com.nmcom_state #define nm_mountp nm_com.nmcom_mountp #define nm_timeo nm_com.nmcom_timeo #define nm_retry nm_com.nmcom_retry #define nm_hostname nm_com.nmcom_hostname #define nm_getinfo nm_com.nmcom_getinfo #define nm_vinvalbuf nm_com.nmcom_vinvalbuf /* Private flags. */ #define NFSMNTP_FORCEDISM 0x00000001 #define NFSMNTP_CANCELRPCS 0x00000002 #define NFSMNTP_IOADVISETHRUMDS 0x00000004 #define NFSMNTP_NOCOPY 0x00000008 #define NFSMNTP_NOCONSECUTIVE 0x00000010 #define NFSMNTP_SEEK 0x00000020 #define NFSMNTP_SEEKTESTED 0x00000040 #define NFSMNTP_NOXATTR 0x00000080 #define NFSMNTP_NOADVISE 0x00000100 #define NFSMNTP_NOALLOCATE 0x00000200 /* New mount flags only used by the kernel via nmount(2). */ #define NFSMNT_TLS 0x00000001 #define NFSMNT_DIRPATH(m) (&((m)->nm_name[(m)->nm_krbnamelen + 1])) #define NFSMNT_SRVKRBNAME(m) \ (&((m)->nm_name[(m)->nm_krbnamelen + (m)->nm_dirpathlen + 2])) #if defined(_KERNEL) /* * Convert mount ptr to nfsmount ptr. */ #define VFSTONFS(mp) ((struct nfsmount *)((mp)->mnt_data)) #ifndef NFS_DEFAULT_NAMETIMEO #define NFS_DEFAULT_NAMETIMEO 60 #endif #ifndef NFS_DEFAULT_NEGNAMETIMEO #define NFS_DEFAULT_NEGNAMETIMEO 60 #endif #endif /* _KERNEL */ #endif /* _NFSCLIENT_NFSMOUNT_H_ */ diff --git a/sys/rpc/clnt.h b/sys/rpc/clnt.h index 23c92103edff..f4cc78b1c3b6 100644 --- a/sys/rpc/clnt.h +++ b/sys/rpc/clnt.h @@ -1,718 +1,719 @@ /* $NetBSD: clnt.h,v 1.14 2000/06/02 22:57:55 fvdl Exp $ */ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2010, Oracle America, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the "Oracle America, Inc." nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * from: @(#)clnt.h 1.31 94/04/29 SMI * from: @(#)clnt.h 2.1 88/07/29 4.0 RPCSRC * $FreeBSD$ */ /* * clnt.h - Client side remote procedure call interface. * * Copyright (c) 1986-1991,1994-1999 by Sun Microsystems, Inc. * All rights reserved. */ #ifndef _RPC_CLNT_H_ #define _RPC_CLNT_H_ #include #include #ifdef _KERNEL #include #include #else #include #endif #include /* * Well-known IPV6 RPC broadcast address. */ #define RPCB_MULTICAST_ADDR "ff02::202" /* * the following errors are in general unrecoverable. The caller * should give up rather than retry. */ #define IS_UNRECOVERABLE_RPC(s) (((s) == RPC_AUTHERROR) || \ ((s) == RPC_CANTENCODEARGS) || \ ((s) == RPC_CANTDECODERES) || \ ((s) == RPC_VERSMISMATCH) || \ ((s) == RPC_PROCUNAVAIL) || \ ((s) == RPC_PROGUNAVAIL) || \ ((s) == RPC_PROGVERSMISMATCH) || \ ((s) == RPC_CANTDECODEARGS)) /* * Error info. */ struct rpc_err { enum clnt_stat re_status; union { int RE_errno; /* related system error */ enum auth_stat RE_why; /* why the auth error occurred */ struct { rpcvers_t low; /* lowest version supported */ rpcvers_t high; /* highest version supported */ } RE_vers; struct { /* maybe meaningful if RPC_FAILED */ int32_t s1; int32_t s2; } RE_lb; /* life boot & debugging only */ } ru; #define re_errno ru.RE_errno #define re_why ru.RE_why #define re_vers ru.RE_vers #define re_lb ru.RE_lb }; #ifdef _KERNEL /* * Functions of this type may be used to receive notification when RPC * calls have to be re-transmitted etc. */ typedef void rpc_feedback(int cmd, int procnum, void *); /* * Timers used for the pseudo-transport protocol when using datagrams */ struct rpc_timers { u_short rt_srtt; /* smoothed round-trip time */ u_short rt_deviate; /* estimated deviation */ u_long rt_rtxcur; /* current (backed-off) rto */ }; /* * A structure used with CLNT_CALL_EXT to pass extra information used * while processing an RPC call. */ struct rpc_callextra { AUTH *rc_auth; /* auth handle to use for this call */ rpc_feedback *rc_feedback; /* callback for retransmits etc. */ void *rc_feedback_arg; /* argument for callback */ struct rpc_timers *rc_timers; /* optional RTT timers */ struct rpc_err rc_err; /* detailed call status */ }; #endif /* * Client rpc handle. * Created by individual implementations * Client is responsible for initializing auth, see e.g. auth_none.c. */ typedef struct __rpc_client { #ifdef _KERNEL volatile u_int cl_refs; /* reference count */ AUTH *cl_auth; /* authenticator */ struct clnt_ops { /* call remote procedure */ enum clnt_stat (*cl_call)(struct __rpc_client *, struct rpc_callextra *, rpcproc_t, struct mbuf *, struct mbuf **, struct timeval); /* abort a call */ void (*cl_abort)(struct __rpc_client *); /* get specific error code */ void (*cl_geterr)(struct __rpc_client *, struct rpc_err *); /* frees results */ bool_t (*cl_freeres)(struct __rpc_client *, xdrproc_t, void *); /* close the connection and terminate pending RPCs */ void (*cl_close)(struct __rpc_client *); /* destroy this structure */ void (*cl_destroy)(struct __rpc_client *); /* the ioctl() of rpc */ bool_t (*cl_control)(struct __rpc_client *, u_int, void *); } *cl_ops; #else AUTH *cl_auth; /* authenticator */ struct clnt_ops { /* call remote procedure */ enum clnt_stat (*cl_call)(struct __rpc_client *, rpcproc_t, xdrproc_t, void *, xdrproc_t, void *, struct timeval); /* abort a call */ void (*cl_abort)(struct __rpc_client *); /* get specific error code */ void (*cl_geterr)(struct __rpc_client *, struct rpc_err *); /* frees results */ bool_t (*cl_freeres)(struct __rpc_client *, xdrproc_t, void *); /* destroy this structure */ void (*cl_destroy)(struct __rpc_client *); /* the ioctl() of rpc */ bool_t (*cl_control)(struct __rpc_client *, u_int, void *); } *cl_ops; #endif void *cl_private; /* private stuff */ char *cl_netid; /* network token */ char *cl_tp; /* device name */ } CLIENT; /* * Feedback values used for possible congestion and rate control */ #define FEEDBACK_OK 1 /* no retransmits */ #define FEEDBACK_REXMIT1 2 /* first retransmit */ #define FEEDBACK_REXMIT2 3 /* second and subsequent retransmit */ #define FEEDBACK_RECONNECT 4 /* client reconnect */ /* Used to set version of portmapper used in broadcast */ #define CLCR_SET_LOWVERS 3 #define CLCR_GET_LOWVERS 4 #define RPCSMALLMSGSIZE 400 /* a more reasonable packet size */ /* * client side rpc interface ops * * Parameter types are: * */ #ifdef _KERNEL #define CLNT_ACQUIRE(rh) \ refcount_acquire(&(rh)->cl_refs) #define CLNT_RELEASE(rh) \ if (refcount_release(&(rh)->cl_refs)) \ CLNT_DESTROY(rh) /* * void * CLNT_CLOSE(rh); * CLIENT *rh; */ #define CLNT_CLOSE(rh) ((*(rh)->cl_ops->cl_close)(rh)) enum clnt_stat clnt_call_private(CLIENT *, struct rpc_callextra *, rpcproc_t, xdrproc_t, void *, xdrproc_t, void *, struct timeval); /* * enum clnt_stat * CLNT_CALL_MBUF(rh, ext, proc, mreq, mrepp, timeout) * CLIENT *rh; * struct rpc_callextra *ext; * rpcproc_t proc; * struct mbuf *mreq; * struct mbuf **mrepp; * struct timeval timeout; * * Call arguments in mreq which is consumed by the call (even if there * is an error). Results returned in *mrepp. */ #define CLNT_CALL_MBUF(rh, ext, proc, mreq, mrepp, secs) \ ((*(rh)->cl_ops->cl_call)(rh, ext, proc, mreq, mrepp, secs)) /* * enum clnt_stat * CLNT_CALL_EXT(rh, ext, proc, xargs, argsp, xres, resp, timeout) * CLIENT *rh; * struct rpc_callextra *ext; * rpcproc_t proc; * xdrproc_t xargs; * void *argsp; * xdrproc_t xres; * void *resp; * struct timeval timeout; */ #define CLNT_CALL_EXT(rh, ext, proc, xargs, argsp, xres, resp, secs) \ clnt_call_private(rh, ext, proc, xargs, \ argsp, xres, resp, secs) #endif /* * enum clnt_stat * CLNT_CALL(rh, proc, xargs, argsp, xres, resp, timeout) * CLIENT *rh; * rpcproc_t proc; * xdrproc_t xargs; * void *argsp; * xdrproc_t xres; * void *resp; * struct timeval timeout; */ #ifdef _KERNEL #define CLNT_CALL(rh, proc, xargs, argsp, xres, resp, secs) \ clnt_call_private(rh, NULL, proc, xargs, \ argsp, xres, resp, secs) #define clnt_call(rh, proc, xargs, argsp, xres, resp, secs) \ clnt_call_private(rh, NULL, proc, xargs, \ argsp, xres, resp, secs) #else #define CLNT_CALL(rh, proc, xargs, argsp, xres, resp, secs) \ ((*(rh)->cl_ops->cl_call)(rh, proc, xargs, \ argsp, xres, resp, secs)) #define clnt_call(rh, proc, xargs, argsp, xres, resp, secs) \ ((*(rh)->cl_ops->cl_call)(rh, proc, xargs, \ argsp, xres, resp, secs)) #endif /* * void * CLNT_ABORT(rh); * CLIENT *rh; */ #define CLNT_ABORT(rh) ((*(rh)->cl_ops->cl_abort)(rh)) #define clnt_abort(rh) ((*(rh)->cl_ops->cl_abort)(rh)) /* * struct rpc_err * CLNT_GETERR(rh); * CLIENT *rh; */ #define CLNT_GETERR(rh,errp) ((*(rh)->cl_ops->cl_geterr)(rh, errp)) #define clnt_geterr(rh,errp) ((*(rh)->cl_ops->cl_geterr)(rh, errp)) /* * bool_t * CLNT_FREERES(rh, xres, resp); * CLIENT *rh; * xdrproc_t xres; * void *resp; */ #define CLNT_FREERES(rh,xres,resp) ((*(rh)->cl_ops->cl_freeres)(rh,xres,resp)) #define clnt_freeres(rh,xres,resp) ((*(rh)->cl_ops->cl_freeres)(rh,xres,resp)) /* * bool_t * CLNT_CONTROL(cl, request, info) * CLIENT *cl; * u_int request; * char *info; */ #define CLNT_CONTROL(cl,rq,in) ((*(cl)->cl_ops->cl_control)(cl,rq,in)) #define clnt_control(cl,rq,in) ((*(cl)->cl_ops->cl_control)(cl,rq,in)) /* * control operations that apply to both udp and tcp transports */ #define CLSET_TIMEOUT 1 /* set timeout (timeval) */ #define CLGET_TIMEOUT 2 /* get timeout (timeval) */ #define CLGET_SERVER_ADDR 3 /* get server's address (sockaddr) */ #define CLGET_FD 6 /* get connections file descriptor */ #define CLGET_SVC_ADDR 7 /* get server's address (netbuf) */ #define CLSET_FD_CLOSE 8 /* close fd while clnt_destroy */ #define CLSET_FD_NCLOSE 9 /* Do not close fd while clnt_destroy */ #define CLGET_XID 10 /* Get xid */ #define CLSET_XID 11 /* Set xid */ #define CLGET_VERS 12 /* Get version number */ #define CLSET_VERS 13 /* Set version number */ #define CLGET_PROG 14 /* Get program number */ #define CLSET_PROG 15 /* Set program number */ #define CLSET_SVC_ADDR 16 /* get server's address (netbuf) */ #define CLSET_PUSH_TIMOD 17 /* push timod if not already present */ #define CLSET_POP_TIMOD 18 /* pop timod */ /* * Connectionless only control operations */ #define CLSET_RETRY_TIMEOUT 4 /* set retry timeout (timeval) */ #define CLGET_RETRY_TIMEOUT 5 /* get retry timeout (timeval) */ #define CLSET_ASYNC 19 #define CLSET_CONNECT 20 /* Use connect() for UDP. (int) */ #ifdef _KERNEL /* * Kernel control operations. The default msleep string is "rpcrecv", * and sleeps are non-interruptible by default. */ #define CLSET_WAITCHAN 21 /* set string to use in msleep call */ #define CLGET_WAITCHAN 22 /* get string used in msleep call */ #define CLSET_INTERRUPTIBLE 23 /* set interruptible flag */ #define CLGET_INTERRUPTIBLE 24 /* set interruptible flag */ #define CLSET_RETRIES 25 /* set retry count for reconnect */ #define CLGET_RETRIES 26 /* get retry count for reconnect */ #define CLSET_PRIVPORT 27 /* set privileged source port flag */ #define CLGET_PRIVPORT 28 /* get privileged source port flag */ #define CLSET_BACKCHANNEL 29 /* set backchannel for socket */ #define CLSET_TLS 30 /* set TLS for socket */ #define CLSET_BLOCKRCV 31 /* Temporarily block reception */ +#define CLSET_TLSCERTNAME 32 /* TLS certificate file name */ #endif /* * void * CLNT_DESTROY(rh); * CLIENT *rh; */ #define CLNT_DESTROY(rh) ((*(rh)->cl_ops->cl_destroy)(rh)) #define clnt_destroy(rh) ((*(rh)->cl_ops->cl_destroy)(rh)) /* * RPCTEST is a test program which is accessible on every rpc * transport/port. It is used for testing, performance evaluation, * and network administration. */ #define RPCTEST_PROGRAM ((rpcprog_t)1) #define RPCTEST_VERSION ((rpcvers_t)1) #define RPCTEST_NULL_PROC ((rpcproc_t)2) #define RPCTEST_NULL_BATCH_PROC ((rpcproc_t)3) /* * By convention, procedure 0 takes null arguments and returns them */ #define NULLPROC ((rpcproc_t)0) /* * Below are the client handle creation routines for the various * implementations of client side rpc. They can return NULL if a * creation failure occurs. */ /* * Generic client creation routine. Supported protocols are those that * belong to the nettype namespace (/etc/netconfig). */ __BEGIN_DECLS #ifdef _KERNEL /* * struct socket *so; -- socket * struct sockaddr *svcaddr; -- servers address * rpcprog_t prog; -- program number * rpcvers_t vers; -- version number * size_t sendsz; -- buffer recv size * size_t recvsz; -- buffer send size */ extern CLIENT *clnt_dg_create(struct socket *so, struct sockaddr *svcaddr, rpcprog_t program, rpcvers_t version, size_t sendsz, size_t recvsz); /* * struct socket *so; -- socket * struct sockaddr *svcaddr; -- servers address * rpcprog_t prog; -- program number * rpcvers_t vers; -- version number * size_t sendsz; -- buffer recv size * size_t recvsz; -- buffer send size * int intrflag; -- is it interruptible */ extern CLIENT *clnt_vc_create(struct socket *so, struct sockaddr *svcaddr, rpcprog_t program, rpcvers_t version, size_t sendsz, size_t recvsz, int intrflag); /* * struct netconfig *nconf; -- network type * struct sockaddr *svcaddr; -- servers address * rpcprog_t prog; -- program number * rpcvers_t vers; -- version number * size_t sendsz; -- buffer recv size * size_t recvsz; -- buffer send size */ extern CLIENT *clnt_reconnect_create(struct netconfig *nconf, struct sockaddr *svcaddr, rpcprog_t program, rpcvers_t version, size_t sendsz, size_t recvsz); #else extern CLIENT *clnt_create(const char *, const rpcprog_t, const rpcvers_t, const char *); /* * * const char *hostname; -- hostname * const rpcprog_t prog; -- program number * const rpcvers_t vers; -- version number * const char *nettype; -- network type */ /* * Generic client creation routine. Just like clnt_create(), except * it takes an additional timeout parameter. */ extern CLIENT * clnt_create_timed(const char *, const rpcprog_t, const rpcvers_t, const char *, const struct timeval *); /* * * const char *hostname; -- hostname * const rpcprog_t prog; -- program number * const rpcvers_t vers; -- version number * const char *nettype; -- network type * const struct timeval *tp; -- timeout */ /* * Generic client creation routine. Supported protocols are which belong * to the nettype name space. */ extern CLIENT *clnt_create_vers(const char *, const rpcprog_t, rpcvers_t *, const rpcvers_t, const rpcvers_t, const char *); /* * const char *host; -- hostname * const rpcprog_t prog; -- program number * rpcvers_t *vers_out; -- servers highest available version * const rpcvers_t vers_low; -- low version number * const rpcvers_t vers_high; -- high version number * const char *nettype; -- network type */ /* * Generic client creation routine. Supported protocols are which belong * to the nettype name space. */ extern CLIENT * clnt_create_vers_timed(const char *, const rpcprog_t, rpcvers_t *, const rpcvers_t, const rpcvers_t, const char *, const struct timeval *); /* * const char *host; -- hostname * const rpcprog_t prog; -- program number * rpcvers_t *vers_out; -- servers highest available version * const rpcvers_t vers_low; -- low version number * const rpcvers_t vers_high; -- high version number * const char *nettype; -- network type * const struct timeval *tp -- timeout */ /* * Generic client creation routine. It takes a netconfig structure * instead of nettype */ extern CLIENT *clnt_tp_create(const char *, const rpcprog_t, const rpcvers_t, const struct netconfig *); /* * const char *hostname; -- hostname * const rpcprog_t prog; -- program number * const rpcvers_t vers; -- version number * const struct netconfig *netconf; -- network config structure */ /* * Generic client creation routine. Just like clnt_tp_create(), except * it takes an additional timeout parameter. */ extern CLIENT * clnt_tp_create_timed(const char *, const rpcprog_t, const rpcvers_t, const struct netconfig *, const struct timeval *); /* * const char *hostname; -- hostname * const rpcprog_t prog; -- program number * const rpcvers_t vers; -- version number * const struct netconfig *netconf; -- network config structure * const struct timeval *tp -- timeout */ /* * Generic TLI create routine. Only provided for compatibility. */ extern CLIENT *clnt_tli_create(const int, const struct netconfig *, struct netbuf *, const rpcprog_t, const rpcvers_t, const u_int, const u_int); /* * const int fd; -- fd * const struct netconfig *nconf; -- netconfig structure * struct netbuf *svcaddr; -- servers address * const u_long prog; -- program number * const u_long vers; -- version number * const u_int sendsz; -- send size * const u_int recvsz; -- recv size */ /* * Low level clnt create routine for connectionful transports, e.g. tcp. */ extern CLIENT *clnt_vc_create(const int, const struct netbuf *, const rpcprog_t, const rpcvers_t, u_int, u_int); /* * Added for compatibility to old rpc 4.0. Obsoleted by clnt_vc_create(). */ extern CLIENT *clntunix_create(struct sockaddr_un *, u_long, u_long, int *, u_int, u_int); /* * const int fd; -- open file descriptor * const struct netbuf *svcaddr; -- servers address * const rpcprog_t prog; -- program number * const rpcvers_t vers; -- version number * const u_int sendsz; -- buffer recv size * const u_int recvsz; -- buffer send size */ /* * Low level clnt create routine for connectionless transports, e.g. udp. */ extern CLIENT *clnt_dg_create(const int, const struct netbuf *, const rpcprog_t, const rpcvers_t, const u_int, const u_int); /* * const int fd; -- open file descriptor * const struct netbuf *svcaddr; -- servers address * const rpcprog_t program; -- program number * const rpcvers_t version; -- version number * const u_int sendsz; -- buffer recv size * const u_int recvsz; -- buffer send size */ /* * Memory based rpc (for speed check and testing) * CLIENT * * clnt_raw_create(prog, vers) * u_long prog; * u_long vers; */ extern CLIENT *clnt_raw_create(rpcprog_t, rpcvers_t); #endif __END_DECLS /* * Print why creation failed */ __BEGIN_DECLS extern void clnt_pcreateerror(const char *); /* stderr */ extern char *clnt_spcreateerror(const char *); /* string */ __END_DECLS /* * Like clnt_perror(), but is more verbose in its output */ __BEGIN_DECLS extern void clnt_perrno(enum clnt_stat); /* stderr */ extern char *clnt_sperrno(enum clnt_stat); /* string */ __END_DECLS /* * Print an English error message, given the client error code */ __BEGIN_DECLS extern void clnt_perror(CLIENT *, const char *); /* stderr */ extern char *clnt_sperror(CLIENT *, const char *); /* string */ __END_DECLS /* * If a creation fails, the following allows the user to figure out why. */ struct rpc_createerr { enum clnt_stat cf_stat; struct rpc_err cf_error; /* useful when cf_stat == RPC_PMAPFAILURE */ }; #ifdef _KERNEL extern struct rpc_createerr rpc_createerr; #else __BEGIN_DECLS extern struct rpc_createerr *__rpc_createerr(void); __END_DECLS #define rpc_createerr (*(__rpc_createerr())) #endif #ifndef _KERNEL /* * The simplified interface: * enum clnt_stat * rpc_call(host, prognum, versnum, procnum, inproc, in, outproc, out, nettype) * const char *host; * const rpcprog_t prognum; * const rpcvers_t versnum; * const rpcproc_t procnum; * const xdrproc_t inproc, outproc; * const char *in; * char *out; * const char *nettype; */ __BEGIN_DECLS extern enum clnt_stat rpc_call(const char *, const rpcprog_t, const rpcvers_t, const rpcproc_t, const xdrproc_t, const char *, const xdrproc_t, char *, const char *); __END_DECLS /* * RPC broadcast interface * The call is broadcasted to all locally connected nets. * * extern enum clnt_stat * rpc_broadcast(prog, vers, proc, xargs, argsp, xresults, resultsp, * eachresult, nettype) * const rpcprog_t prog; -- program number * const rpcvers_t vers; -- version number * const rpcproc_t proc; -- procedure number * const xdrproc_t xargs; -- xdr routine for args * caddr_t argsp; -- pointer to args * const xdrproc_t xresults; -- xdr routine for results * caddr_t resultsp; -- pointer to results * const resultproc_t eachresult; -- call with each result * const char *nettype; -- Transport type * * For each valid response received, the procedure eachresult is called. * Its form is: * done = eachresult(resp, raddr, nconf) * bool_t done; * caddr_t resp; * struct netbuf *raddr; * struct netconfig *nconf; * where resp points to the results of the call and raddr is the * address if the responder to the broadcast. nconf is the transport * on which the response was received. * * extern enum clnt_stat * rpc_broadcast_exp(prog, vers, proc, xargs, argsp, xresults, resultsp, * eachresult, inittime, waittime, nettype) * const rpcprog_t prog; -- program number * const rpcvers_t vers; -- version number * const rpcproc_t proc; -- procedure number * const xdrproc_t xargs; -- xdr routine for args * caddr_t argsp; -- pointer to args * const xdrproc_t xresults; -- xdr routine for results * caddr_t resultsp; -- pointer to results * const resultproc_t eachresult; -- call with each result * const int inittime; -- how long to wait initially * const int waittime; -- maximum time to wait * const char *nettype; -- Transport type */ typedef bool_t (*resultproc_t)(caddr_t, ...); __BEGIN_DECLS extern enum clnt_stat rpc_broadcast(const rpcprog_t, const rpcvers_t, const rpcproc_t, const xdrproc_t, caddr_t, const xdrproc_t, caddr_t, const resultproc_t, const char *); extern enum clnt_stat rpc_broadcast_exp(const rpcprog_t, const rpcvers_t, const rpcproc_t, const xdrproc_t, caddr_t, const xdrproc_t, caddr_t, const resultproc_t, const int, const int, const char *); __END_DECLS /* For backward compatibility */ #include #endif #endif /* !_RPC_CLNT_H_ */ diff --git a/sys/rpc/clnt_rc.c b/sys/rpc/clnt_rc.c index 730001723e94..8c204989d0ea 100644 --- a/sys/rpc/clnt_rc.c +++ b/sys/rpc/clnt_rc.c @@ -1,548 +1,566 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ * Authors: Doug Rabson * Developed with Red Inc: Alfred Perlstein * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static enum clnt_stat clnt_reconnect_call(CLIENT *, struct rpc_callextra *, rpcproc_t, struct mbuf *, struct mbuf **, struct timeval); static void clnt_reconnect_geterr(CLIENT *, struct rpc_err *); static bool_t clnt_reconnect_freeres(CLIENT *, xdrproc_t, void *); static void clnt_reconnect_abort(CLIENT *); static bool_t clnt_reconnect_control(CLIENT *, u_int, void *); static void clnt_reconnect_close(CLIENT *); static void clnt_reconnect_destroy(CLIENT *); static struct clnt_ops clnt_reconnect_ops = { .cl_call = clnt_reconnect_call, .cl_abort = clnt_reconnect_abort, .cl_geterr = clnt_reconnect_geterr, .cl_freeres = clnt_reconnect_freeres, .cl_close = clnt_reconnect_close, .cl_destroy = clnt_reconnect_destroy, .cl_control = clnt_reconnect_control }; static int fake_wchan; CLIENT * clnt_reconnect_create( struct netconfig *nconf, /* network type */ struct sockaddr *svcaddr, /* servers address */ rpcprog_t program, /* program number */ rpcvers_t version, /* version number */ size_t sendsz, /* buffer recv size */ size_t recvsz) /* buffer send size */ { CLIENT *cl = NULL; /* client handle */ struct rc_data *rc = NULL; /* private data */ if (svcaddr == NULL) { rpc_createerr.cf_stat = RPC_UNKNOWNADDR; return (NULL); } cl = mem_alloc(sizeof (CLIENT)); rc = mem_alloc(sizeof (*rc)); mtx_init(&rc->rc_lock, "rc->rc_lock", NULL, MTX_DEF); (void) memcpy(&rc->rc_addr, svcaddr, (size_t)svcaddr->sa_len); rc->rc_nconf = nconf; rc->rc_prog = program; rc->rc_vers = version; rc->rc_sendsz = sendsz; rc->rc_recvsz = recvsz; rc->rc_timeout.tv_sec = -1; rc->rc_timeout.tv_usec = -1; rc->rc_retry.tv_sec = 3; rc->rc_retry.tv_usec = 0; rc->rc_retries = INT_MAX; rc->rc_privport = FALSE; rc->rc_waitchan = "rpcrecv"; rc->rc_intr = 0; rc->rc_connecting = FALSE; rc->rc_closed = FALSE; rc->rc_ucred = crdup(curthread->td_ucred); rc->rc_client = NULL; rc->rc_tls = false; + rc->rc_tlscertname = NULL; cl->cl_refs = 1; cl->cl_ops = &clnt_reconnect_ops; cl->cl_private = (caddr_t)(void *)rc; cl->cl_auth = authnone_create(); cl->cl_tp = NULL; cl->cl_netid = NULL; return (cl); } static enum clnt_stat clnt_reconnect_connect(CLIENT *cl) { struct thread *td = curthread; struct rc_data *rc = (struct rc_data *)cl->cl_private; struct socket *so; enum clnt_stat stat; int error; int one = 1; struct ucred *oldcred; CLIENT *newclient = NULL; uint64_t ssl[3]; uint32_t reterr; mtx_lock(&rc->rc_lock); while (rc->rc_connecting) { error = msleep(rc, &rc->rc_lock, rc->rc_intr ? PCATCH : 0, "rpcrecon", 0); if (error) { mtx_unlock(&rc->rc_lock); return (RPC_INTR); } } if (rc->rc_closed) { mtx_unlock(&rc->rc_lock); return (RPC_CANTSEND); } if (rc->rc_client) { mtx_unlock(&rc->rc_lock); return (RPC_SUCCESS); } /* * My turn to attempt a connect. The rc_connecting variable * serializes the following code sequence, so it is guaranteed * that rc_client will still be NULL after it is re-locked below, * since that is the only place it is set non-NULL. */ rc->rc_connecting = TRUE; mtx_unlock(&rc->rc_lock); oldcred = td->td_ucred; td->td_ucred = rc->rc_ucred; so = __rpc_nconf2socket(rc->rc_nconf); if (!so) { stat = rpc_createerr.cf_stat = RPC_TLIERROR; rpc_createerr.cf_error.re_errno = 0; td->td_ucred = oldcred; goto out; } if (rc->rc_privport) bindresvport(so, NULL); if (rc->rc_nconf->nc_semantics == NC_TPI_CLTS) newclient = clnt_dg_create(so, (struct sockaddr *) &rc->rc_addr, rc->rc_prog, rc->rc_vers, rc->rc_sendsz, rc->rc_recvsz); else { /* * I do not believe a timeout of less than 1sec would make * sense here since short delays can occur when a server is * temporarily overloaded. */ if (rc->rc_timeout.tv_sec > 0 && rc->rc_timeout.tv_usec >= 0) { error = so_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &rc->rc_timeout, sizeof(struct timeval)); if (error != 0) { stat = rpc_createerr.cf_stat = RPC_CANTSEND; rpc_createerr.cf_error.re_errno = error; td->td_ucred = oldcred; goto out; } } newclient = clnt_vc_create(so, (struct sockaddr *) &rc->rc_addr, rc->rc_prog, rc->rc_vers, rc->rc_sendsz, rc->rc_recvsz, rc->rc_intr); if (rc->rc_tls && newclient != NULL) { - stat = rpctls_connect(newclient, so, ssl, &reterr); + stat = rpctls_connect(newclient, rc->rc_tlscertname, so, + ssl, &reterr); if (stat != RPC_SUCCESS || reterr != RPCTLSERR_OK) { if (stat == RPC_SUCCESS) stat = RPC_FAILED; stat = rpc_createerr.cf_stat = stat; rpc_createerr.cf_error.re_errno = 0; CLNT_CLOSE(newclient); CLNT_RELEASE(newclient); newclient = NULL; td->td_ucred = oldcred; goto out; } } } td->td_ucred = oldcred; if (!newclient) { soclose(so); rc->rc_err = rpc_createerr.cf_error; stat = rpc_createerr.cf_stat; goto out; } CLNT_CONTROL(newclient, CLSET_FD_CLOSE, 0); CLNT_CONTROL(newclient, CLSET_CONNECT, &one); CLNT_CONTROL(newclient, CLSET_TIMEOUT, &rc->rc_timeout); CLNT_CONTROL(newclient, CLSET_RETRY_TIMEOUT, &rc->rc_retry); CLNT_CONTROL(newclient, CLSET_WAITCHAN, rc->rc_waitchan); CLNT_CONTROL(newclient, CLSET_INTERRUPTIBLE, &rc->rc_intr); if (rc->rc_tls) CLNT_CONTROL(newclient, CLSET_TLS, ssl); if (rc->rc_backchannel != NULL) CLNT_CONTROL(newclient, CLSET_BACKCHANNEL, rc->rc_backchannel); stat = RPC_SUCCESS; out: mtx_lock(&rc->rc_lock); KASSERT(rc->rc_client == NULL, ("rc_client not null")); if (!rc->rc_closed) { rc->rc_client = newclient; newclient = NULL; } rc->rc_connecting = FALSE; wakeup(rc); mtx_unlock(&rc->rc_lock); if (newclient) { /* * It has been closed, so discard the new client. * nb: clnt_[dg|vc]_close()/clnt_[dg|vc]_destroy() cannot * be called with the rc_lock mutex held, since they may * msleep() while holding a different mutex. */ CLNT_CLOSE(newclient); CLNT_RELEASE(newclient); } return (stat); } static enum clnt_stat clnt_reconnect_call( CLIENT *cl, /* client handle */ struct rpc_callextra *ext, /* call metadata */ rpcproc_t proc, /* procedure number */ struct mbuf *args, /* pointer to args */ struct mbuf **resultsp, /* pointer to results */ struct timeval utimeout) { struct rc_data *rc = (struct rc_data *)cl->cl_private; CLIENT *client; enum clnt_stat stat; int tries, error; tries = 0; do { mtx_lock(&rc->rc_lock); if (rc->rc_closed) { mtx_unlock(&rc->rc_lock); return (RPC_CANTSEND); } if (!rc->rc_client) { mtx_unlock(&rc->rc_lock); stat = clnt_reconnect_connect(cl); if (stat == RPC_SYSTEMERROR) { error = tsleep(&fake_wchan, rc->rc_intr ? PCATCH : 0, "rpccon", hz); if (error == EINTR || error == ERESTART) return (RPC_INTR); tries++; if (tries >= rc->rc_retries) return (stat); continue; } if (stat != RPC_SUCCESS) return (stat); mtx_lock(&rc->rc_lock); } if (!rc->rc_client) { mtx_unlock(&rc->rc_lock); stat = RPC_FAILED; continue; } CLNT_ACQUIRE(rc->rc_client); client = rc->rc_client; mtx_unlock(&rc->rc_lock); stat = CLNT_CALL_MBUF(client, ext, proc, args, resultsp, utimeout); if (stat != RPC_SUCCESS) { if (!ext) CLNT_GETERR(client, &rc->rc_err); } if (stat == RPC_TIMEDOUT) { /* * Check for async send misfeature for NLM * protocol. */ if ((rc->rc_timeout.tv_sec == 0 && rc->rc_timeout.tv_usec == 0) || (rc->rc_timeout.tv_sec == -1 && utimeout.tv_sec == 0 && utimeout.tv_usec == 0)) { CLNT_RELEASE(client); break; } } if (stat == RPC_TIMEDOUT || stat == RPC_CANTSEND || stat == RPC_CANTRECV) { tries++; if (tries >= rc->rc_retries) { CLNT_RELEASE(client); break; } if (ext && ext->rc_feedback) ext->rc_feedback(FEEDBACK_RECONNECT, proc, ext->rc_feedback_arg); mtx_lock(&rc->rc_lock); /* * Make sure that someone else hasn't already * reconnected by checking if rc_client has changed. * If not, we are done with the client and must * do CLNT_RELEASE(client) twice to dispose of it, * because there is both an initial refcnt and one * acquired by CLNT_ACQUIRE() above. */ if (rc->rc_client == client) { rc->rc_client = NULL; mtx_unlock(&rc->rc_lock); CLNT_RELEASE(client); } else { mtx_unlock(&rc->rc_lock); } CLNT_RELEASE(client); } else { CLNT_RELEASE(client); break; } } while (stat != RPC_SUCCESS); KASSERT(stat != RPC_SUCCESS || *resultsp, ("RPC_SUCCESS without reply")); return (stat); } static void clnt_reconnect_geterr(CLIENT *cl, struct rpc_err *errp) { struct rc_data *rc = (struct rc_data *)cl->cl_private; *errp = rc->rc_err; } /* * Since this function requires that rc_client be valid, it can * only be called when that is guaranteed to be the case. */ static bool_t clnt_reconnect_freeres(CLIENT *cl, xdrproc_t xdr_res, void *res_ptr) { struct rc_data *rc = (struct rc_data *)cl->cl_private; return (CLNT_FREERES(rc->rc_client, xdr_res, res_ptr)); } /*ARGSUSED*/ static void clnt_reconnect_abort(CLIENT *h) { } /* * CLNT_CONTROL() on the client returned by clnt_reconnect_create() must * always be called before CLNT_CALL_MBUF() by a single thread only. */ static bool_t clnt_reconnect_control(CLIENT *cl, u_int request, void *info) { struct rc_data *rc = (struct rc_data *)cl->cl_private; SVCXPRT *xprt; + size_t slen; if (info == NULL) { return (FALSE); } switch (request) { case CLSET_TIMEOUT: rc->rc_timeout = *(struct timeval *)info; if (rc->rc_client) CLNT_CONTROL(rc->rc_client, request, info); break; case CLGET_TIMEOUT: *(struct timeval *)info = rc->rc_timeout; break; case CLSET_RETRY_TIMEOUT: rc->rc_retry = *(struct timeval *)info; if (rc->rc_client) CLNT_CONTROL(rc->rc_client, request, info); break; case CLGET_RETRY_TIMEOUT: *(struct timeval *)info = rc->rc_retry; break; case CLGET_VERS: *(uint32_t *)info = rc->rc_vers; break; case CLSET_VERS: rc->rc_vers = *(uint32_t *) info; if (rc->rc_client) CLNT_CONTROL(rc->rc_client, CLSET_VERS, info); break; case CLGET_PROG: *(uint32_t *)info = rc->rc_prog; break; case CLSET_PROG: rc->rc_prog = *(uint32_t *) info; if (rc->rc_client) CLNT_CONTROL(rc->rc_client, request, info); break; case CLSET_WAITCHAN: rc->rc_waitchan = (char *)info; if (rc->rc_client) CLNT_CONTROL(rc->rc_client, request, info); break; case CLGET_WAITCHAN: *(const char **) info = rc->rc_waitchan; break; case CLSET_INTERRUPTIBLE: rc->rc_intr = *(int *) info; if (rc->rc_client) CLNT_CONTROL(rc->rc_client, request, info); break; case CLGET_INTERRUPTIBLE: *(int *) info = rc->rc_intr; break; case CLSET_RETRIES: rc->rc_retries = *(int *) info; break; case CLGET_RETRIES: *(int *) info = rc->rc_retries; break; case CLSET_PRIVPORT: rc->rc_privport = *(int *) info; break; case CLGET_PRIVPORT: *(int *) info = rc->rc_privport; break; case CLSET_BACKCHANNEL: xprt = (SVCXPRT *)info; xprt_register(xprt); rc->rc_backchannel = info; break; case CLSET_TLS: rc->rc_tls = true; break; + case CLSET_TLSCERTNAME: + slen = strlen(info) + 1; + /* + * tlscertname with "key.pem" appended to it forms a file + * name. As such, the maximum allowable strlen(info) is + * NAME_MAX - 7. However, "slen" includes the nul termination + * byte so it can be up to NAME_MAX - 6. + */ + if (slen <= 1 || slen > NAME_MAX - 6) + return (FALSE); + rc->rc_tlscertname = mem_alloc(slen); + strlcpy(rc->rc_tlscertname, info, slen); + break; + default: return (FALSE); } return (TRUE); } static void clnt_reconnect_close(CLIENT *cl) { struct rc_data *rc = (struct rc_data *)cl->cl_private; CLIENT *client; mtx_lock(&rc->rc_lock); if (rc->rc_closed) { mtx_unlock(&rc->rc_lock); return; } rc->rc_closed = TRUE; client = rc->rc_client; rc->rc_client = NULL; mtx_unlock(&rc->rc_lock); if (client) { CLNT_CLOSE(client); CLNT_RELEASE(client); } } static void clnt_reconnect_destroy(CLIENT *cl) { struct rc_data *rc = (struct rc_data *)cl->cl_private; SVCXPRT *xprt; if (rc->rc_client) CLNT_DESTROY(rc->rc_client); if (rc->rc_backchannel) { xprt = (SVCXPRT *)rc->rc_backchannel; xprt_unregister(xprt); SVC_RELEASE(xprt); } crfree(rc->rc_ucred); mtx_destroy(&rc->rc_lock); + mem_free(rc->rc_tlscertname, 0); /* 0 ok, since arg. ignored. */ mem_free(rc, sizeof(*rc)); mem_free(cl, sizeof (CLIENT)); } diff --git a/sys/rpc/krpc.h b/sys/rpc/krpc.h index 53d46deddf65..77facdcf16cc 100644 --- a/sys/rpc/krpc.h +++ b/sys/rpc/krpc.h @@ -1,132 +1,133 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _RPC_KRPC_H_ #define _RPC_KRPC_H_ #ifdef _KERNEL /* * Definitions now shared between client and server RPC for backchannels. */ #define MCALL_MSG_SIZE 24 void clnt_bck_svccall(void *, struct mbuf *, uint32_t); enum clnt_stat clnt_bck_call(CLIENT *, struct rpc_callextra *, rpcproc_t, struct mbuf *, struct mbuf **, struct timeval, SVCXPRT *); struct mbuf *_rpc_copym_into_ext_pgs(struct mbuf *, int); /* * A pending RPC request which awaits a reply. Requests which have * received their reply will have cr_xid set to zero and cr_mrep to * the mbuf chain of the reply. */ struct ct_request { TAILQ_ENTRY(ct_request) cr_link; uint32_t cr_xid; /* XID of request */ struct mbuf *cr_mrep; /* reply received by upcall */ int cr_error; /* any error from upcall */ char cr_verf[MAX_AUTH_BYTES]; /* reply verf */ }; TAILQ_HEAD(ct_request_list, ct_request); struct rc_data { struct mtx rc_lock; struct sockaddr_storage rc_addr; /* server address */ struct netconfig* rc_nconf; /* network type */ rpcprog_t rc_prog; /* program number */ rpcvers_t rc_vers; /* version number */ size_t rc_sendsz; size_t rc_recvsz; struct timeval rc_timeout; struct timeval rc_retry; int rc_retries; int rc_privport; char *rc_waitchan; int rc_intr; int rc_connecting; int rc_closed; struct ucred *rc_ucred; CLIENT* rc_client; /* underlying RPC client */ struct rpc_err rc_err; void *rc_backchannel; bool rc_tls; /* Enable TLS on connection */ + char *rc_tlscertname; }; /* Bits for ct_rcvstate. */ #define RPCRCVSTATE_NORMAL 0x01 /* Normal reception. */ #define RPCRCVSTATE_NONAPPDATA 0x02 /* Reception of a non-application record. */ #define RPCRCVSTATE_TLSHANDSHAKE 0x04 /* Reception blocked for TLS handshake. */ #define RPCRCVSTATE_UPCALLNEEDED 0x08 /* Upcall to rpctlscd needed. */ #define RPCRCVSTATE_UPCALLINPROG 0x10 /* Upcall to rpctlscd in progress. */ #define RPCRCVSTATE_SOUPCALLNEEDED 0x20 /* Socket upcall needed. */ #define RPCRCVSTATE_UPCALLTHREAD 0x40 /* Upcall kthread running. */ struct ct_data { struct mtx ct_lock; int ct_threads; /* number of threads in clnt_vc_call */ bool_t ct_closing; /* TRUE if we are closing */ bool_t ct_closed; /* TRUE if we are closed */ struct socket *ct_socket; /* connection socket */ bool_t ct_closeit; /* close it on destroy */ struct timeval ct_wait; /* wait interval in milliseconds */ struct sockaddr_storage ct_addr; /* remote addr */ struct rpc_err ct_error; uint32_t ct_xid; char ct_mcallc[MCALL_MSG_SIZE]; /* marshalled callmsg */ size_t ct_mpos; /* pos after marshal */ const char *ct_waitchan; int ct_waitflag; struct mbuf *ct_record; /* current reply record */ size_t ct_record_resid; /* how much left of reply to read */ bool_t ct_record_eor; /* true if reading last fragment */ struct ct_request_list ct_pending; int ct_upcallrefs; /* Ref cnt of upcalls in prog. */ SVCXPRT *ct_backchannelxprt; /* xprt for backchannel */ uint64_t ct_sslsec; /* RPC-over-TLS connection. */ uint64_t ct_sslusec; uint64_t ct_sslrefno; uint32_t ct_rcvstate; /* Handle receiving for TLS upcalls */ struct mbuf *ct_raw; /* Raw mbufs recv'd */ }; struct cf_conn { /* kept in xprt->xp_p1 for actual connection */ enum xprt_stat strm_stat; struct mbuf *mpending; /* unparsed data read from the socket */ struct mbuf *mreq; /* current record being built from mpending */ uint32_t resid; /* number of bytes needed for fragment */ bool_t eor; /* reading last fragment of current record */ }; #endif /* _KERNEL */ #endif /* _RPC_KRPC_H_ */ diff --git a/sys/rpc/rpcsec_tls.h b/sys/rpc/rpcsec_tls.h index a33feff17c06..49a7e71b7514 100644 --- a/sys/rpc/rpcsec_tls.h +++ b/sys/rpc/rpcsec_tls.h @@ -1,87 +1,87 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2020 Rick Macklem * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _RPC_RPCSEC_TLS_H_ #define _RPC_RPCSEC_TLS_H_ /* Operation values for rpctls syscall. */ #define RPCTLS_SYSC_CLSETPATH 1 #define RPCTLS_SYSC_CLSOCKET 2 #define RPCTLS_SYSC_CLSHUTDOWN 3 #define RPCTLS_SYSC_SRVSETPATH 4 #define RPCTLS_SYSC_SRVSOCKET 5 #define RPCTLS_SYSC_SRVSHUTDOWN 6 /* System call used by the rpctlscd, rpctlssd daemons. */ int rpctls_syscall(int, const char *); /* Flag bits to indicate certificate results. */ #define RPCTLS_FLAGS_HANDSHAKE 0x01 #define RPCTLS_FLAGS_GOTCERT 0x02 #define RPCTLS_FLAGS_SELFSIGNED 0x04 #define RPCTLS_FLAGS_VERIFIED 0x08 #define RPCTLS_FLAGS_DISABLED 0x10 #define RPCTLS_FLAGS_CERTUSER 0x20 #define RPCTLS_FLAGS_HANDSHFAIL 0x40 /* Error return values for upcall rpcs. */ #define RPCTLSERR_OK 0 #define RPCTLSERR_NOCLOSE 1 #define RPCTLSERR_NOSSL 2 #define RPCTLSERR_NOSOCKET 3 #ifdef _KERNEL /* Functions that perform upcalls to the rpctlsd daemon. */ -enum clnt_stat rpctls_connect(CLIENT *newclient, struct socket *so, - uint64_t *sslp, uint32_t *reterr); +enum clnt_stat rpctls_connect(CLIENT *newclient, char *certname, + struct socket *so, uint64_t *sslp, uint32_t *reterr); enum clnt_stat rpctls_cl_handlerecord(uint64_t sec, uint64_t usec, uint64_t ssl, uint32_t *reterr); enum clnt_stat rpctls_srv_handlerecord(uint64_t sec, uint64_t usec, uint64_t ssl, uint32_t *reterr); enum clnt_stat rpctls_cl_disconnect(uint64_t sec, uint64_t usec, uint64_t ssl, uint32_t *reterr); enum clnt_stat rpctls_srv_disconnect(uint64_t sec, uint64_t usec, uint64_t ssl, uint32_t *reterr); /* Initialization function for rpcsec_tls. */ int rpctls_init(void); /* Get TLS information function. */ bool rpctls_getinfo(u_int *maxlen, bool rpctlscd_run, bool rpctlssd_run); /* String for AUTH_TLS reply verifier. */ #define RPCTLS_START_STRING "STARTTLS" /* ssl refno value to indicate TLS handshake being done. */ #define RPCTLS_REFNO_HANDSHAKE 0xFFFFFFFFFFFFFFFFULL #endif /* _KERNEL */ #endif /* _RPC_RPCSEC_TLS_H_ */ diff --git a/sys/rpc/rpcsec_tls/rpctls_impl.c b/sys/rpc/rpcsec_tls/rpctls_impl.c index c5f3ce5d46a6..638f27eaf350 100644 --- a/sys/rpc/rpcsec_tls/rpctls_impl.c +++ b/sys/rpc/rpcsec_tls/rpctls_impl.c @@ -1,725 +1,731 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ * Authors: Doug Rabson * Developed with Red Inc: Alfred Perlstein * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* Modified from the kernel GSSAPI code for RPC-over-TLS. */ #include __FBSDID("$FreeBSD$"); #include "opt_kern_tls.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "rpctlscd.h" #include "rpctlssd.h" /* * Syscall hooks */ static struct syscall_helper_data rpctls_syscalls[] = { SYSCALL_INIT_HELPER(rpctls_syscall), SYSCALL_INIT_LAST }; static CLIENT *rpctls_connect_handle; static struct mtx rpctls_connect_lock; static struct socket *rpctls_connect_so = NULL; static CLIENT *rpctls_connect_cl = NULL; static CLIENT *rpctls_server_handle; static struct mtx rpctls_server_lock; static struct socket *rpctls_server_so = NULL; static SVCXPRT *rpctls_server_xprt = NULL; static struct opaque_auth rpctls_null_verf; static CLIENT *rpctls_connect_client(void); static CLIENT *rpctls_server_client(void); static enum clnt_stat rpctls_server(SVCXPRT *xprt, struct socket *so, uint32_t *flags, uint64_t *sslp, uid_t *uid, int *ngrps, gid_t **gids); int rpctls_init(void) { int error; error = syscall_helper_register(rpctls_syscalls, SY_THR_STATIC_KLD); if (error != 0) { printf("rpctls_init: cannot register syscall\n"); return (error); } mtx_init(&rpctls_connect_lock, "rpctls_connect_lock", NULL, MTX_DEF); mtx_init(&rpctls_server_lock, "rpctls_server_lock", NULL, MTX_DEF); rpctls_null_verf.oa_flavor = AUTH_NULL; rpctls_null_verf.oa_base = RPCTLS_START_STRING; rpctls_null_verf.oa_length = strlen(RPCTLS_START_STRING); return (0); } int sys_rpctls_syscall(struct thread *td, struct rpctls_syscall_args *uap) { struct sockaddr_un sun; struct netconfig *nconf; struct file *fp; struct socket *so; SVCXPRT *xprt; char path[MAXPATHLEN]; int fd = -1, error, try_count; CLIENT *cl, *oldcl, *concl; uint64_t ssl[3]; struct timeval timeo; #ifdef KERN_TLS u_int maxlen; #endif error = priv_check(td, PRIV_NFS_DAEMON); if (error != 0) return (error); switch (uap->op) { case RPCTLS_SYSC_CLSETPATH: error = copyinstr(uap->path, path, sizeof(path), NULL); if (error == 0) { error = ENXIO; #ifdef KERN_TLS if (rpctls_getinfo(&maxlen, false, false)) error = 0; #endif } if (error == 0 && (strlen(path) + 1 > sizeof(sun.sun_path) || strlen(path) == 0)) error = EINVAL; cl = NULL; if (error == 0) { sun.sun_family = AF_LOCAL; strlcpy(sun.sun_path, path, sizeof(sun.sun_path)); sun.sun_len = SUN_LEN(&sun); nconf = getnetconfigent("local"); cl = clnt_reconnect_create(nconf, (struct sockaddr *)&sun, RPCTLSCD, RPCTLSCDVERS, RPC_MAXDATASIZE, RPC_MAXDATASIZE); /* * The number of retries defaults to INT_MAX, which * effectively means an infinite, uninterruptable loop. * Set the try_count to 1 so that no retries of the * RPC occur. Since it is an upcall to a local daemon, * requests should not be lost and doing one of these * RPCs multiple times is not correct. * If the server is not working correctly, the * daemon can get stuck in SSL_connect() trying * to read data from the socket during the upcall. * Set a timeout (currently 15sec) and assume the * daemon is hung when the timeout occurs. */ if (cl != NULL) { try_count = 1; CLNT_CONTROL(cl, CLSET_RETRIES, &try_count); timeo.tv_sec = 15; timeo.tv_usec = 0; CLNT_CONTROL(cl, CLSET_TIMEOUT, &timeo); } else error = EINVAL; } mtx_lock(&rpctls_connect_lock); oldcl = rpctls_connect_handle; rpctls_connect_handle = cl; mtx_unlock(&rpctls_connect_lock); if (oldcl != NULL) { CLNT_CLOSE(oldcl); CLNT_RELEASE(oldcl); } break; case RPCTLS_SYSC_SRVSETPATH: error = copyinstr(uap->path, path, sizeof(path), NULL); if (error == 0) { error = ENXIO; #ifdef KERN_TLS if (rpctls_getinfo(&maxlen, false, false)) error = 0; #endif } if (error == 0 && (strlen(path) + 1 > sizeof(sun.sun_path) || strlen(path) == 0)) error = EINVAL; cl = NULL; if (error == 0) { sun.sun_family = AF_LOCAL; strlcpy(sun.sun_path, path, sizeof(sun.sun_path)); sun.sun_len = SUN_LEN(&sun); nconf = getnetconfigent("local"); cl = clnt_reconnect_create(nconf, (struct sockaddr *)&sun, RPCTLSSD, RPCTLSSDVERS, RPC_MAXDATASIZE, RPC_MAXDATASIZE); /* * The number of retries defaults to INT_MAX, which * effectively means an infinite, uninterruptable loop. * Set the try_count to 1 so that no retries of the * RPC occur. Since it is an upcall to a local daemon, * requests should not be lost and doing one of these * RPCs multiple times is not correct. * Set a timeout (currently 15sec) and assume that * the daemon is hung if a timeout occurs. */ if (cl != NULL) { try_count = 1; CLNT_CONTROL(cl, CLSET_RETRIES, &try_count); timeo.tv_sec = 15; timeo.tv_usec = 0; CLNT_CONTROL(cl, CLSET_TIMEOUT, &timeo); } else error = EINVAL; } mtx_lock(&rpctls_server_lock); oldcl = rpctls_server_handle; rpctls_server_handle = cl; mtx_unlock(&rpctls_server_lock); if (oldcl != NULL) { CLNT_CLOSE(oldcl); CLNT_RELEASE(oldcl); } break; case RPCTLS_SYSC_CLSHUTDOWN: mtx_lock(&rpctls_connect_lock); oldcl = rpctls_connect_handle; rpctls_connect_handle = NULL; mtx_unlock(&rpctls_connect_lock); if (oldcl != NULL) { CLNT_CLOSE(oldcl); CLNT_RELEASE(oldcl); } break; case RPCTLS_SYSC_SRVSHUTDOWN: mtx_lock(&rpctls_server_lock); oldcl = rpctls_server_handle; rpctls_server_handle = NULL; mtx_unlock(&rpctls_server_lock); if (oldcl != NULL) { CLNT_CLOSE(oldcl); CLNT_RELEASE(oldcl); } break; case RPCTLS_SYSC_CLSOCKET: mtx_lock(&rpctls_connect_lock); so = rpctls_connect_so; rpctls_connect_so = NULL; concl = rpctls_connect_cl; rpctls_connect_cl = NULL; mtx_unlock(&rpctls_connect_lock); if (so != NULL) { error = falloc(td, &fp, &fd, 0); if (error == 0) { /* * Set ssl refno so that clnt_vc_destroy() will * not close the socket and will leave that for * the daemon to do. */ soref(so); ssl[0] = ssl[1] = 0; ssl[2] = RPCTLS_REFNO_HANDSHAKE; CLNT_CONTROL(concl, CLSET_TLS, ssl); finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &socketops); fdrop(fp, td); /* Drop fp reference. */ td->td_retval[0] = fd; } } else error = EPERM; break; case RPCTLS_SYSC_SRVSOCKET: mtx_lock(&rpctls_server_lock); so = rpctls_server_so; rpctls_server_so = NULL; xprt = rpctls_server_xprt; rpctls_server_xprt = NULL; mtx_unlock(&rpctls_server_lock); if (so != NULL) { error = falloc(td, &fp, &fd, 0); if (error == 0) { /* * Once this file descriptor is associated * with the socket, it cannot be closed by * the server side krpc code (svc_vc.c). */ soref(so); sx_xlock(&xprt->xp_lock); xprt->xp_tls = RPCTLS_FLAGS_HANDSHFAIL; sx_xunlock(&xprt->xp_lock); finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &socketops); fdrop(fp, td); /* Drop fp reference. */ td->td_retval[0] = fd; } } else error = EPERM; break; default: error = EINVAL; } return (error); } /* * Acquire the rpctls_connect_handle and return it with a reference count, * if it is available. */ static CLIENT * rpctls_connect_client(void) { CLIENT *cl; mtx_lock(&rpctls_connect_lock); cl = rpctls_connect_handle; if (cl != NULL) CLNT_ACQUIRE(cl); mtx_unlock(&rpctls_connect_lock); return (cl); } /* * Acquire the rpctls_server_handle and return it with a reference count, * if it is available. */ static CLIENT * rpctls_server_client(void) { CLIENT *cl; mtx_lock(&rpctls_server_lock); cl = rpctls_server_handle; if (cl != NULL) CLNT_ACQUIRE(cl); mtx_unlock(&rpctls_server_lock); return (cl); } /* Do an upcall for a new socket connect using TLS. */ enum clnt_stat -rpctls_connect(CLIENT *newclient, struct socket *so, uint64_t *sslp, - uint32_t *reterr) +rpctls_connect(CLIENT *newclient, char *certname, struct socket *so, + uint64_t *sslp, uint32_t *reterr) { + struct rpctlscd_connect_arg arg; struct rpctlscd_connect_res res; struct rpc_callextra ext; struct timeval utimeout; enum clnt_stat stat; CLIENT *cl; int val; static bool rpctls_connect_busy = false; cl = rpctls_connect_client(); if (cl == NULL) return (RPC_AUTHERROR); /* First, do the AUTH_TLS NULL RPC. */ memset(&ext, 0, sizeof(ext)); utimeout.tv_sec = 30; utimeout.tv_usec = 0; ext.rc_auth = authtls_create(); stat = clnt_call_private(newclient, &ext, NULLPROC, (xdrproc_t)xdr_void, NULL, (xdrproc_t)xdr_void, NULL, utimeout); AUTH_DESTROY(ext.rc_auth); if (stat == RPC_AUTHERROR) return (stat); if (stat != RPC_SUCCESS) return (RPC_SYSTEMERROR); /* Serialize the connect upcalls. */ mtx_lock(&rpctls_connect_lock); while (rpctls_connect_busy) msleep(&rpctls_connect_busy, &rpctls_connect_lock, PVFS, "rtlscn", 0); rpctls_connect_busy = true; rpctls_connect_so = so; rpctls_connect_cl = newclient; mtx_unlock(&rpctls_connect_lock); /* Temporarily block reception during the handshake upcall. */ val = 1; CLNT_CONTROL(newclient, CLSET_BLOCKRCV, &val); /* Do the connect handshake upcall. */ - stat = rpctlscd_connect_1(NULL, &res, cl); + if (certname != NULL) { + arg.certname.certname_len = strlen(certname); + arg.certname.certname_val = certname; + } else + arg.certname.certname_len = 0; + stat = rpctlscd_connect_1(&arg, &res, cl); if (stat == RPC_SUCCESS) { *reterr = res.reterr; if (res.reterr == 0) { *sslp++ = res.sec; *sslp++ = res.usec; *sslp = res.ssl; } } else if (stat == RPC_TIMEDOUT) { /* * Do a shutdown on the socket, since the daemon is probably * stuck in SSL_connect() trying to read the socket. * Do not soclose() the socket, since the daemon will close() * the socket after SSL_connect() returns an error. */ soshutdown(so, SHUT_RD); } CLNT_RELEASE(cl); /* Unblock reception. */ val = 0; CLNT_CONTROL(newclient, CLSET_BLOCKRCV, &val); /* Once the upcall is done, the daemon is done with the fp and so. */ mtx_lock(&rpctls_connect_lock); rpctls_connect_so = NULL; rpctls_connect_cl = NULL; rpctls_connect_busy = false; wakeup(&rpctls_connect_busy); mtx_unlock(&rpctls_connect_lock); return (stat); } /* Do an upcall to handle an non-application data record using TLS. */ enum clnt_stat rpctls_cl_handlerecord(uint64_t sec, uint64_t usec, uint64_t ssl, uint32_t *reterr) { struct rpctlscd_handlerecord_arg arg; struct rpctlscd_handlerecord_res res; enum clnt_stat stat; CLIENT *cl; cl = rpctls_connect_client(); if (cl == NULL) { *reterr = RPCTLSERR_NOSSL; return (RPC_SUCCESS); } /* Do the handlerecord upcall. */ arg.sec = sec; arg.usec = usec; arg.ssl = ssl; stat = rpctlscd_handlerecord_1(&arg, &res, cl); CLNT_RELEASE(cl); if (stat == RPC_SUCCESS) *reterr = res.reterr; return (stat); } enum clnt_stat rpctls_srv_handlerecord(uint64_t sec, uint64_t usec, uint64_t ssl, uint32_t *reterr) { struct rpctlssd_handlerecord_arg arg; struct rpctlssd_handlerecord_res res; enum clnt_stat stat; CLIENT *cl; cl = rpctls_server_client(); if (cl == NULL) { *reterr = RPCTLSERR_NOSSL; return (RPC_SUCCESS); } /* Do the handlerecord upcall. */ arg.sec = sec; arg.usec = usec; arg.ssl = ssl; stat = rpctlssd_handlerecord_1(&arg, &res, cl); CLNT_RELEASE(cl); if (stat == RPC_SUCCESS) *reterr = res.reterr; return (stat); } /* Do an upcall to shut down a socket using TLS. */ enum clnt_stat rpctls_cl_disconnect(uint64_t sec, uint64_t usec, uint64_t ssl, uint32_t *reterr) { struct rpctlscd_disconnect_arg arg; struct rpctlscd_disconnect_res res; enum clnt_stat stat; CLIENT *cl; cl = rpctls_connect_client(); if (cl == NULL) { *reterr = RPCTLSERR_NOSSL; return (RPC_SUCCESS); } /* Do the disconnect upcall. */ arg.sec = sec; arg.usec = usec; arg.ssl = ssl; stat = rpctlscd_disconnect_1(&arg, &res, cl); CLNT_RELEASE(cl); if (stat == RPC_SUCCESS) *reterr = res.reterr; return (stat); } enum clnt_stat rpctls_srv_disconnect(uint64_t sec, uint64_t usec, uint64_t ssl, uint32_t *reterr) { struct rpctlssd_disconnect_arg arg; struct rpctlssd_disconnect_res res; enum clnt_stat stat; CLIENT *cl; cl = rpctls_server_client(); if (cl == NULL) { *reterr = RPCTLSERR_NOSSL; return (RPC_SUCCESS); } /* Do the disconnect upcall. */ arg.sec = sec; arg.usec = usec; arg.ssl = ssl; stat = rpctlssd_disconnect_1(&arg, &res, cl); CLNT_RELEASE(cl); if (stat == RPC_SUCCESS) *reterr = res.reterr; return (stat); } /* Do an upcall for a new server socket using TLS. */ static enum clnt_stat rpctls_server(SVCXPRT *xprt, struct socket *so, uint32_t *flags, uint64_t *sslp, uid_t *uid, int *ngrps, gid_t **gids) { enum clnt_stat stat; CLIENT *cl; struct rpctlssd_connect_res res; gid_t *gidp; uint32_t *gidv; int i; static bool rpctls_server_busy = false; cl = rpctls_server_client(); if (cl == NULL) return (RPC_SYSTEMERROR); /* Serialize the server upcalls. */ mtx_lock(&rpctls_server_lock); while (rpctls_server_busy) msleep(&rpctls_server_busy, &rpctls_server_lock, PVFS, "rtlssn", 0); rpctls_server_busy = true; rpctls_server_so = so; rpctls_server_xprt = xprt; mtx_unlock(&rpctls_server_lock); /* Do the server upcall. */ stat = rpctlssd_connect_1(NULL, &res, cl); if (stat == RPC_SUCCESS) { *flags = res.flags; *sslp++ = res.sec; *sslp++ = res.usec; *sslp = res.ssl; if ((*flags & (RPCTLS_FLAGS_CERTUSER | RPCTLS_FLAGS_DISABLED)) == RPCTLS_FLAGS_CERTUSER) { *ngrps = res.gid.gid_len; *uid = res.uid; *gids = gidp = mem_alloc(*ngrps * sizeof(gid_t)); gidv = res.gid.gid_val; for (i = 0; i < *ngrps; i++) *gidp++ = *gidv++; } } else if (stat == RPC_TIMEDOUT) { /* * Do a shutdown on the socket, since the daemon is probably * stuck in SSL_accept() trying to read the socket. * Do not soclose() the socket, since the daemon will close() * the socket after SSL_accept() returns an error. */ soshutdown(so, SHUT_RD); } CLNT_RELEASE(cl); /* Once the upcall is done, the daemon is done with the fp and so. */ mtx_lock(&rpctls_server_lock); rpctls_server_so = NULL; rpctls_server_xprt = NULL; rpctls_server_busy = false; wakeup(&rpctls_server_busy); mtx_unlock(&rpctls_server_lock); return (stat); } /* * Handle the NULL RPC with authentication flavor of AUTH_TLS. * This is a STARTTLS command, so do the upcall to the rpctlssd daemon, * which will do the TLS handshake. */ enum auth_stat _svcauth_rpcsec_tls(struct svc_req *rqst, struct rpc_msg *msg) { bool_t call_stat; enum clnt_stat stat; SVCXPRT *xprt; uint32_t flags; uint64_t ssl[3]; int ngrps; uid_t uid; gid_t *gidp; #ifdef KERN_TLS u_int maxlen; #endif /* Initialize reply. */ rqst->rq_verf = rpctls_null_verf; /* Check client credentials. */ if (rqst->rq_cred.oa_length != 0 || msg->rm_call.cb_verf.oa_length != 0 || msg->rm_call.cb_verf.oa_flavor != AUTH_NULL) return (AUTH_BADCRED); if (rqst->rq_proc != NULLPROC) return (AUTH_REJECTEDCRED); call_stat = FALSE; #ifdef KERN_TLS if (rpctls_getinfo(&maxlen, false, true)) call_stat = TRUE; #endif if (!call_stat) return (AUTH_REJECTEDCRED); /* * Disable reception for the krpc so that the TLS handshake can * be done on the socket in the rpctlssd daemon. */ xprt = rqst->rq_xprt; sx_xlock(&xprt->xp_lock); xprt->xp_dontrcv = TRUE; sx_xunlock(&xprt->xp_lock); /* * Send the reply to the NULL RPC with AUTH_TLS, which is the * STARTTLS command for Sun RPC. */ call_stat = svc_sendreply(rqst, (xdrproc_t)xdr_void, NULL); if (!call_stat) { sx_xlock(&xprt->xp_lock); xprt->xp_dontrcv = FALSE; sx_xunlock(&xprt->xp_lock); xprt_active(xprt); /* Harmless if already active. */ return (AUTH_REJECTEDCRED); } /* Do an upcall to do the TLS handshake. */ stat = rpctls_server(xprt, xprt->xp_socket, &flags, ssl, &uid, &ngrps, &gidp); /* Re-enable reception on the socket within the krpc. */ sx_xlock(&xprt->xp_lock); xprt->xp_dontrcv = FALSE; if (stat == RPC_SUCCESS) { xprt->xp_tls = flags; xprt->xp_sslsec = ssl[0]; xprt->xp_sslusec = ssl[1]; xprt->xp_sslrefno = ssl[2]; if ((flags & (RPCTLS_FLAGS_CERTUSER | RPCTLS_FLAGS_DISABLED)) == RPCTLS_FLAGS_CERTUSER) { xprt->xp_ngrps = ngrps; xprt->xp_uid = uid; xprt->xp_gidp = gidp; } } sx_xunlock(&xprt->xp_lock); xprt_active(xprt); /* Harmless if already active. */ return (RPCSEC_GSS_NODISPATCH); } /* * Get kern.ipc.tls.enable and kern.ipc.tls.maxlen. */ bool rpctls_getinfo(u_int *maxlenp, bool rpctlscd_run, bool rpctlssd_run) { u_int maxlen; bool enable; int error; size_t siz; if (PMAP_HAS_DMAP == 0 || !mb_use_ext_pgs) return (false); siz = sizeof(enable); error = kernel_sysctlbyname(curthread, "kern.ipc.tls.enable", &enable, &siz, NULL, 0, NULL, 0); if (error != 0) return (false); siz = sizeof(maxlen); error = kernel_sysctlbyname(curthread, "kern.ipc.tls.maxlen", &maxlen, &siz, NULL, 0, NULL, 0); if (error != 0) return (false); if (rpctlscd_run && rpctls_connect_handle == NULL) return (false); if (rpctlssd_run && rpctls_server_handle == NULL) return (false); *maxlenp = maxlen; return (enable); } diff --git a/sys/rpc/rpcsec_tls/rpctlscd.x b/sys/rpc/rpcsec_tls/rpctlscd.x index d5c4d61a43a8..1ae53d7b8d17 100644 --- a/sys/rpc/rpcsec_tls/rpctlscd.x +++ b/sys/rpc/rpcsec_tls/rpctlscd.x @@ -1,72 +1,76 @@ /*- * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ * Authors: Doug Rabson * Developed with Red Inc: Alfred Perlstein * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* Modified from gssd.x for the client side of RPC-over-TLS. */ /* $FreeBSD$ */ +struct rpctlscd_connect_arg { + char certname<>; +}; + struct rpctlscd_connect_res { uint32_t reterr; uint64_t sec; uint64_t usec; uint64_t ssl; }; struct rpctlscd_handlerecord_arg { uint64_t sec; uint64_t usec; uint64_t ssl; }; struct rpctlscd_handlerecord_res { uint32_t reterr; }; struct rpctlscd_disconnect_arg { uint64_t sec; uint64_t usec; uint64_t ssl; }; struct rpctlscd_disconnect_res { uint32_t reterr; }; program RPCTLSCD { version RPCTLSCDVERS { void RPCTLSCD_NULL(void) = 0; rpctlscd_connect_res - RPCTLSCD_CONNECT(void) = 1; + RPCTLSCD_CONNECT(rpctlscd_connect_arg) = 1; rpctlscd_handlerecord_res RPCTLSCD_HANDLERECORD(rpctlscd_handlerecord_arg) = 2; rpctlscd_disconnect_res RPCTLSCD_DISCONNECT(rpctlscd_disconnect_arg) = 3; } = 1; } = 0x40677374;