Changeset View
Standalone View
sys/fs/nfs/nfs_commonkrpc.c
Show First 20 Lines • Show All 161 Lines • ▼ Show 20 Lines | |||||
* Initialize sockets and congestion for a new NFS connection. | * Initialize sockets and congestion for a new NFS connection. | ||||
* We do not free the sockaddr if error. | * We do not free the sockaddr if error. | ||||
* Which arguments are set to NULL indicate what kind of call it is. | * Which arguments are set to NULL indicate what kind of call it is. | ||||
* cred == NULL --> a call to connect to a pNFS DS | * cred == NULL --> a call to connect to a pNFS DS | ||||
* nmp == NULL --> indicates an upcall to userland or a NFSv4.0 callback | * nmp == NULL --> indicates an upcall to userland or a NFSv4.0 callback | ||||
*/ | */ | ||||
int | int | ||||
newnfs_connect(struct nfsmount *nmp, struct nfssockreq *nrp, | newnfs_connect(struct nfsmount *nmp, struct nfssockreq *nrp, | ||||
struct ucred *cred, NFSPROC_T *p, int callback_retry_mult, bool dotls) | struct ucred *cred, NFSPROC_T *p, int callback_retry_mult, bool dotls, | ||||
struct __rpc_client **clipp) | |||||
{ | { | ||||
int rcvreserve, sndreserve; | int rcvreserve, sndreserve; | ||||
int pktscale, pktscalesav; | int pktscale, pktscalesav; | ||||
struct sockaddr *saddr; | struct sockaddr *saddr; | ||||
struct ucred *origcred; | struct ucred *origcred; | ||||
CLIENT *client; | CLIENT *client; | ||||
struct netconfig *nconf; | struct netconfig *nconf; | ||||
struct socket *so; | struct socket *so; | ||||
▲ Show 20 Lines • Show All 236 Lines • ▼ Show 20 Lines | if (nmp != NULL) { | ||||
* doesn't exist for TCP and the following call just fails, | * doesn't exist for TCP and the following call just fails, | ||||
* which is ok. | * which is ok. | ||||
*/ | */ | ||||
timo.tv_sec = nmp->nm_timeo / NFS_HZ; | timo.tv_sec = nmp->nm_timeo / NFS_HZ; | ||||
timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * 1000000 / NFS_HZ; | timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * 1000000 / NFS_HZ; | ||||
CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, &timo); | CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, &timo); | ||||
} | } | ||||
/* | |||||
* *clipp is &nrp->nr_client or &nm_aconn[nmp->nm_nextaconn]. | |||||
* The latter case is for additional connections specified by the | |||||
* "nconnect" mount option. nr_mtx etc is used for these additional | |||||
* connections, as well as nr_client in the nfssockreq | |||||
* structure for the mount. | |||||
*/ | |||||
mtx_lock(&nrp->nr_mtx); | mtx_lock(&nrp->nr_mtx); | ||||
if (nrp->nr_client != NULL) { | if (*clipp != NULL) { | ||||
mtx_unlock(&nrp->nr_mtx); | mtx_unlock(&nrp->nr_mtx); | ||||
/* | /* | ||||
* Someone else already connected. | * Someone else already connected. | ||||
*/ | */ | ||||
CLNT_RELEASE(client); | CLNT_RELEASE(client); | ||||
} else { | } else { | ||||
nrp->nr_client = client; | *clipp = client; | ||||
/* | /* | ||||
* Protocols that do not require connections may be optionally | * Protocols that do not require connections may be optionally | ||||
* left unconnected for servers that reply from a port other | * left unconnected for servers that reply from a port other | ||||
* than NFS_PORT. | * than NFS_PORT. | ||||
*/ | */ | ||||
if (nmp == NULL || (nmp->nm_flag & NFSMNT_NOCONN) == 0) { | if (nmp == NULL || (nmp->nm_flag & NFSMNT_NOCONN) == 0) { | ||||
mtx_unlock(&nrp->nr_mtx); | mtx_unlock(&nrp->nr_mtx); | ||||
CLNT_CONTROL(client, CLSET_CONNECT, &one); | CLNT_CONTROL(client, CLSET_CONNECT, &one); | ||||
} else | } else | ||||
mtx_unlock(&nrp->nr_mtx); | mtx_unlock(&nrp->nr_mtx); | ||||
} | } | ||||
out: | out: | ||||
/* Restore current thread's credentials. */ | /* Restore current thread's credentials. */ | ||||
td->td_ucred = origcred; | td->td_ucred = origcred; | ||||
NFSEXITCODE(error); | NFSEXITCODE(error); | ||||
return (error); | return (error); | ||||
} | } | ||||
/* | /* | ||||
* NFS disconnect. Clean up and unlink. | * NFS disconnect. Clean up and unlink. | ||||
*/ | */ | ||||
void | void | ||||
newnfs_disconnect(struct nfssockreq *nrp) | newnfs_disconnect(struct nfsmount *nmp, struct nfssockreq *nrp) | ||||
{ | { | ||||
CLIENT *client; | CLIENT *client, *aconn[NFS_MAXNCONN - 1]; | ||||
int i; | |||||
mtx_lock(&nrp->nr_mtx); | mtx_lock(&nrp->nr_mtx); | ||||
if (nrp->nr_client != NULL) { | if (nrp->nr_client != NULL) { | ||||
client = nrp->nr_client; | client = nrp->nr_client; | ||||
nrp->nr_client = NULL; | nrp->nr_client = NULL; | ||||
if (nmp != NULL && nmp->nm_aconnect > 0) { | |||||
for (i = 0; i < nmp->nm_aconnect; i++) { | |||||
aconn[i] = nmp->nm_aconn[i]; | |||||
nmp->nm_aconn[i] = NULL; | |||||
} | |||||
} | |||||
mtx_unlock(&nrp->nr_mtx); | mtx_unlock(&nrp->nr_mtx); | ||||
rpc_gss_secpurge_call(client); | rpc_gss_secpurge_call(client); | ||||
CLNT_CLOSE(client); | CLNT_CLOSE(client); | ||||
CLNT_RELEASE(client); | CLNT_RELEASE(client); | ||||
if (nmp != NULL && nmp->nm_aconnect > 0) { | |||||
for (i = 0; i < nmp->nm_aconnect; i++) { | |||||
if (aconn[i] != NULL) { | |||||
rpc_gss_secpurge_call(aconn[i]); | |||||
CLNT_CLOSE(aconn[i]); | |||||
CLNT_RELEASE(aconn[i]); | |||||
} | |||||
} | |||||
} | |||||
} else { | } else { | ||||
mtx_unlock(&nrp->nr_mtx); | mtx_unlock(&nrp->nr_mtx); | ||||
} | } | ||||
} | } | ||||
static AUTH * | static AUTH * | ||||
nfs_getauth(struct nfssockreq *nrp, int secflavour, char *clnt_principal, | nfs_getauth(struct nfssockreq *nrp, int secflavour, char *clnt_principal, | ||||
char *srv_principal, gss_OID mech_oid, struct ucred *cred) | char *srv_principal, gss_OID mech_oid, struct ucred *cred) | ||||
▲ Show 20 Lines • Show All 84 Lines • ▼ Show 20 Lines | newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp, | ||||
u_char *retsum, int toplevel, u_int64_t *xidp, struct nfsclsession *dssep) | u_char *retsum, int toplevel, u_int64_t *xidp, struct nfsclsession *dssep) | ||||
{ | { | ||||
uint32_t retseq, retval, slotseq, *tl; | uint32_t retseq, retval, slotseq, *tl; | ||||
time_t waituntil; | time_t waituntil; | ||||
int i = 0, j = 0, opcnt, set_sigset = 0, slot; | int i = 0, j = 0, opcnt, set_sigset = 0, slot; | ||||
int error = 0, usegssname = 0, secflavour = AUTH_SYS; | int error = 0, usegssname = 0, secflavour = AUTH_SYS; | ||||
int freeslot, maxslot, reterr, slotpos, timeo; | int freeslot, maxslot, reterr, slotpos, timeo; | ||||
u_int16_t procnum; | u_int16_t procnum; | ||||
u_int trylater_delay = 1; | u_int nextconn, trylater_delay = 1; | ||||
struct nfs_feedback_arg nf; | struct nfs_feedback_arg nf; | ||||
struct timeval timo; | struct timeval timo; | ||||
AUTH *auth; | AUTH *auth; | ||||
struct rpc_callextra ext; | struct rpc_callextra ext; | ||||
enum clnt_stat stat; | enum clnt_stat stat; | ||||
struct nfsreq *rep = NULL; | struct nfsreq *rep = NULL; | ||||
char *srv_principal = NULL, *clnt_principal = NULL; | char *srv_principal = NULL, *clnt_principal = NULL; | ||||
sigset_t oldset; | sigset_t oldset; | ||||
struct ucred *authcred; | struct ucred *authcred; | ||||
struct nfsclsession *sep; | struct nfsclsession *sep; | ||||
uint8_t sessionid[NFSX_V4SESSIONID]; | uint8_t sessionid[NFSX_V4SESSIONID]; | ||||
bool nextconn_set; | |||||
sep = dssep; | sep = dssep; | ||||
if (xidp != NULL) | if (xidp != NULL) | ||||
*xidp = 0; | *xidp = 0; | ||||
/* Reject requests while attempting a forced unmount. */ | /* Reject requests while attempting a forced unmount. */ | ||||
if (nmp != NULL && NFSCL_FORCEDISM(nmp->nm_mountp)) { | if (nmp != NULL && NFSCL_FORCEDISM(nmp->nm_mountp)) { | ||||
m_freem(nd->nd_mreq); | m_freem(nd->nd_mreq); | ||||
return (ESTALE); | return (ESTALE); | ||||
Show All 9 Lines | newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp, | ||||
/* For client side interruptible mounts, mask off the signals. */ | /* For client side interruptible mounts, mask off the signals. */ | ||||
if (nmp != NULL && td != NULL && NFSHASINT(nmp)) { | if (nmp != NULL && td != NULL && NFSHASINT(nmp)) { | ||||
newnfs_set_sigmask(td, &oldset); | newnfs_set_sigmask(td, &oldset); | ||||
set_sigset = 1; | set_sigset = 1; | ||||
} | } | ||||
/* | /* | ||||
* XXX if not already connected call nfs_connect now. Longer | * If not already connected call newnfs_connect now. | ||||
* term, change nfs_mount to call nfs_connect unconditionally | |||||
* and let clnt_reconnect_create handle reconnects. | |||||
*/ | */ | ||||
if (nrp->nr_client == NULL) | if (nrp->nr_client == NULL) | ||||
newnfs_connect(nmp, nrp, cred, td, 0, false); | newnfs_connect(nmp, nrp, cred, td, 0, false, &nrp->nr_client); | ||||
nextconn_set = false; | |||||
if (nmp != NULL && nmp->nm_aconnect > 0 && | |||||
(nd->nd_procnum == NFSPROC_READ || | |||||
nd->nd_procnum == NFSPROC_READDIR || | |||||
nd->nd_procnum == NFSPROC_READDIRPLUS || | |||||
nd->nd_procnum == NFSPROC_WRITE)) { | |||||
nextconn = atomic_fetchadd_int(&nmp->nm_nextaconn, 1); | |||||
nextconn %= nmp->nm_aconnect; | |||||
nextconn_set = true; | |||||
if (nmp->nm_aconn[nextconn] == NULL) | |||||
newnfs_connect(nmp, nrp, cred, td, 0, false, | |||||
&nmp->nm_aconn[nextconn]); | |||||
} | |||||
/* | /* | ||||
* For a client side mount, nmp is != NULL and clp == NULL. For | * For a client side mount, nmp is != NULL and clp == NULL. For | ||||
* server calls (callbacks or upcalls), nmp == NULL. | * server calls (callbacks or upcalls), nmp == NULL. | ||||
*/ | */ | ||||
markj: It looks like we are assuming here that `nm_nextnconn` won't change between here and the change… | |||||
Done Inline Actions — rmacklem: Great catch. The code now sets a local variable "nextconn" and updates nm_nextaconn with the atomic_fetchadd_int(). Note that nm_aconn[] is not actually a TCP connection, but a krpc "client"; it never goes back to NULL until the umount(), when the client is released.
if (clp != NULL) { | if (clp != NULL) { | ||||
NFSLOCKSTATE(); | NFSLOCKSTATE(); | ||||
if ((clp->lc_flags & LCL_GSS) && nfsrv_gsscallbackson) { | if ((clp->lc_flags & LCL_GSS) && nfsrv_gsscallbackson) { | ||||
secflavour = RPCSEC_GSS_KRB5; | secflavour = RPCSEC_GSS_KRB5; | ||||
if (nd->nd_procnum != NFSPROC_NULL) { | if (nd->nd_procnum != NFSPROC_NULL) { | ||||
if (clp->lc_flags & LCL_GSSINTEGRITY) | if (clp->lc_flags & LCL_GSSINTEGRITY) | ||||
Done Inline ActionsI suspect this locking could become a bottleneck if many threads are issuing I/O at a high rate. Though, I see that for read and write RPCs we already unconditionally lock the mount to load nm_maxfilesize and nm_wsize, so maybe it is not worth worrying about for now. One observation is that nm_aconnect is effectively fixed at mount time, so we could perhaps avoid the mnt lock and advance nm_nextaconn with atomic_fetchadd_int(). This doesn't solve the scalability problem but might give some marginal benefit if the mount lock is already heavily contended. It may also be reasonable to use a separate iterator for each CPU. markj: I suspect this locking could become a bottleneck if many threads are issuing I/O at a high rate. | |||||
Done Inline Actions — rmacklem: Lock contention for NFSLOCKMNT() is unlikely to be an issue, since RPCs will always take some time to complete. Also, contention on the NFSCLSTATELOCK() is far more likely to be an issue.
secflavour = RPCSEC_GSS_KRB5I; | secflavour = RPCSEC_GSS_KRB5I; | ||||
else if (clp->lc_flags & LCL_GSSPRIVACY) | else if (clp->lc_flags & LCL_GSSPRIVACY) | ||||
secflavour = RPCSEC_GSS_KRB5P; | secflavour = RPCSEC_GSS_KRB5P; | ||||
} | } | ||||
} | } | ||||
NFSUNLOCKSTATE(); | NFSUNLOCKSTATE(); | ||||
} else if (nmp != NULL && NFSHASKERB(nmp) && | } else if (nmp != NULL && NFSHASKERB(nmp) && | ||||
nd->nd_procnum != NFSPROC_NULL) { | nd->nd_procnum != NFSPROC_NULL) { | ||||
▲ Show 20 Lines • Show All 195 Lines • ▼ Show 20 Lines | if (rep != NULL) { | ||||
NFSUNLOCKREQ(); | NFSUNLOCKREQ(); | ||||
} | } | ||||
} | } | ||||
nd->nd_mrep = NULL; | nd->nd_mrep = NULL; | ||||
if (clp != NULL && sep != NULL) | if (clp != NULL && sep != NULL) | ||||
stat = clnt_bck_call(nrp->nr_client, &ext, procnum, | stat = clnt_bck_call(nrp->nr_client, &ext, procnum, | ||||
nd->nd_mreq, &nd->nd_mrep, timo, sep->nfsess_xprt); | nd->nd_mreq, &nd->nd_mrep, timo, sep->nfsess_xprt); | ||||
else if (nextconn_set) | |||||
/* | |||||
* When there are multiple TCP connections, send the | |||||
* RPCs with large messages on the alternate TCP | |||||
* connection(s) in a round robin fashion. | |||||
* The small RPC messages are sent on the default | |||||
* TCP connection because they do not require much | |||||
* network bandwidth and separating them from the | |||||
* large RPC messages avoids them getting "log jammed" | |||||
* behind several large RPC messages. | |||||
*/ | |||||
stat = CLNT_CALL_MBUF(nmp->nm_aconn[nextconn], | |||||
&ext, procnum, nd->nd_mreq, &nd->nd_mrep, timo); | |||||
else | else | ||||
stat = CLNT_CALL_MBUF(nrp->nr_client, &ext, procnum, | stat = CLNT_CALL_MBUF(nrp->nr_client, &ext, procnum, | ||||
nd->nd_mreq, &nd->nd_mrep, timo); | nd->nd_mreq, &nd->nd_mrep, timo); | ||||
Done Inline ActionsWhy READDIR but not READDIRPLUS? markj: Why READDIR but not READDIRPLUS? | |||||
Done Inline ActionsGood catch, thanks. rmacklem: Good catch, thanks.
| |||||
NFSCL_DEBUG(2, "clnt call=%d\n", stat); | NFSCL_DEBUG(2, "clnt call=%d\n", stat); | ||||
if (rep != NULL) { | if (rep != NULL) { | ||||
/* | /* | ||||
* RPC done, unlink the request. | * RPC done, unlink the request. | ||||
*/ | */ | ||||
NFSLOCKREQ(); | NFSLOCKREQ(); | ||||
TAILQ_REMOVE(&nfsd_reqq, rep, r_chain); | TAILQ_REMOVE(&nfsd_reqq, rep, r_chain); | ||||
▲ Show 20 Lines • Show All 610 Lines • Show Last 20 Lines |
It looks like we are assuming here that nm_nextaconn won't change between here and the change below, but I can't see any synchronization to guarantee it. In particular, isn't it possible for another thread to perform an RPC and advance nm_nextaconn after we call newnfs_connect() but before we issue the RPC? If that happens immediately after the mount is created or after a disconnect, nmp->nm_aconn[nmp->nm_nextaconn] can be NULL.