diff --git a/sys/fs/nfs/nfs_commonkrpc.c.nconn b/sys/fs/nfs/nfs_commonkrpc.c --- a/sys/fs/nfs/nfs_commonkrpc.c.nconn +++ b/sys/fs/nfs/nfs_commonkrpc.c @@ -167,7 +167,8 @@ */ int newnfs_connect(struct nfsmount *nmp, struct nfssockreq *nrp, - struct ucred *cred, NFSPROC_T *p, int callback_retry_mult, bool dotls) + struct ucred *cred, NFSPROC_T *p, int callback_retry_mult, bool dotls, + struct __rpc_client **clipp) { int rcvreserve, sndreserve; int pktscale, pktscalesav; @@ -420,15 +421,22 @@ CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, &timo); } + /* + * clipp is &nrp->nr_client or the address of an nmp->nm_aconn[] entry. + * The latter case is for additional connections specified by the + * "nconnect" mount option. nr_mtx etc is used for these additional + * connections, as well as nr_client in the nfssockreq + * structure for the mount. + */ mtx_lock(&nrp->nr_mtx); - if (nrp->nr_client != NULL) { + if (*clipp != NULL) { mtx_unlock(&nrp->nr_mtx); /* * Someone else already connected. */ CLNT_RELEASE(client); } else { - nrp->nr_client = client; + *clipp = client; /* * Protocols that do not require connections may be optionally * left unconnected for servers that reply from a port other @@ -453,18 +461,34 @@ * NFS disconnect. Clean up and unlink. 
*/ void -newnfs_disconnect(struct nfssockreq *nrp) +newnfs_disconnect(struct nfsmount *nmp, struct nfssockreq *nrp) { - CLIENT *client; + CLIENT *client, *aconn[NFS_MAXNCONN - 1]; + int i; mtx_lock(&nrp->nr_mtx); if (nrp->nr_client != NULL) { client = nrp->nr_client; nrp->nr_client = NULL; + if (nmp != NULL && nmp->nm_aconnect > 0) { + for (i = 0; i < nmp->nm_aconnect; i++) { + aconn[i] = nmp->nm_aconn[i]; + nmp->nm_aconn[i] = NULL; + } + } mtx_unlock(&nrp->nr_mtx); rpc_gss_secpurge_call(client); CLNT_CLOSE(client); CLNT_RELEASE(client); + if (nmp != NULL && nmp->nm_aconnect > 0) { + for (i = 0; i < nmp->nm_aconnect; i++) { + if (aconn[i] != NULL) { + rpc_gss_secpurge_call(aconn[i]); + CLNT_CLOSE(aconn[i]); + CLNT_RELEASE(aconn[i]); + } + } + } } else { mtx_unlock(&nrp->nr_mtx); } @@ -565,7 +589,7 @@ int error = 0, usegssname = 0, secflavour = AUTH_SYS; int freeslot, maxslot, reterr, slotpos, timeo; u_int16_t procnum; - u_int trylater_delay = 1; + u_int nextconn, trylater_delay = 1; struct nfs_feedback_arg nf; struct timeval timo; AUTH *auth; @@ -577,6 +601,7 @@ struct ucred *authcred; struct nfsclsession *sep; uint8_t sessionid[NFSX_V4SESSIONID]; + bool nextconn_set; sep = dssep; if (xidp != NULL) @@ -602,13 +627,25 @@ } /* - * XXX if not already connected call nfs_connect now. Longer - * term, change nfs_mount to call nfs_connect unconditionally - * and let clnt_reconnect_create handle reconnects. + * If not already connected call newnfs_connect now. 
*/ if (nrp->nr_client == NULL) - newnfs_connect(nmp, nrp, cred, td, 0, false); + newnfs_connect(nmp, nrp, cred, td, 0, false, &nrp->nr_client); + nextconn_set = false; + if (nmp != NULL && nmp->nm_aconnect > 0 && + (nd->nd_procnum == NFSPROC_READ || + nd->nd_procnum == NFSPROC_READDIR || + nd->nd_procnum == NFSPROC_READDIRPLUS || + nd->nd_procnum == NFSPROC_WRITE)) { + nextconn = atomic_fetchadd_int(&nmp->nm_nextaconn, 1); + nextconn %= nmp->nm_aconnect; + nextconn_set = true; + if (nmp->nm_aconn[nextconn] == NULL) + newnfs_connect(nmp, nrp, cred, td, 0, false, + &nmp->nm_aconn[nextconn]); + } + /* * For a client side mount, nmp is != NULL and clp == NULL. For * server calls (callbacks or upcalls), nmp == NULL. @@ -830,6 +867,19 @@ if (clp != NULL && sep != NULL) stat = clnt_bck_call(nrp->nr_client, &ext, procnum, nd->nd_mreq, &nd->nd_mrep, timo, sep->nfsess_xprt); + else if (nextconn_set) + /* + * When there are multiple TCP connections, send the + * RPCs with large messages on the alternate TCP + * connection(s) in a round robin fashion. + * The small RPC messages are sent on the default + * TCP connection because they do not require much + * network bandwidth and separating them from the + * large RPC messages avoids them getting "log jammed" + * behind several large RPC messages. 
+ */ + stat = CLNT_CALL_MBUF(nmp->nm_aconn[nextconn], + &ext, procnum, nd->nd_mreq, &nd->nd_mrep, timo); else stat = CLNT_CALL_MBUF(nrp->nr_client, &ext, procnum, nd->nd_mreq, &nd->nd_mrep, timo); diff --git a/sys/fs/nfs/nfs_commonsubs.c.nconn b/sys/fs/nfs/nfs_commonsubs.c --- a/sys/fs/nfs/nfs_commonsubs.c.nconn +++ b/sys/fs/nfs/nfs_commonsubs.c @@ -3625,7 +3625,8 @@ } rp->nr_vers = RPCNFSUSERD_VERS; if (error == 0) - error = newnfs_connect(NULL, rp, NFSPROCCRED(p), p, 0, false); + error = newnfs_connect(NULL, rp, NFSPROCCRED(p), p, 0, false, + &rp->nr_client); if (error == 0) { NFSLOCKNAMEID(); nfsrv_nfsuserd = RUNNING; @@ -3659,7 +3660,7 @@ msleep(&nfsrv_userdupcalls, NFSNAMEIDMUTEXPTR, PVFS, "nfsupcalls", 0); NFSUNLOCKNAMEID(); - newnfs_disconnect(&nfsrv_nfsuserdsock); + newnfs_disconnect(NULL, &nfsrv_nfsuserdsock); free(nfsrv_nfsuserdsock.nr_nam, M_SONAME); NFSLOCKNAMEID(); nfsrv_nfsuserd = NOTRUNNING; diff --git a/sys/fs/nfs/nfs_var.h.nconn b/sys/fs/nfs/nfs_var.h --- a/sys/fs/nfs/nfs_var.h.nconn +++ b/sys/fs/nfs/nfs_var.h @@ -772,8 +772,8 @@ struct ucred *, u_int32_t, u_int32_t, u_char *, int, u_int64_t *, struct nfsclsession *); int newnfs_connect(struct nfsmount *, struct nfssockreq *, - struct ucred *, NFSPROC_T *, int, bool); -void newnfs_disconnect(struct nfssockreq *); + struct ucred *, NFSPROC_T *, int, bool, struct __rpc_client **); +void newnfs_disconnect(struct nfsmount *, struct nfssockreq *); int newnfs_sigintr(struct nfsmount *, NFSPROC_T *); /* nfs_nfsdkrpc.c */ diff --git a/sys/fs/nfsclient/nfs_clrpcops.c.nconn b/sys/fs/nfsclient/nfs_clrpcops.c --- a/sys/fs/nfsclient/nfs_clrpcops.c.nconn +++ b/sys/fs/nfsclient/nfs_clrpcops.c @@ -5610,7 +5610,7 @@ * unmount, but I did it anyhow. 
*/ nrp->nr_cred = crhold(nmp->nm_sockreq.nr_cred); - error = newnfs_connect(nmp, nrp, NULL, p, 0, false); + error = newnfs_connect(nmp, nrp, NULL, p, 0, false, &nrp->nr_client); NFSCL_DEBUG(3, "DS connect=%d\n", error); dsp = NULL; @@ -5628,7 +5628,7 @@ } while (error == NFSERR_MINORVERMISMATCH && firsttry++ == 0); if (error != 0) - newnfs_disconnect(nrp); + newnfs_disconnect(NULL, nrp); } else { dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS, M_WAITOK | M_ZERO); @@ -5656,7 +5656,7 @@ * If there is already a session for this * server, use it. */ - (void)newnfs_disconnect(nrp); + newnfs_disconnect(NULL, nrp); nfscl_freenfsclds(dsp); *dspp = tdsp; return (0); @@ -5688,7 +5688,7 @@ NFSUNLOCKMNT(nmp); *dspp = dsp; } else if (dsp != NULL) { - newnfs_disconnect(nrp); + newnfs_disconnect(NULL, nrp); nfscl_freenfsclds(dsp); } return (error); diff --git a/sys/fs/nfsclient/nfs_clvfsops.c.nconn b/sys/fs/nfsclient/nfs_clvfsops.c --- a/sys/fs/nfsclient/nfs_clvfsops.c.nconn +++ b/sys/fs/nfsclient/nfs_clvfsops.c @@ -118,7 +118,7 @@ static int mountnfs(struct nfs_args *, struct mount *, struct sockaddr *, char *, u_char *, int, u_char *, int, u_char *, int, struct vnode **, struct ucred *, - struct thread *, int, int, int, uint32_t, char *); + struct thread *, int, int, int, uint32_t, char *, int); static void nfs_getnlminfo(struct vnode *, uint8_t *, size_t *, struct sockaddr_storage *, int *, off_t *, struct timeval *); @@ -548,7 +548,7 @@ nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK); if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen, NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO, - NFS_DEFAULT_NEGNAMETIMEO, 0, 0, NULL)) != 0) { + NFS_DEFAULT_NEGNAMETIMEO, 0, 0, NULL, 0)) != 0) { printf("nfs_mountroot: mount %s on /: %d\n", path, error); return (error); } @@ -715,14 +715,14 @@ haslock = 1; } if (!error) { - newnfs_disconnect(&nmp->nm_sockreq); + newnfs_disconnect(nmp, &nmp->nm_sockreq); if (haslock) newnfs_sndunlock(&nmp->nm_sockreq.nr_lock); 
nmp->nm_sotype = argp->sotype; nmp->nm_soproto = argp->proto; if (nmp->nm_sotype == SOCK_DGRAM) while (newnfs_connect(nmp, &nmp->nm_sockreq, - cred, td, 0, false)) { + cred, td, 0, false, &nmp->nm_sockreq.nr_client)) { printf("newnfs_args: retrying connect\n"); (void) nfs_catnap(PSOCK, 0, "nfscon"); } @@ -750,7 +750,7 @@ "resvport", "readahead", "hostname", "timeo", "timeout", "addr", "fh", "nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname", "dirpath", "minorversion", "nametimeo", "negnametimeo", "nocto", "noncontigwr", - "pnfs", "wcommitsize", "oneopenown", "tls", "tlscertname", + "pnfs", "wcommitsize", "oneopenown", "tls", "tlscertname", "nconnect", NULL }; /* @@ -902,6 +902,7 @@ krbnamelen, srvkrbnamelen; size_t hstlen; uint32_t newflag; + int aconn = 0; has_nfs_args_opt = 0; has_nfs_from_opt = 0; @@ -1192,6 +1193,20 @@ goto out; } } + if (vfs_getopt(mp->mnt_optnew, "nconnect", (void **)&opt, NULL) == + 0) { + ret = sscanf(opt, "%d", &aconn); + if (ret != 1 || aconn < 1 || aconn > NFS_MAXNCONN) { + vfs_mount_error(mp, "illegal nconnect: %s", opt); + error = EINVAL; + goto out; + } + /* + * Setting nconnect=1 is a no-op, allowed so that + * the option can be used in a Linux compatible way. + */ + aconn--; + } if (vfs_getopt(mp->mnt_optnew, "sec", (void **) &secname, NULL) == 0) nfs_sec_name(secname, &args.flags); @@ -1359,10 +1374,25 @@ } } + if (aconn > 0 && (args.sotype != SOCK_STREAM || + (args.flags & NFSMNT_NFSV4) == 0 || minvers == 0)) { + /* + * RFC 5661 requires that an NFSv4.1/4.2 server + * send an RPC reply on the same TCP connection + * as the one it received the request on. + * This property is required for "nconnect" and + * might not be the case for NFSv3 or NFSv4.0 servers. 
+ */ + vfs_mount_error(mp, "nconnect should only be used " + "for NFSv4.1/4.2 mounts"); + error = EINVAL; + goto out; + } + args.fh = nfh; error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath, dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td, - nametimeo, negnametimeo, minvers, newflag, tlscertname); + nametimeo, negnametimeo, minvers, newflag, tlscertname, aconn); out: if (!error) { MNT_ILOCK(mp); @@ -1410,7 +1440,7 @@ char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen, u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp, struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo, - int minvers, uint32_t newflag, char *tlscertname) + int minvers, uint32_t newflag, char *tlscertname, int aconn) { struct nfsmount *nmp; struct nfsnode *np; @@ -1577,7 +1607,8 @@ else nmp->nm_sockreq.nr_vers = NFS_VER2; - if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0, false))) + if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0, false, + &nmp->nm_sockreq.nr_client))) goto bad; /* For NFSv4, get the clientid now. */ if ((argp->flags & NFSMNT_NFSV4) != 0) { @@ -1586,6 +1617,12 @@ NFSCL_DEBUG(3, "aft getcl=%d\n", error); if (error != 0) goto bad; + if (aconn > 0 && nmp->nm_minorvers == 0) { + vfs_mount_error(mp, "nconnect should only be used " + "for NFSv4.1/4.2 mounts"); + error = EINVAL; + goto bad; + } } if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) && @@ -1680,6 +1717,10 @@ MNT_IUNLOCK(mp); } + /* Can now allow additional connections. */ + if (aconn > 0) + nmp->nm_aconnect = aconn; + /* * Lose the lock but keep the ref. 
*/ @@ -1692,7 +1733,7 @@ bad: if (clp != NULL) nfscl_clientrelease(clp); - newnfs_disconnect(&nmp->nm_sockreq); + newnfs_disconnect(NULL, &nmp->nm_sockreq); crfree(nmp->nm_sockreq.nr_cred); if (nmp->nm_sockreq.nr_auth != NULL) AUTH_DESTROY(nmp->nm_sockreq.nr_auth); @@ -1707,7 +1748,7 @@ TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) { if (dsp != TAILQ_FIRST(&nmp->nm_sess) && dsp->nfsclds_sockp != NULL) - newnfs_disconnect(dsp->nfsclds_sockp); + newnfs_disconnect(NULL, dsp->nfsclds_sockp); nfscl_freenfsclds(dsp); } free(nmp->nm_tlscertname, M_NEWNFSMNT); @@ -1793,7 +1834,7 @@ msleep(nmp, &nmp->nm_mtx, PVFS, "nfsfdism", 0); mtx_unlock(&nmp->nm_mtx); - newnfs_disconnect(&nmp->nm_sockreq); + newnfs_disconnect(nmp, &nmp->nm_sockreq); crfree(nmp->nm_sockreq.nr_cred); free(nmp->nm_nam, M_SONAME); if (nmp->nm_sockreq.nr_auth != NULL) @@ -1803,7 +1844,7 @@ TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) { if (dsp != TAILQ_FIRST(&nmp->nm_sess) && dsp->nfsclds_sockp != NULL) - newnfs_disconnect(dsp->nfsclds_sockp); + newnfs_disconnect(NULL, dsp->nfsclds_sockp); nfscl_freenfsclds(dsp); } free(nmp->nm_tlscertname, M_NEWNFSMNT); @@ -2067,6 +2108,7 @@ &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn", &buf, &blen); + nfscl_printoptval(nmp, nmp->nm_aconnect + 1, ",nconnect", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf, diff --git a/sys/fs/nfsclient/nfsmount.h.nconn b/sys/fs/nfsclient/nfsmount.h --- a/sys/fs/nfsclient/nfsmount.h.nconn +++ b/sys/fs/nfsclient/nfsmount.h @@ -39,6 +39,9 @@ #include +/* Maximum value for the "nconnect" mount option. */ +#define NFS_MAXNCONN 16 + /* * Mount structure. * One allocated on every NFS mount. 
@@ -81,6 +84,11 @@ u_int64_t nm_clval; /* identifies which clientid */ u_int64_t nm_fsid[2]; /* NFSv4 fsid */ int nm_minorvers; /* Minor version # for NFSv4 */ + u_int nm_aconnect; /* additional TCP connections */ + u_int nm_nextaconn; /* Next nm_aconn[] to use */ + /* unclipped, wraps to 0 */ + struct __rpc_client *nm_aconn[NFS_MAXNCONN - 1]; /* Additional nconn */ + /* Locked via nm_sockreq.nr_mtx */ u_int16_t nm_krbnamelen; /* Krb5 host principal, if any */ u_int16_t nm_dirpathlen; /* and mount dirpath, for V4 */ u_int16_t nm_srvkrbnamelen; /* and the server's target name */ diff --git a/sys/fs/nfsserver/nfs_nfsdstate.c.nconn b/sys/fs/nfsserver/nfs_nfsdstate.c --- a/sys/fs/nfsserver/nfs_nfsdstate.c.nconn +++ b/sys/fs/nfsserver/nfs_nfsdstate.c @@ -1369,7 +1369,7 @@ NULL, 0, NULL, NULL, NULL, 0, p); } #endif - newnfs_disconnect(&clp->lc_req); + newnfs_disconnect(NULL, &clp->lc_req); free(clp->lc_req.nr_nam, M_SONAME); NFSFREEMUTEX(&clp->lc_req.nr_mtx); free(clp->lc_stateid, M_NFSDCLIENT); @@ -4578,10 +4578,10 @@ nfsrv_freesession(sep, NULL); } else if (nd->nd_procnum == NFSV4PROC_CBNULL) error = newnfs_connect(NULL, &clp->lc_req, cred, - NULL, 1, dotls); + NULL, 1, dotls, &clp->lc_req.nr_client); else error = newnfs_connect(NULL, &clp->lc_req, cred, - NULL, 3, dotls); + NULL, 3, dotls, &clp->lc_req.nr_client); } newnfs_sndunlock(&clp->lc_req.nr_lock); NFSD_DEBUG(4, "aft sndunlock=%d\n", error);