diff --git a/sys/conf/NOTES.vnet b/sys/conf/NOTES --- a/sys/conf/NOTES.vnet +++ b/sys/conf/NOTES @@ -849,6 +849,7 @@ # Network stack virtualization. options VIMAGE options VNET_DEBUG # debug for VIMAGE +options VNET_NFSD # nfsd in VIMAGE # # Network interfaces: diff --git a/sys/conf/options.vnet b/sys/conf/options --- a/sys/conf/options.vnet +++ b/sys/conf/options @@ -929,6 +929,7 @@ # Network stack virtualization options VIMAGE opt_global.h VNET_DEBUG opt_global.h +VNET_NFSD opt_global.h # Common Flash Interface (CFI) options CFI_SUPPORT_STRATAFLASH opt_cfi.h diff --git a/sys/fs/nfs/nfs_commonport.c.vnetmnt b/sys/fs/nfs/nfs_commonport.c --- a/sys/fs/nfs/nfs_commonport.c.vnetmnt +++ b/sys/fs/nfs/nfs_commonport.c @@ -61,7 +61,6 @@ extern void (*nfsd_call_recall)(struct vnode *, int, struct ucred *, struct thread *); extern int nfsrv_useacl; -struct mount nfsv4root_mnt; int newnfs_numnfsd = 0; struct nfsstatsv1 nfsstatsv1; int nfs_numnfscbd = 0; @@ -76,6 +75,8 @@ vop_reclaim_t *nfs_reclaim_p = NULL; uint32_t nfs_srvmaxio = NFS_SRVMAXIO; +NFSDSTATSDEFINE(nfsstatsv1); + int nfs_pnfsio(task_fn_t *, void *); static int nfs_realign_test; @@ -447,8 +448,13 @@ struct { int vers; /* Just the first field of nfsstats. */ } nfsstatver; + struct nfsstatsv1 *outstats; if (uap->flag & NFSSVC_IDNAME) { + if (jailed(p->td_ucred)) { + error = EPERM; + goto out; + } if ((uap->flag & NFSSVC_NEWSTRUCT) != 0) error = copyin(uap->argp, &nid, sizeof(nid)); else { @@ -470,90 +476,96 @@ error = nfssvc_idname(&nid); goto out; } else if (uap->flag & NFSSVC_GETSTATS) { +#ifdef VNET_NFSD + outstats = malloc(sizeof(*outstats), M_TEMP, M_WAITOK | M_ZERO); +#else + outstats = &nfsstatsv1; +#endif + nfsd_copy_vnetstats(); if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) { /* Copy fields to the old ext_nfsstat structure. 
*/ oldnfsstats.attrcache_hits = - nfsstatsv1.attrcache_hits; + outstats->attrcache_hits; oldnfsstats.attrcache_misses = - nfsstatsv1.attrcache_misses; + outstats->attrcache_misses; oldnfsstats.lookupcache_hits = - nfsstatsv1.lookupcache_hits; + outstats->lookupcache_hits; oldnfsstats.lookupcache_misses = - nfsstatsv1.lookupcache_misses; + outstats->lookupcache_misses; oldnfsstats.direofcache_hits = - nfsstatsv1.direofcache_hits; + outstats->direofcache_hits; oldnfsstats.direofcache_misses = - nfsstatsv1.direofcache_misses; + outstats->direofcache_misses; oldnfsstats.accesscache_hits = - nfsstatsv1.accesscache_hits; + outstats->accesscache_hits; oldnfsstats.accesscache_misses = - nfsstatsv1.accesscache_misses; + outstats->accesscache_misses; oldnfsstats.biocache_reads = - nfsstatsv1.biocache_reads; + outstats->biocache_reads; oldnfsstats.read_bios = - nfsstatsv1.read_bios; + outstats->read_bios; oldnfsstats.read_physios = - nfsstatsv1.read_physios; + outstats->read_physios; oldnfsstats.biocache_writes = - nfsstatsv1.biocache_writes; + outstats->biocache_writes; oldnfsstats.write_bios = - nfsstatsv1.write_bios; + outstats->write_bios; oldnfsstats.write_physios = - nfsstatsv1.write_physios; + outstats->write_physios; oldnfsstats.biocache_readlinks = - nfsstatsv1.biocache_readlinks; + outstats->biocache_readlinks; oldnfsstats.readlink_bios = - nfsstatsv1.readlink_bios; + outstats->readlink_bios; oldnfsstats.biocache_readdirs = - nfsstatsv1.biocache_readdirs; + outstats->biocache_readdirs; oldnfsstats.readdir_bios = - nfsstatsv1.readdir_bios; + outstats->readdir_bios; for (i = 0; i < NFSV4_NPROCS; i++) - oldnfsstats.rpccnt[i] = nfsstatsv1.rpccnt[i]; - oldnfsstats.rpcretries = nfsstatsv1.rpcretries; + oldnfsstats.rpccnt[i] = outstats->rpccnt[i]; + oldnfsstats.rpcretries = outstats->rpcretries; for (i = 0; i < NFSV4OP_NOPS; i++) oldnfsstats.srvrpccnt[i] = - nfsstatsv1.srvrpccnt[i]; + outstats->srvrpccnt[i]; for (i = NFSV42_NOPS, j = NFSV4OP_NOPS; i < NFSV42_NOPS + 
NFSV4OP_FAKENOPS; i++, j++) oldnfsstats.srvrpccnt[j] = - nfsstatsv1.srvrpccnt[i]; + outstats->srvrpccnt[i]; oldnfsstats.reserved_0 = 0; oldnfsstats.reserved_1 = 0; - oldnfsstats.rpcrequests = nfsstatsv1.rpcrequests; - oldnfsstats.rpctimeouts = nfsstatsv1.rpctimeouts; - oldnfsstats.rpcunexpected = nfsstatsv1.rpcunexpected; - oldnfsstats.rpcinvalid = nfsstatsv1.rpcinvalid; + oldnfsstats.rpcrequests = outstats->rpcrequests; + oldnfsstats.rpctimeouts = outstats->rpctimeouts; + oldnfsstats.rpcunexpected = outstats->rpcunexpected; + oldnfsstats.rpcinvalid = outstats->rpcinvalid; oldnfsstats.srvcache_inproghits = - nfsstatsv1.srvcache_inproghits; + outstats->srvcache_inproghits; oldnfsstats.reserved_2 = 0; oldnfsstats.srvcache_nonidemdonehits = - nfsstatsv1.srvcache_nonidemdonehits; + outstats->srvcache_nonidemdonehits; oldnfsstats.srvcache_misses = - nfsstatsv1.srvcache_misses; + outstats->srvcache_misses; oldnfsstats.srvcache_tcppeak = - nfsstatsv1.srvcache_tcppeak; - oldnfsstats.srvcache_size = nfsstatsv1.srvcache_size; - oldnfsstats.srvclients = nfsstatsv1.srvclients; - oldnfsstats.srvopenowners = nfsstatsv1.srvopenowners; - oldnfsstats.srvopens = nfsstatsv1.srvopens; - oldnfsstats.srvlockowners = nfsstatsv1.srvlockowners; - oldnfsstats.srvlocks = nfsstatsv1.srvlocks; - oldnfsstats.srvdelegates = nfsstatsv1.srvdelegates; + outstats->srvcache_tcppeak; + oldnfsstats.srvcache_size = outstats->srvcache_size; + oldnfsstats.srvclients = outstats->srvclients; + oldnfsstats.srvopenowners = outstats->srvopenowners; + oldnfsstats.srvopens = outstats->srvopens; + oldnfsstats.srvlockowners = outstats->srvlockowners; + oldnfsstats.srvlocks = outstats->srvlocks; + oldnfsstats.srvdelegates = outstats->srvdelegates; for (i = 0; i < NFSV4OP_CBNOPS; i++) oldnfsstats.cbrpccnt[i] = - nfsstatsv1.cbrpccnt[i]; - oldnfsstats.clopenowners = nfsstatsv1.clopenowners; - oldnfsstats.clopens = nfsstatsv1.clopens; - oldnfsstats.cllockowners = nfsstatsv1.cllockowners; - oldnfsstats.cllocks = 
nfsstatsv1.cllocks; - oldnfsstats.cldelegates = nfsstatsv1.cldelegates; + outstats->cbrpccnt[i]; + oldnfsstats.clopenowners = outstats->clopenowners; + oldnfsstats.clopens = outstats->clopens; + oldnfsstats.cllockowners = outstats->cllockowners; + oldnfsstats.cllocks = outstats->cllocks; + oldnfsstats.cldelegates = outstats->cldelegates; oldnfsstats.cllocalopenowners = - nfsstatsv1.cllocalopenowners; - oldnfsstats.cllocalopens = nfsstatsv1.cllocalopens; + outstats->cllocalopenowners; + oldnfsstats.cllocalopens = outstats->cllocalopens; oldnfsstats.cllocallockowners = - nfsstatsv1.cllocallockowners; - oldnfsstats.cllocallocks = nfsstatsv1.cllocallocks; + outstats->cllocallockowners; + oldnfsstats.cllocallocks = outstats->cllocallocks; error = copyout(&oldnfsstats, uap->argp, sizeof (oldnfsstats)); } else { @@ -561,136 +573,136 @@ sizeof(nfsstatver)); if (error == 0) { if (nfsstatver.vers == NFSSTATS_OV1) { - /* Copy nfsstatsv1 to nfsstatsov1. */ + /* Copy outstats to nfsstatsov1. */ nfsstatsov1.attrcache_hits = - nfsstatsv1.attrcache_hits; + outstats->attrcache_hits; nfsstatsov1.attrcache_misses = - nfsstatsv1.attrcache_misses; + outstats->attrcache_misses; nfsstatsov1.lookupcache_hits = - nfsstatsv1.lookupcache_hits; + outstats->lookupcache_hits; nfsstatsov1.lookupcache_misses = - nfsstatsv1.lookupcache_misses; + outstats->lookupcache_misses; nfsstatsov1.direofcache_hits = - nfsstatsv1.direofcache_hits; + outstats->direofcache_hits; nfsstatsov1.direofcache_misses = - nfsstatsv1.direofcache_misses; + outstats->direofcache_misses; nfsstatsov1.accesscache_hits = - nfsstatsv1.accesscache_hits; + outstats->accesscache_hits; nfsstatsov1.accesscache_misses = - nfsstatsv1.accesscache_misses; + outstats->accesscache_misses; nfsstatsov1.biocache_reads = - nfsstatsv1.biocache_reads; + outstats->biocache_reads; nfsstatsov1.read_bios = - nfsstatsv1.read_bios; + outstats->read_bios; nfsstatsov1.read_physios = - nfsstatsv1.read_physios; + outstats->read_physios; 
nfsstatsov1.biocache_writes = - nfsstatsv1.biocache_writes; + outstats->biocache_writes; nfsstatsov1.write_bios = - nfsstatsv1.write_bios; + outstats->write_bios; nfsstatsov1.write_physios = - nfsstatsv1.write_physios; + outstats->write_physios; nfsstatsov1.biocache_readlinks = - nfsstatsv1.biocache_readlinks; + outstats->biocache_readlinks; nfsstatsov1.readlink_bios = - nfsstatsv1.readlink_bios; + outstats->readlink_bios; nfsstatsov1.biocache_readdirs = - nfsstatsv1.biocache_readdirs; + outstats->biocache_readdirs; nfsstatsov1.readdir_bios = - nfsstatsv1.readdir_bios; + outstats->readdir_bios; for (i = 0; i < NFSV42_OLDNPROCS; i++) nfsstatsov1.rpccnt[i] = - nfsstatsv1.rpccnt[i]; + outstats->rpccnt[i]; nfsstatsov1.rpcretries = - nfsstatsv1.rpcretries; + outstats->rpcretries; for (i = 0; i < NFSV42_PURENOPS; i++) nfsstatsov1.srvrpccnt[i] = - nfsstatsv1.srvrpccnt[i]; + outstats->srvrpccnt[i]; for (i = NFSV42_NOPS, j = NFSV42_PURENOPS; i < NFSV42_NOPS + NFSV4OP_FAKENOPS; i++, j++) nfsstatsov1.srvrpccnt[j] = - nfsstatsv1.srvrpccnt[i]; + outstats->srvrpccnt[i]; nfsstatsov1.reserved_0 = 0; nfsstatsov1.reserved_1 = 0; nfsstatsov1.rpcrequests = - nfsstatsv1.rpcrequests; + outstats->rpcrequests; nfsstatsov1.rpctimeouts = - nfsstatsv1.rpctimeouts; + outstats->rpctimeouts; nfsstatsov1.rpcunexpected = - nfsstatsv1.rpcunexpected; + outstats->rpcunexpected; nfsstatsov1.rpcinvalid = - nfsstatsv1.rpcinvalid; + outstats->rpcinvalid; nfsstatsov1.srvcache_inproghits = - nfsstatsv1.srvcache_inproghits; + outstats->srvcache_inproghits; nfsstatsov1.reserved_2 = 0; nfsstatsov1.srvcache_nonidemdonehits = - nfsstatsv1.srvcache_nonidemdonehits; + outstats->srvcache_nonidemdonehits; nfsstatsov1.srvcache_misses = - nfsstatsv1.srvcache_misses; + outstats->srvcache_misses; nfsstatsov1.srvcache_tcppeak = - nfsstatsv1.srvcache_tcppeak; + outstats->srvcache_tcppeak; nfsstatsov1.srvcache_size = - nfsstatsv1.srvcache_size; + outstats->srvcache_size; nfsstatsov1.srvclients = - nfsstatsv1.srvclients; 
+ outstats->srvclients; nfsstatsov1.srvopenowners = - nfsstatsv1.srvopenowners; + outstats->srvopenowners; nfsstatsov1.srvopens = - nfsstatsv1.srvopens; + outstats->srvopens; nfsstatsov1.srvlockowners = - nfsstatsv1.srvlockowners; + outstats->srvlockowners; nfsstatsov1.srvlocks = - nfsstatsv1.srvlocks; + outstats->srvlocks; nfsstatsov1.srvdelegates = - nfsstatsv1.srvdelegates; + outstats->srvdelegates; for (i = 0; i < NFSV42_CBNOPS; i++) nfsstatsov1.cbrpccnt[i] = - nfsstatsv1.cbrpccnt[i]; + outstats->cbrpccnt[i]; nfsstatsov1.clopenowners = - nfsstatsv1.clopenowners; + outstats->clopenowners; nfsstatsov1.clopens = - nfsstatsv1.clopens; + outstats->clopens; nfsstatsov1.cllockowners = - nfsstatsv1.cllockowners; + outstats->cllockowners; nfsstatsov1.cllocks = - nfsstatsv1.cllocks; + outstats->cllocks; nfsstatsov1.cldelegates = - nfsstatsv1.cldelegates; + outstats->cldelegates; nfsstatsov1.cllocalopenowners = - nfsstatsv1.cllocalopenowners; + outstats->cllocalopenowners; nfsstatsov1.cllocalopens = - nfsstatsv1.cllocalopens; + outstats->cllocalopens; nfsstatsov1.cllocallockowners = - nfsstatsv1.cllocallockowners; + outstats->cllocallockowners; nfsstatsov1.cllocallocks = - nfsstatsv1.cllocallocks; + outstats->cllocallocks; nfsstatsov1.srvstartcnt = - nfsstatsv1.srvstartcnt; + outstats->srvstartcnt; nfsstatsov1.srvdonecnt = - nfsstatsv1.srvdonecnt; + outstats->srvdonecnt; for (i = NFSV42_NOPS, j = NFSV42_PURENOPS; i < NFSV42_NOPS + NFSV4OP_FAKENOPS; i++, j++) { nfsstatsov1.srvbytes[j] = - nfsstatsv1.srvbytes[i]; + outstats->srvbytes[i]; nfsstatsov1.srvops[j] = - nfsstatsv1.srvops[i]; + outstats->srvops[i]; nfsstatsov1.srvduration[j] = - nfsstatsv1.srvduration[i]; + outstats->srvduration[i]; } nfsstatsov1.busyfrom = - nfsstatsv1.busyfrom; + outstats->busyfrom; nfsstatsov1.busyfrom = - nfsstatsv1.busyfrom; + outstats->busyfrom; error = copyout(&nfsstatsov1, uap->argp, sizeof(nfsstatsov1)); } else if (nfsstatver.vers != NFSSTATS_V1) error = EPERM; else - error = 
copyout(&nfsstatsv1, uap->argp, + error = copyout(outstats, uap->argp, sizeof(nfsstatsv1)); } } @@ -721,23 +733,31 @@ nfsstatsv1.rpcinvalid = 0; bzero(nfsstatsv1.rpccnt, sizeof(nfsstatsv1.rpccnt)); + bzero(nfsstatsv1.cbrpccnt, + sizeof(nfsstatsv1.cbrpccnt)); } if ((uap->flag & NFSSVC_ZEROSRVSTATS) != 0) { + nfsd_zero_vnetstats(); nfsstatsv1.srvcache_inproghits = 0; nfsstatsv1.srvcache_nonidemdonehits = 0; nfsstatsv1.srvcache_misses = 0; nfsstatsv1.srvcache_tcppeak = 0; bzero(nfsstatsv1.srvrpccnt, sizeof(nfsstatsv1.srvrpccnt)); - bzero(nfsstatsv1.cbrpccnt, - sizeof(nfsstatsv1.cbrpccnt)); } } +#ifdef VNET_NFSD + free(outstats, M_TEMP); +#endif goto out; } else if (uap->flag & NFSSVC_NFSUSERDPORT) { u_short sockport; struct nfsuserd_args nargs; + if (jailed(p->td_ucred)) { + error = EPERM; + goto out; + } if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) { error = copyin(uap->argp, (caddr_t)&sockport, sizeof (u_short)); @@ -755,6 +775,10 @@ if (!error) error = nfsrv_nfsuserdport(&nargs, p); } else if (uap->flag & NFSSVC_NFSUSERDDELPORT) { + if (jailed(p->td_ucred)) { + error = EPERM; + goto out; + } nfsrv_nfsuserddelport(); error = 0; } diff --git a/sys/fs/nfs/nfs_commonsubs.c.vnet b/sys/fs/nfs/nfs_commonsubs.c --- a/sys/fs/nfs/nfs_commonsubs.c.vnet +++ b/sys/fs/nfs/nfs_commonsubs.c @@ -3931,7 +3931,7 @@ cr->cr_uid = cr->cr_ruid = cr->cr_svuid = nidp->nid_uid; crsetgroups(cr, nidp->nid_ngroup, grps); cr->cr_rgid = cr->cr_svgid = cr->cr_groups[0]; - cr->cr_prison = &prison0; + cr->cr_prison = curthread->td_ucred->cr_prison; prison_hold(cr->cr_prison); #ifdef MAC mac_cred_associate_nfsd(cr); diff --git a/sys/fs/nfs/nfsdport.h.vnetdcl b/sys/fs/nfs/nfsdport.h --- a/sys/fs/nfs/nfsdport.h.vnetdcl +++ b/sys/fs/nfs/nfsdport.h @@ -92,7 +92,7 @@ bcmp(&(f1)->fh_fid, &(f2)->fh_fid, sizeof(struct fid)) == 0) #define NFSLOCKHASH(f) \ - (&nfslockhash[nfsrv_hashfh(f) % nfsrv_lockhashsize]) + (&VNET(nfslockhash)[nfsrv_hashfh(f) % nfsrv_lockhashsize]) #define NFSFPVNODE(f) ((f)->f_vnode) 
#define NFSFPCRED(f) ((f)->f_cred) diff --git a/sys/fs/nfs/nfsdvnet.h.vnet b/sys/fs/nfs/nfsdvnet.h --- a/sys/fs/nfs/nfsdvnet.h.vnet +++ b/sys/fs/nfs/nfsdvnet.h @@ -0,0 +1,59 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2022 Rick Macklem + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _NFS_NFSDVNET_H_ +#define _NFS_NFSDVNET_H_ + +#ifdef _KERNEL +#if defined(VIMAGE) && !defined(VNET_NFSD) +/* Don't allocate variables to the vnet. 
*/ +#undef VNET_NAME +#undef VNET_DECLARE +#undef VNET_DEFINE +#undef VNET_DEFINE_STATIC +#undef VNET +#define VNET_NAME(n) n +#define VNET_DECLARE(t, n) extern t n +#define VNET_DEFINE(t, n) struct _hack; t n +#define VNET_DEFINE_STATIC(t, n) static t n +#define VNET(n) (n) + +#undef CTLFLAG_VNET +#define CTLFLAG_VNET 0 + +#undef CURVNET_SET +#undef CURVNET_RESTORE +#undef TD_TO_VNET +#define CURVNET_SET(n) +#define CURVNET_RESTORE() +#define TD_TO_VNET(n) NULL +#endif /* VIMAGE && !VNET_NFSD */ +#endif /* _KERNEL */ + +#endif /* _NFS_NFSDVNET_H_ */ diff --git a/sys/fs/nfs/nfsport.h.vnetstats b/sys/fs/nfs/nfsport.h --- a/sys/fs/nfs/nfsport.h.vnetstats +++ b/sys/fs/nfs/nfsport.h @@ -181,6 +181,54 @@ */ #define NFSMUTEX_T struct mtx +/* Macros for VNET_NFSD. */ +#ifdef VNET_NFSD +CTASSERT(VIMAGE != 0); +#define NFSDSTATS(s) VNET(nfsstatsv1_vnet) +#define NFSDSTATSDECLARE(s) VNET_DECLARE(struct nfsstatsv1_vnet, nfsstatsv1_vnet) +#define NFSDSTATSDEFINE(s) VNET_DEFINE(struct nfsstatsv1_vnet, nfsstatsv1_vnet) +#define nfsd_copy_vnetstats() do { \ + int iii; \ + NFSBCOPY(&nfsstatsv1, outstats, sizeof(nfsstatsv1)); \ + for (iii = 0; iii < NFSV42_NOPS + NFSV4OP_FAKENOPS; iii++) { \ + outstats->srvrpccnt[iii] = VNET(nfsstatsv1_vnet).srvrpccnt[iii]; \ + outstats->srvbytes[iii] = VNET(nfsstatsv1_vnet).srvbytes[iii]; \ + outstats->srvops[iii] = VNET(nfsstatsv1_vnet).srvops[iii]; \ + outstats->srvduration[iii] = VNET(nfsstatsv1_vnet).srvduration[iii]; \ + } \ + outstats->srvlayouts = VNET(nfsstatsv1_vnet).srvlayouts; \ + outstats->srvcache_inproghits = VNET(nfsstatsv1_vnet).srvcache_inproghits; \ + outstats->srvcache_nonidemdonehits = VNET(nfsstatsv1_vnet).srvcache_nonidemdonehits; \ + outstats->srvcache_misses = VNET(nfsstatsv1_vnet).srvcache_misses; \ + outstats->srvcache_tcppeak = VNET(nfsstatsv1_vnet).srvcache_tcppeak; \ + outstats->srvcache_size = VNET(nfsstatsv1_vnet).srvcache_size; \ + outstats->srvclients = VNET(nfsstatsv1_vnet).srvclients; \ + outstats->srvopenowners = 
VNET(nfsstatsv1_vnet).srvopenowners; \ + outstats->srvopens = VNET(nfsstatsv1_vnet).srvopens; \ + outstats->srvlockowners = VNET(nfsstatsv1_vnet).srvlockowners; \ + outstats->srvlocks = VNET(nfsstatsv1_vnet).srvlocks; \ + outstats->srvdelegates = VNET(nfsstatsv1_vnet).srvdelegates; \ + outstats->srvstartcnt = VNET(nfsstatsv1_vnet).srvstartcnt; \ + outstats->srvdonecnt = VNET(nfsstatsv1_vnet).srvdonecnt; \ + outstats->busyfrom = VNET(nfsstatsv1_vnet).busyfrom; \ + outstats->busytime = VNET(nfsstatsv1_vnet).busytime; \ + } while (0) +#define nfsd_zero_vnetstats() do { \ + VNET(nfsstatsv1_vnet).srvcache_inproghits = 0; \ + VNET(nfsstatsv1_vnet).srvcache_nonidemdonehits = 0; \ + VNET(nfsstatsv1_vnet).srvcache_misses = 0; \ + VNET(nfsstatsv1_vnet).srvcache_tcppeak = 0; \ + bzero(VNET(nfsstatsv1_vnet).srvrpccnt, \ + sizeof(nfsstatsv1.srvrpccnt)); \ + } while (0) +#else /* !VNET_NFSD */ +#define NFSDSTATS(s) s +#define NFSDSTATSDECLARE(s) extern struct s s +#define NFSDSTATSDEFINE(s) +#define nfsd_copy_vnetstats() +#define nfsd_zero_vnetstats() +#endif /* VNET_NFSD */ + #endif /* _KERNEL */ /* @@ -493,6 +541,33 @@ uint64_t cllocalopens; uint64_t cllocallockowners; uint64_t cllocallocks; + uint64_t srvstartcnt; + uint64_t srvdonecnt; + uint64_t srvbytes[NFSV42_NOPS + NFSV4OP_FAKENOPS + 15]; + uint64_t srvops[NFSV42_NOPS + NFSV4OP_FAKENOPS + 15]; + struct bintime srvduration[NFSV42_NOPS + NFSV4OP_FAKENOPS + 15]; + struct bintime busyfrom; + struct bintime busytime; +}; + +/* + * Subset of above structure that needs to be vnet'd for the case + * of VNET_NFSD. + */ +struct nfsstatsv1_vnet { + uint64_t srvrpccnt[NFSV42_NOPS + NFSV4OP_FAKENOPS + 15]; + uint64_t srvlayouts; + uint64_t srvcache_inproghits; + uint64_t srvcache_nonidemdonehits; + uint64_t srvcache_misses; + uint64_t srvcache_tcppeak; + int srvcache_size; /* Updated by atomic_xx_int(). 
*/ + uint64_t srvclients; + uint64_t srvopenowners; + uint64_t srvopens; + uint64_t srvlockowners; + uint64_t srvlocks; + uint64_t srvdelegates; uint64_t srvstartcnt; uint64_t srvdonecnt; uint64_t srvbytes[NFSV42_NOPS + NFSV4OP_FAKENOPS + 15]; diff --git a/sys/fs/nfs/nfsrvstate.h.vnetdcl b/sys/fs/nfs/nfsrvstate.h --- a/sys/fs/nfs/nfsrvstate.h.vnetdcl +++ b/sys/fs/nfs/nfsrvstate.h @@ -58,7 +58,7 @@ TAILQ_HEAD(nfsuserhashhead, nfsusrgrp); #define NFSCLIENTHASH(id) \ - (&nfsclienthash[(id).lval[1] % nfsrv_clienthashsize]) + (&VNET(nfsclienthash)[(id).lval[1] % nfsrv_clienthashsize]) #define NFSSTATEHASH(clp, id) \ (&((clp)->lc_stateid[(id).other[2] % nfsrv_statehashsize])) #define NFSUSERHASH(id) \ @@ -77,7 +77,7 @@ struct nfssessionhashhead list; }; #define NFSSESSIONHASH(f) \ - (&nfssessionhash[nfsrv_hashsessionid(f) % nfsrv_sessionhashsize]) + (&VNET(nfssessionhash)[nfsrv_hashsessionid(f) % nfsrv_sessionhashsize]) struct nfslayouthash { struct mtx mtx; diff --git a/sys/fs/nfsserver/nfs_fha_new.c.vnetdcl b/sys/fs/nfsserver/nfs_fha_new.c --- a/sys/fs/nfsserver/nfs_fha_new.c.vnetdcl +++ b/sys/fs/nfsserver/nfs_fha_new.c @@ -34,6 +34,7 @@ #include #include +#include #include #include @@ -61,8 +62,9 @@ SYSCTL_DECL(_vfs_nfsd); extern int newnfs_nfsv3_procid[]; -extern SVCPOOL *nfsrvd_pool; +VNET_DECLARE(SVCPOOL *, nfsrvd_pool); + SYSINIT(nfs_fhanew, SI_SUB_ROOT_CONF, SI_ORDER_ANY, fhanew_init, NULL); SYSUNINIT(nfs_fhanew, SI_SUB_ROOT_CONF, SI_ORDER_ANY, fhanew_uninit, NULL); @@ -79,7 +81,7 @@ snprintf(softc->server_name, sizeof(softc->server_name), FHANEW_SERVER_NAME); - softc->pool = &nfsrvd_pool; + softc->pool = &VNET(nfsrvd_pool); /* * Initialize the sysctl context list for the fha module. diff --git a/sys/fs/nfsserver/nfs_nfsdcache.c.vnetdcl b/sys/fs/nfsserver/nfs_nfsdcache.c --- a/sys/fs/nfsserver/nfs_nfsdcache.c.vnetdcl +++ b/sys/fs/nfsserver/nfs_nfsdcache.c @@ -159,13 +159,18 @@ * never happens. 
*/ #include +#include -extern struct nfsstatsv1 nfsstatsv1; extern struct mtx nfsrc_udpmtx; -extern struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE]; -extern struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE]; -int nfsrc_floodlevel = NFSRVCACHE_FLOODLEVEL, nfsrc_tcpsavedreplies = 0; +NFSDSTATSDECLARE(nfsstatsv1); + +VNET_DECLARE(struct nfsrchash_bucket, nfsrchash_table[NFSRVCACHE_HASHSIZE]); +VNET_DECLARE(struct nfsrchash_bucket, nfsrcahash_table[NFSRVCACHE_HASHSIZE]); + +VNET_DEFINE(int, nfsrc_floodlevel) = NFSRVCACHE_FLOODLEVEL; +VNET_DEFINE(int, nfsrc_tcpsavedreplies) = 0; + SYSCTL_DECL(_vfs_nfsd); static u_int nfsrc_tcphighwater = 0; @@ -180,8 +185,8 @@ return (error); if (newhighwater < 0) return (EINVAL); - if (newhighwater >= nfsrc_floodlevel) - nfsrc_floodlevel = newhighwater + newhighwater / 5; + if (newhighwater >= VNET(nfsrc_floodlevel)) + VNET(nfsrc_floodlevel) = newhighwater + newhighwater / 5; nfsrc_tcphighwater = newhighwater; return (0); } @@ -202,9 +207,9 @@ &nfsrc_tcpnonidempotent, 0, "Enable the DRC for NFS over TCP"); -static int nfsrc_udpcachesize = 0; -static TAILQ_HEAD(, nfsrvcache) nfsrvudplru; -static struct nfsrvhashhead nfsrvudphashtbl[NFSRVCACHE_HASHSIZE]; +VNET_DEFINE_STATIC(int, nfsrc_udpcachesize) = 0; +VNET_DEFINE_STATIC(TAILQ_HEAD(, nfsrvcache), nfsrvudplru); +VNET_DEFINE_STATIC(struct nfsrvhashhead, nfsrvudphashtbl[NFSRVCACHE_HASHSIZE]); /* * and the reverse mapping from generic to Version 2 procedure numbers @@ -236,10 +241,10 @@ #define nfsrc_hash(xid) (((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE) #define NFSRCUDPHASH(xid) \ - (&nfsrvudphashtbl[nfsrc_hash(xid)]) + (&VNET(nfsrvudphashtbl)[nfsrc_hash(xid)]) #define NFSRCHASH(xid) \ - (&nfsrchash_table[nfsrc_hash(xid)].tbl) -#define NFSRCAHASH(xid) (&nfsrcahash_table[nfsrc_hash(xid)]) + (&VNET(nfsrchash_table)[nfsrc_hash(xid)].tbl) +#define NFSRCAHASH(xid) (&VNET(nfsrcahash_table)[nfsrc_hash(xid)]) #define TRUE 1 #define FALSE 0 #define NFSRVCACHE_CHECKLEN 
100 @@ -295,7 +300,7 @@ if ((rp->rc_flag & RC_UDP) != 0) return (&nfsrc_udpmtx); - return (&nfsrchash_table[nfsrc_hash(rp->rc_xid)].mtx); + return (&VNET(nfsrchash_table)[nfsrc_hash(rp->rc_xid)].mtx); } /* @@ -305,21 +310,21 @@ nfsrvd_initcache(void) { int i; - static int inited = 0; - if (inited) - return; - inited = 1; for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { - LIST_INIT(&nfsrvudphashtbl[i]); - LIST_INIT(&nfsrchash_table[i].tbl); - LIST_INIT(&nfsrcahash_table[i].tbl); + mtx_init(&VNET(nfsrchash_table)[i].mtx, "nfsrtc", NULL, + MTX_DEF); + mtx_init(&VNET(nfsrcahash_table)[i].mtx, "nfsrtca", NULL, + MTX_DEF); } - TAILQ_INIT(&nfsrvudplru); - nfsrc_tcpsavedreplies = 0; - nfsrc_udpcachesize = 0; - nfsstatsv1.srvcache_tcppeak = 0; - nfsstatsv1.srvcache_size = 0; + for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { + LIST_INIT(&VNET(nfsrvudphashtbl)[i]); + LIST_INIT(&VNET(nfsrchash_table)[i].tbl); + LIST_INIT(&VNET(nfsrcahash_table)[i].tbl); + } + TAILQ_INIT(&VNET(nfsrvudplru)); + VNET(nfsrc_tcpsavedreplies) = 0; + VNET(nfsrc_udpcachesize) = 0; } /* @@ -392,17 +397,17 @@ if (rp->rc_flag == 0) panic("nfs udp cache0"); rp->rc_flag |= RC_LOCKED; - TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru); - TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru); + TAILQ_REMOVE(&VNET(nfsrvudplru), rp, rc_lru); + TAILQ_INSERT_TAIL(&VNET(nfsrvudplru), rp, rc_lru); if (rp->rc_flag & RC_INPROG) { - nfsstatsv1.srvcache_inproghits++; + NFSDSTATS(nfsstatsv1).srvcache_inproghits++; mtx_unlock(mutex); ret = RC_DROPIT; } else if (rp->rc_flag & RC_REPSTATUS) { /* * V2 only. 
*/ - nfsstatsv1.srvcache_nonidemdonehits++; + NFSDSTATS(nfsstatsv1).srvcache_nonidemdonehits++; mtx_unlock(mutex); nfsrvd_rephead(nd); *(nd->nd_errp) = rp->rc_status; @@ -410,7 +415,7 @@ rp->rc_timestamp = NFSD_MONOSEC + NFSRVCACHE_UDPTIMEOUT; } else if (rp->rc_flag & RC_REPMBUF) { - nfsstatsv1.srvcache_nonidemdonehits++; + NFSDSTATS(nfsstatsv1).srvcache_nonidemdonehits++; mtx_unlock(mutex); nd->nd_mreq = m_copym(rp->rc_reply, 0, M_COPYALL, M_WAITOK); @@ -425,9 +430,9 @@ goto out; } } - nfsstatsv1.srvcache_misses++; - atomic_add_int(&nfsstatsv1.srvcache_size, 1); - nfsrc_udpcachesize++; + NFSDSTATS(nfsstatsv1).srvcache_misses++; + atomic_add_int(&NFSDSTATS(nfsstatsv1).srvcache_size, 1); + VNET(nfsrc_udpcachesize)++; newrp->rc_flag |= RC_INPROG; saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *); @@ -440,7 +445,7 @@ newrp->rc_flag |= RC_INETIPV6; } LIST_INSERT_HEAD(hp, newrp, rc_hash); - TAILQ_INSERT_TAIL(&nfsrvudplru, newrp, rc_lru); + TAILQ_INSERT_TAIL(&VNET(nfsrvudplru), newrp, rc_lru); mtx_unlock(mutex); nd->nd_rp = newrp; ret = RC_DOIT; @@ -472,15 +477,15 @@ panic("nfsrvd_updatecache not inprog"); rp->rc_flag &= ~RC_INPROG; if (rp->rc_flag & RC_UDP) { - TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru); - TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru); + TAILQ_REMOVE(&VNET(nfsrvudplru), rp, rc_lru); + TAILQ_INSERT_TAIL(&VNET(nfsrvudplru), rp, rc_lru); } /* * Reply from cache is a special case returned by nfsrv_checkseqid(). 
*/ if (nd->nd_repstat == NFSERR_REPLYFROMCACHE) { - nfsstatsv1.srvcache_nonidemdonehits++; + NFSDSTATS(nfsstatsv1).srvcache_nonidemdonehits++; mtx_unlock(mutex); nd->nd_repstat = 0; if (nd->nd_mreq) @@ -503,7 +508,7 @@ (rp->rc_refcnt > 0 || ((nd->nd_flag & ND_SAVEREPLY) && (rp->rc_flag & RC_UDP)) || ((nd->nd_flag & ND_SAVEREPLY) && !(rp->rc_flag & RC_UDP) && - nfsrc_tcpsavedreplies <= nfsrc_floodlevel && + VNET(nfsrc_tcpsavedreplies) <= VNET(nfsrc_floodlevel) && nfsrc_tcpnonidempotent))) { if (rp->rc_refcnt > 0) { if (!(rp->rc_flag & RC_NFSV4)) @@ -517,11 +522,11 @@ mtx_unlock(mutex); } else { if (!(rp->rc_flag & RC_UDP)) { - atomic_add_int(&nfsrc_tcpsavedreplies, 1); - if (nfsrc_tcpsavedreplies > - nfsstatsv1.srvcache_tcppeak) - nfsstatsv1.srvcache_tcppeak = - nfsrc_tcpsavedreplies; + atomic_add_int(&VNET(nfsrc_tcpsavedreplies), 1); + if (VNET(nfsrc_tcpsavedreplies) > + NFSDSTATS(nfsstatsv1).srvcache_tcppeak) + NFSDSTATS(nfsstatsv1).srvcache_tcppeak = + VNET(nfsrc_tcpsavedreplies); } mtx_unlock(mutex); m = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK); @@ -678,7 +683,7 @@ panic("nfs tcp cache0"); rp->rc_flag |= RC_LOCKED; if (rp->rc_flag & RC_INPROG) { - nfsstatsv1.srvcache_inproghits++; + NFSDSTATS(nfsstatsv1).srvcache_inproghits++; mtx_unlock(mutex); if (newrp->rc_sockref == rp->rc_sockref) nfsrc_marksametcpconn(rp->rc_sockref); @@ -687,7 +692,7 @@ /* * V2 only. 
*/ - nfsstatsv1.srvcache_nonidemdonehits++; + NFSDSTATS(nfsstatsv1).srvcache_nonidemdonehits++; mtx_unlock(mutex); if (newrp->rc_sockref == rp->rc_sockref) nfsrc_marksametcpconn(rp->rc_sockref); @@ -696,7 +701,7 @@ *(nd->nd_errp) = rp->rc_status; rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout; } else if (rp->rc_flag & RC_REPMBUF) { - nfsstatsv1.srvcache_nonidemdonehits++; + NFSDSTATS(nfsstatsv1).srvcache_nonidemdonehits++; mtx_unlock(mutex); if (newrp->rc_sockref == rp->rc_sockref) nfsrc_marksametcpconn(rp->rc_sockref); @@ -711,8 +716,8 @@ free(newrp, M_NFSRVCACHE); goto out; } - nfsstatsv1.srvcache_misses++; - atomic_add_int(&nfsstatsv1.srvcache_size, 1); + NFSDSTATS(nfsstatsv1).srvcache_misses++; + atomic_add_int(&NFSDSTATS(nfsstatsv1).srvcache_size, 1); /* * For TCP, multiple entries for a key are allowed, so don't @@ -785,8 +790,8 @@ LIST_REMOVE(rp, rc_hash); if (rp->rc_flag & RC_UDP) { - TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru); - nfsrc_udpcachesize--; + TAILQ_REMOVE(&VNET(nfsrvudplru), rp, rc_lru); + VNET(nfsrc_udpcachesize)--; } else if (rp->rc_acked != RC_NO_SEQ) { hbp = NFSRCAHASH(rp->rc_sockref); mtx_lock(&hbp->mtx); @@ -798,10 +803,10 @@ if (rp->rc_flag & RC_REPMBUF) { m_freem(rp->rc_reply); if (!(rp->rc_flag & RC_UDP)) - atomic_add_int(&nfsrc_tcpsavedreplies, -1); + atomic_add_int(&VNET(nfsrc_tcpsavedreplies), -1); } free(rp, M_NFSRVCACHE); - atomic_add_int(&nfsstatsv1.srvcache_size, -1); + atomic_add_int(&NFSDSTATS(nfsstatsv1).srvcache_size, -1); } /* @@ -814,20 +819,20 @@ int i; for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { - mtx_lock(&nfsrchash_table[i].mtx); - LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash, nextrp) + mtx_lock(&VNET(nfsrchash_table)[i].mtx); + LIST_FOREACH_SAFE(rp, &VNET(nfsrchash_table)[i].tbl, rc_hash, nextrp) nfsrc_freecache(rp); - mtx_unlock(&nfsrchash_table[i].mtx); + mtx_unlock(&VNET(nfsrchash_table)[i].mtx); } mtx_lock(&nfsrc_udpmtx); for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { - LIST_FOREACH_SAFE(rp, 
&nfsrvudphashtbl[i], rc_hash, nextrp) { + LIST_FOREACH_SAFE(rp, &VNET(nfsrvudphashtbl)[i], rc_hash, nextrp) { nfsrc_freecache(rp); } } - nfsstatsv1.srvcache_size = 0; + NFSDSTATS(nfsstatsv1).srvcache_size = 0; mtx_unlock(&nfsrc_udpmtx); - nfsrc_tcpsavedreplies = 0; + VNET(nfsrc_tcpsavedreplies) = 0; } #define HISTSIZE 16 @@ -864,25 +869,25 @@ if (atomic_cmpset_acq_int(&onethread, 0, 1) == 0) return; if (NFSD_MONOSEC != udp_lasttrim || - nfsrc_udpcachesize >= (nfsrc_udphighwater + + VNET(nfsrc_udpcachesize) >= (nfsrc_udphighwater + nfsrc_udphighwater / 2)) { mtx_lock(&nfsrc_udpmtx); udp_lasttrim = NFSD_MONOSEC; - TAILQ_FOREACH_SAFE(rp, &nfsrvudplru, rc_lru, nextrp) { + TAILQ_FOREACH_SAFE(rp, &VNET(nfsrvudplru), rc_lru, nextrp) { if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED)) && rp->rc_refcnt == 0 && ((rp->rc_flag & RC_REFCNT) || udp_lasttrim > rp->rc_timestamp || - nfsrc_udpcachesize > nfsrc_udphighwater)) + VNET(nfsrc_udpcachesize) > nfsrc_udphighwater)) nfsrc_freecache(rp); } mtx_unlock(&nfsrc_udpmtx); } if (NFSD_MONOSEC != tcp_lasttrim || - nfsrc_tcpsavedreplies >= nfsrc_tcphighwater) { + VNET(nfsrc_tcpsavedreplies) >= nfsrc_tcphighwater) { force = nfsrc_tcphighwater / 4; if (force > 0 && - nfsrc_tcpsavedreplies + force >= nfsrc_tcphighwater) { + VNET(nfsrc_tcpsavedreplies) + force >= nfsrc_tcphighwater) { for (i = 0; i < HISTSIZE; i++) time_histo[i] = 0; i = 0; @@ -901,8 +906,8 @@ tto = nfsrc_tcptimeout; tcp_lasttrim = NFSD_MONOSEC; for (; i <= lastslot; i++) { - mtx_lock(&nfsrchash_table[i].mtx); - LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash, + mtx_lock(&VNET(nfsrchash_table)[i].mtx); + LIST_FOREACH_SAFE(rp, &VNET(nfsrchash_table)[i].tbl, rc_hash, nextrp) { if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED)) @@ -932,7 +937,7 @@ time_histo[j]++; } } - mtx_unlock(&nfsrchash_table[i].mtx); + mtx_unlock(&VNET(nfsrchash_table)[i].mtx); } if (force) { /* @@ -951,8 +956,8 @@ k = 1; thisstamp = tcp_lasttrim + k; for (i = 0; i < NFSRVCACHE_HASHSIZE; 
i++) { - mtx_lock(&nfsrchash_table[i].mtx); - LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, + mtx_lock(&VNET(nfsrchash_table)[i].mtx); + LIST_FOREACH_SAFE(rp, &VNET(nfsrchash_table)[i].tbl, rc_hash, nextrp) { if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED)) @@ -962,7 +967,7 @@ rp->rc_acked == RC_ACK)) nfsrc_freecache(rp); } - mtx_unlock(&nfsrchash_table[i].mtx); + mtx_unlock(&VNET(nfsrchash_table)[i].mtx); } } } diff --git a/sys/fs/nfsserver/nfs_nfsdkrpc.c.vnetdcl b/sys/fs/nfsserver/nfs_nfsdkrpc.c --- a/sys/fs/nfsserver/nfs_nfsdkrpc.c.vnetdcl +++ b/sys/fs/nfsserver/nfs_nfsdkrpc.c @@ -41,6 +41,7 @@ #include "opt_kern_tls.h" #include +#include #include #include @@ -52,7 +53,6 @@ NFSDLOCKMUTEX; NFSV4ROOTLOCKMUTEX; -struct nfsv4lock nfsd_suspend_lock; char *nfsrv_zeropnfsdat = NULL; /* @@ -85,32 +85,39 @@ SYSCTL_DECL(_vfs_nfsd); -SVCPOOL *nfsrvd_pool; - -static int nfs_privport = 0; -SYSCTL_INT(_vfs_nfsd, OID_AUTO, nfs_privport, CTLFLAG_RWTUN, - &nfs_privport, 0, +VNET_DEFINE_STATIC(int, nfs_privport) = 0; +SYSCTL_INT(_vfs_nfsd, OID_AUTO, nfs_privport, CTLFLAG_VNET | CTLFLAG_RWTUN, + &VNET_NAME(nfs_privport), 0, "Only allow clients using a privileged port for NFSv2, 3 and 4"); -static int nfs_minvers = NFS_VER2; -SYSCTL_INT(_vfs_nfsd, OID_AUTO, server_min_nfsvers, CTLFLAG_RWTUN, - &nfs_minvers, 0, "The lowest version of NFS handled by the server"); +VNET_DEFINE_STATIC(int, nfs_minvers) = NFS_VER2; +SYSCTL_INT(_vfs_nfsd, OID_AUTO, server_min_nfsvers, + CTLFLAG_VNET | CTLFLAG_RWTUN, &VNET_NAME(nfs_minvers), 0, + "The lowest version of NFS handled by the server"); -static int nfs_maxvers = NFS_VER4; -SYSCTL_INT(_vfs_nfsd, OID_AUTO, server_max_nfsvers, CTLFLAG_RWTUN, - &nfs_maxvers, 0, "The highest version of NFS handled by the server"); +VNET_DEFINE_STATIC(int, nfs_maxvers) = NFS_VER4; +SYSCTL_INT(_vfs_nfsd, OID_AUTO, server_max_nfsvers, + CTLFLAG_VNET | CTLFLAG_RWTUN, &VNET_NAME(nfs_maxvers), 0, + "The highest version of NFS handled by the server"); static int 
nfs_proc(struct nfsrv_descript *, u_int32_t, SVCXPRT *xprt, struct nfsrvcache **); extern u_long sb_max_adj; extern int newnfs_numnfsd; -extern struct proc *nfsd_master_proc; extern time_t nfsdev_time; extern int nfsrv_writerpc[NFS_NPROCS]; extern volatile int nfsrv_devidcnt; extern struct nfsv4_opflag nfsv4_opflag[NFSV42_NOPS]; +VNET_DECLARE(struct proc *, nfsd_master_proc); + +VNET_DEFINE(SVCPOOL *, nfsrvd_pool); +VNET_DEFINE(int, nfsrv_numnfsd) = 0; +VNET_DEFINE(struct nfsv4lock, nfsd_suspend_lock); + +VNET_DEFINE_STATIC(bool, nfsrvd_inited) = false; + /* * NFS server system calls */ @@ -125,6 +132,7 @@ u_int maxlen; #endif + CURVNET_SET(TD_TO_VNET(curthread)); memset(&nd, 0, sizeof(nd)); if (rqst->rq_vers == NFS_VER2) { if (rqst->rq_proc > NFSV2PROC_STATFS || @@ -169,7 +177,7 @@ nd.nd_mreq = NULL; nd.nd_cred = NULL; - if (nfs_privport != 0) { + if (VNET(nfs_privport) != 0) { /* Check if source port is privileged */ u_short port; struct sockaddr *nam = nd.nd_nam; @@ -261,9 +269,9 @@ * nfsv4root exports by nfsvno_v4rootexport(). 
*/ NFSLOCKV4ROOTMUTEX(); - nfsv4_lock(&nfsd_suspend_lock, 0, NULL, NFSV4ROOTLOCKMUTEXPTR, + nfsv4_lock(&VNET(nfsd_suspend_lock), 0, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); - nfsv4_getref(&nfsd_suspend_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, + nfsv4_getref(&VNET(nfsd_suspend_lock), NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); NFSUNLOCKV4ROOTMUTEX(); @@ -271,7 +279,7 @@ nd.nd_repstat = nfsvno_v4rootexport(&nd); if (nd.nd_repstat != 0) { NFSLOCKV4ROOTMUTEX(); - nfsv4_relref(&nfsd_suspend_lock); + nfsv4_relref(&VNET(nfsd_suspend_lock)); NFSUNLOCKV4ROOTMUTEX(); svcerr_weakauth(rqst); svc_freereq(rqst); @@ -287,7 +295,7 @@ #endif cacherep = nfs_proc(&nd, rqst->rq_xid, xprt, &rp); NFSLOCKV4ROOTMUTEX(); - nfsv4_relref(&nfsd_suspend_lock); + nfsv4_relref(&VNET(nfsd_suspend_lock)); NFSUNLOCKV4ROOTMUTEX(); } else { NFSMGET(nd.nd_mreq); @@ -327,6 +335,7 @@ svc_freereq(rqst); out: + CURVNET_RESTORE(); ast_kclear(curthread); NFSEXITCODE(0); } @@ -467,26 +476,28 @@ * unexpectedly. */ if (so->so_type == SOCK_DGRAM) - xprt = svc_dg_create(nfsrvd_pool, so, 0, 0); + xprt = svc_dg_create(VNET(nfsrvd_pool), so, 0, 0); else - xprt = svc_vc_create(nfsrvd_pool, so, 0, 0); + xprt = svc_vc_create(VNET(nfsrvd_pool), so, 0, 0); if (xprt) { fp->f_ops = &badfileops; fp->f_data = NULL; xprt->xp_sockref = ++sockref; - if (nfs_minvers == NFS_VER2) + if (VNET(nfs_minvers) == NFS_VER2) svc_reg(xprt, NFS_PROG, NFS_VER2, nfssvc_program, NULL); - if (nfs_minvers <= NFS_VER3 && nfs_maxvers >= NFS_VER3) + if (VNET(nfs_minvers) <= NFS_VER3 && + VNET(nfs_maxvers) >= NFS_VER3) svc_reg(xprt, NFS_PROG, NFS_VER3, nfssvc_program, NULL); - if (nfs_maxvers >= NFS_VER4) + if (VNET(nfs_maxvers) >= NFS_VER4) svc_reg(xprt, NFS_PROG, NFS_VER4, nfssvc_program, NULL); if (so->so_type == SOCK_STREAM) svc_loss_reg(xprt, nfssvc_loss); SVC_RELEASE(xprt); - } + } else + error = EPERM; out: NFSEXITCODE(error); @@ -518,13 +529,15 @@ * use. 
*/ NFSD_LOCK(); - if (newnfs_numnfsd == 0) { + if (VNET(nfsrv_numnfsd) == 0) { + nfsrvd_init(0); nfsdev_time = time_second; p = td->td_proc; PROC_LOCK(p); p->p_flag2 |= P2_AST_SU; PROC_UNLOCK(p); - newnfs_numnfsd++; + newnfs_numnfsd++; /* Total num for all vnets. */ + VNET(nfsrv_numnfsd)++; /* Num for this vnet. */ NFSD_UNLOCK(); error = nfsrv_createdevids(args, td); @@ -546,8 +559,8 @@ "nfsd: can't register svc name\n"); } - nfsrvd_pool->sp_minthreads = args->minthreads; - nfsrvd_pool->sp_maxthreads = args->maxthreads; + VNET(nfsrvd_pool)->sp_minthreads = args->minthreads; + VNET(nfsrvd_pool)->sp_maxthreads = args->maxthreads; /* * If this is a pNFS service, make Getattr do a @@ -558,7 +571,7 @@ nfsv4_opflag[NFSV4OP_GETATTR].modifyfs = 1; } - svc_run(nfsrvd_pool); + svc_run(VNET(nfsrvd_pool)); /* Reset Getattr to not do a vn_start_write(). */ nfsrv_writerpc[NFSPROC_GETATTR] = 0; @@ -572,6 +585,7 @@ } NFSD_LOCK(); newnfs_numnfsd--; + VNET(nfsrv_numnfsd)--; nfsrvd_init(1); PROC_LOCK(p); p->p_flag2 &= ~P2_AST_SU; @@ -596,21 +610,29 @@ NFSD_LOCK_ASSERT(); if (terminating) { - nfsd_master_proc = NULL; + VNET(nfsd_master_proc) = NULL; NFSD_UNLOCK(); nfsrv_freealllayoutsanddevids(); nfsrv_freeallbackchannel_xprts(); - svcpool_close(nfsrvd_pool); + svcpool_close(VNET(nfsrvd_pool)); free(nfsrv_zeropnfsdat, M_TEMP); nfsrv_zeropnfsdat = NULL; NFSD_LOCK(); } else { + /* Initialize per-vnet globals once per vnet. 
*/ + if (VNET(nfsrvd_inited)) + return; + VNET(nfsrvd_inited) = true; NFSD_UNLOCK(); - nfsrvd_pool = svcpool_create("nfsd", +#ifndef VNET_NFSD + nfsrvd_initcache(); + nfsd_init(); +#endif + VNET(nfsrvd_pool) = svcpool_create("nfsd", SYSCTL_STATIC_CHILDREN(_vfs_nfsd)); - nfsrvd_pool->sp_rcache = NULL; - nfsrvd_pool->sp_assign = fhanew_assign; - nfsrvd_pool->sp_done = fhanew_nd_complete; + VNET(nfsrvd_pool)->sp_rcache = NULL; + VNET(nfsrvd_pool)->sp_assign = fhanew_assign; + VNET(nfsrvd_pool)->sp_done = fhanew_nd_complete; NFSD_LOCK(); } } diff --git a/sys/fs/nfsserver/nfs_nfsdport.c.vnet b/sys/fs/nfsserver/nfs_nfsdport.c --- a/sys/fs/nfsserver/nfs_nfsdport.c.vnet +++ b/sys/fs/nfsserver/nfs_nfsdport.c @@ -46,10 +46,12 @@ */ #include +#include #include #include #include #include +#include #include #include #include @@ -59,46 +61,60 @@ extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1; extern int nfsrv_useacl; extern int newnfs_numnfsd; -extern struct mount nfsv4root_mnt; -extern struct nfsrv_stablefirst nfsrv_stablefirst; -extern SVCPOOL *nfsrvd_pool; -extern struct nfsv4lock nfsd_suspend_lock; -extern struct nfsclienthashhead *nfsclienthash; -extern struct nfslockhashhead *nfslockhash; -extern struct nfssessionhash *nfssessionhash; extern int nfsrv_sessionhashsize; -extern struct nfsstatsv1 nfsstatsv1; extern struct nfslayouthash *nfslayouthash; extern int nfsrv_layouthashsize; extern struct mtx nfsrv_dslock_mtx; extern int nfs_pnfsiothreads; -extern struct nfsdontlisthead nfsrv_dontlisthead; -extern volatile int nfsrv_dontlistlen; extern volatile int nfsrv_devidcnt; extern int nfsrv_maxpnfsmirror; extern uint32_t nfs_srvmaxio; extern int nfs_bufpackets; extern u_long sb_max_adj; -struct vfsoptlist nfsv4root_opt, nfsv4root_newopt; + +NFSDSTATSDECLARE(nfsstatsv1); + +VNET_DECLARE(int, nfsrv_numnfsd); +VNET_DECLARE(struct nfsrv_stablefirst, nfsrv_stablefirst); +VNET_DECLARE(SVCPOOL *, nfsrvd_pool); +VNET_DECLARE(struct nfsclienthashhead *, nfsclienthash); 
+VNET_DECLARE(struct nfslockhashhead *, nfslockhash); +VNET_DECLARE(struct nfssessionhash *, nfssessionhash); +VNET_DECLARE(struct nfsv4lock, nfsd_suspend_lock); + NFSDLOCKMUTEX; NFSSTATESPINLOCK; -struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE]; -struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE]; struct mtx nfsrc_udpmtx; struct mtx nfs_v4root_mutex; struct mtx nfsrv_dontlistlock_mtx; struct mtx nfsrv_recalllock_mtx; -struct nfsrvfh nfs_rootfh, nfs_pubfh; -int nfs_pubfhset = 0, nfs_rootfhset = 0; -struct proc *nfsd_master_proc = NULL; +struct nfsrvfh nfs_pubfh; +int nfs_pubfhset = 0; int nfsd_debuglevel = 0; static pid_t nfsd_master_pid = (pid_t)-1; static char nfsd_master_comm[MAXCOMLEN + 1]; static struct timeval nfsd_master_start; static uint32_t nfsv4_sysid = 0; static fhandle_t zerofh; -struct callout nfsd_callout; +#ifdef VNET_NFSD +static int nfsrv_osd_jail_slot; +#endif +VNET_DEFINE(struct proc *, nfsd_master_proc) = NULL; +VNET_DEFINE(struct nfsrchash_bucket, nfsrchash_table[NFSRVCACHE_HASHSIZE]); +VNET_DEFINE(struct nfsrchash_bucket, nfsrcahash_table[NFSRVCACHE_HASHSIZE]); +VNET_DEFINE(struct nfsrvfh, nfs_rootfh); +VNET_DEFINE(int, nfs_rootfhset) = 0; +VNET_DEFINE(struct callout, nfsd_callout); + +VNET_DEFINE_STATIC(struct mount, nfsv4root_mnt); +VNET_DEFINE_STATIC(struct vfsoptlist, nfsv4root_opt); +VNET_DEFINE_STATIC(struct vfsoptlist, nfsv4root_newopt); +VNET_DEFINE_STATIC(bool, nfsrv_suspend_nfsd) = false; +VNET_DEFINE_STATIC(bool, nfsrv_mntinited) = false; + +static void nfsrv_cleanup(struct prison *); + static int nfssvc_srvcall(struct thread *, struct nfssvc_args *, struct ucred *); static void nfsvno_updateds(struct vnode *, struct ucred *, struct thread *); @@ -1026,7 +1042,7 @@ nh = nfsrv_sequential_heuristic(uiop, vp); ioflag |= nh->nh_seqcount << IO_SEQSHIFT; /* XXX KDM make this more systematic? 
*/ - nfsstatsv1.srvbytes[NFSV4OP_READ] += uiop->uio_resid; + NFSDSTATS(nfsstatsv1).srvbytes[NFSV4OP_READ] += uiop->uio_resid; error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred); free(iv, M_TEMP); if (error) { @@ -1151,7 +1167,7 @@ nh = nfsrv_sequential_heuristic(uiop, vp); ioflags |= nh->nh_seqcount << IO_SEQSHIFT; /* XXX KDM make this more systematic? */ - nfsstatsv1.srvbytes[NFSV4OP_WRITE] += uiop->uio_resid; + NFSDSTATS(nfsstatsv1).srvbytes[NFSV4OP_WRITE] += uiop->uio_resid; error = VOP_WRITE(vp, uiop, ioflags, cred); if (error == 0) nh->nh_nextoff = uiop->uio_offset; @@ -3255,7 +3271,7 @@ error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, &exp->nes_numsecflavor, exp->nes_secflavors); if (error) { - if (nfs_rootfhset) { + if (VNET(nfs_rootfhset)) { exp->nes_exflag = 0; exp->nes_numsecflavor = 0; error = 0; @@ -3290,7 +3306,7 @@ error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, &exp->nes_numsecflavor, exp->nes_secflavors); if (error) { - if (nfs_rootfhset) { + if (VNET(nfs_rootfhset)) { exp->nes_exflag = 0; exp->nes_numsecflavor = 0; error = 0; @@ -3458,9 +3474,9 @@ struct nameidata nd; fhandle_t fh; - error = vfs_export(&nfsv4root_mnt, &nfsexargp->export); + error = vfs_export(&VNET(nfsv4root_mnt), &nfsexargp->export); if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0) - nfs_rootfhset = 0; + VNET(nfs_rootfhset) = 0; else if (error == 0) { if (nfsexargp->fspec == NULL) { error = EPERM; @@ -3475,11 +3491,11 @@ error = nfsvno_getfh(nd.ni_vp, &fh, p); vrele(nd.ni_vp); if (!error) { - nfs_rootfh.nfsrvfh_len = NFSX_MYFH; + VNET(nfs_rootfh).nfsrvfh_len = NFSX_MYFH; NFSBCOPY((caddr_t)&fh, - nfs_rootfh.nfsrvfh_data, + VNET(nfs_rootfh).nfsrvfh_data, sizeof (fhandle_t)); - nfs_rootfhset = 1; + VNET(nfs_rootfhset) = 1; } } @@ -3515,29 +3531,37 @@ void nfsd_mntinit(void) { - static int inited = 0; - if (inited) + if (VNET(nfsrv_mntinited)) return; - inited = 1; - nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED); - 
TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist); - TAILQ_INIT(&nfsv4root_mnt.mnt_lazyvnodelist); - nfsv4root_mnt.mnt_export = NULL; - TAILQ_INIT(&nfsv4root_opt); - TAILQ_INIT(&nfsv4root_newopt); - nfsv4root_mnt.mnt_opt = &nfsv4root_opt; - nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt; - nfsv4root_mnt.mnt_nvnodelistsize = 0; - nfsv4root_mnt.mnt_lazyvnodelistsize = 0; + VNET(nfsrv_mntinited) = true; + VNET(nfsv4root_mnt).mnt_flag = (MNT_RDONLY | MNT_EXPORTED); + mtx_init(&VNET(nfsv4root_mnt).mnt_mtx, "nfs4mnt", NULL, MTX_DEF); + lockinit(&VNET(nfsv4root_mnt).mnt_explock, PVFS, "explock", 0, 0); + TAILQ_INIT(&VNET(nfsv4root_mnt).mnt_nvnodelist); + TAILQ_INIT(&VNET(nfsv4root_mnt).mnt_lazyvnodelist); + VNET(nfsv4root_mnt).mnt_export = NULL; + TAILQ_INIT(&VNET(nfsv4root_opt)); + TAILQ_INIT(&VNET(nfsv4root_newopt)); + VNET(nfsv4root_mnt).mnt_opt = &VNET(nfsv4root_opt); + VNET(nfsv4root_mnt).mnt_optnew = &VNET(nfsv4root_newopt); + VNET(nfsv4root_mnt).mnt_nvnodelistsize = 0; + VNET(nfsv4root_mnt).mnt_lazyvnodelistsize = 0; + callout_init(&VNET(nfsd_callout), 1); } static void nfsd_timer(void *arg) { +#ifdef VNET_NFSD + struct vnet *vnetp; + vnetp = (struct vnet *)arg; +#endif + CURVNET_SET(vnetp); nfsrv_servertimer(); - callout_reset_sbt(&nfsd_callout, SBT_1S, SBT_1S, nfsd_timer, NULL, 0); + callout_reset_sbt(&VNET(nfsd_callout), SBT_1S, SBT_1S, nfsd_timer, arg, 0); + CURVNET_RESTORE(); } /* @@ -3619,7 +3643,7 @@ int error = 0, numsecflavor, secflavors[MAXSECFLAVORS], i; uint64_t exflags; - error = vfs_stdcheckexp(&nfsv4root_mnt, nd->nd_nam, &exflags, + error = vfs_stdcheckexp(&VNET(nfsv4root_mnt), nd->nd_nam, &exflags, &credanon, &numsecflavor, secflavors); if (error) { error = NFSERR_PROGUNAVAIL; @@ -3652,6 +3676,18 @@ return (error); } +#ifdef VNET_NFSD +/* Osd entry for nfsrv_cleanup. 
*/ +static int +nfsrv_prison_cleanup(void *obj, void *data __unused) +{ + struct prison *pr = obj; + + nfsrv_cleanup(pr); + return (0); +} +#endif + /* * Nfs server pseudo system call for the nfsd's */ @@ -3674,6 +3710,13 @@ char *buf, *cp, *cp2, *cp3; char fname[PNFS_FILENAME_LEN + 1]; + if (jailed(td->td_ucred) && !prison_check_nfsd(td->td_ucred)) { + error = EPERM; + goto out; + } +#ifndef VNET_NFSD + nfsd_mntinit(); +#endif if (uap->flag & NFSSVC_NFSDADDSOCK) { error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg)); if (error) @@ -3781,8 +3824,11 @@ nfsdarg.mdspathlen = 0; nfsdarg.mirrorcnt = 1; } - nfsd_timer(NULL); + nfsd_timer(TD_TO_VNET(td)); error = nfsrvd_nfsd(td, &nfsdarg); +#ifdef VNET_NFSD + callout_drain(&VNET(nfsd_callout)); +#endif free(nfsdarg.addr, M_TEMP); free(nfsdarg.dnshost, M_TEMP); free(nfsdarg.dspath, M_TEMP); @@ -3881,7 +3927,6 @@ int error = EINVAL, igotlock; struct proc *procp; gid_t *grps; - static int suspend_nfsd = 0; if (uap->flag & NFSSVC_PUBLICFH) { NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data, @@ -3965,10 +4010,10 @@ error = fp_getfvp(p, stablefd, &fp, &vp); if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE)) error = EBADF; - if (!error && newnfs_numnfsd != 0) + if (!error && VNET(nfsrv_numnfsd) != 0) error = EPERM; if (!error) { - nfsrv_stablefirst.nsf_fp = fp; + VNET(nfsrv_stablefirst).nsf_fp = fp; nfsrv_setupstable(p); } } else if (uap->flag & NFSSVC_ADMINREVOKE) { @@ -4015,25 +4060,25 @@ nfsd_master_pid = procp->p_pid; bcopy(procp->p_comm, nfsd_master_comm, MAXCOMLEN + 1); nfsd_master_start = procp->p_stats->p_start; - nfsd_master_proc = procp; + VNET(nfsd_master_proc) = procp; PROC_UNLOCK(procp); } else if ((uap->flag & NFSSVC_SUSPENDNFSD) != 0) { NFSLOCKV4ROOTMUTEX(); - if (suspend_nfsd == 0) { + if (!VNET(nfsrv_suspend_nfsd)) { /* Lock out all nfsd threads */ do { - igotlock = nfsv4_lock(&nfsd_suspend_lock, 1, + igotlock = nfsv4_lock(&VNET(nfsd_suspend_lock), 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); - } 
while (igotlock == 0 && suspend_nfsd == 0); - suspend_nfsd = 1; + } while (igotlock == 0 && !VNET(nfsrv_suspend_nfsd)); + VNET(nfsrv_suspend_nfsd) = true; } NFSUNLOCKV4ROOTMUTEX(); error = 0; } else if ((uap->flag & NFSSVC_RESUMENFSD) != 0) { NFSLOCKV4ROOTMUTEX(); - if (suspend_nfsd != 0) { - nfsv4_unlock(&nfsd_suspend_lock, 0); - suspend_nfsd = 0; + if (VNET(nfsrv_suspend_nfsd)) { + nfsv4_unlock(&VNET(nfsd_suspend_lock), 0); + VNET(nfsrv_suspend_nfsd) = false; } NFSUNLOCKV4ROOTMUTEX(); error = 0; @@ -4141,10 +4186,10 @@ { struct proc *procp; - if (nfsd_master_proc != NULL) { + if (VNET(nfsd_master_proc) != NULL) { procp = pfind(nfsd_master_pid); /* Try to make sure it is the correct process. */ - if (procp == nfsd_master_proc && + if (procp == VNET(nfsd_master_proc) && procp->p_stats->p_start.tv_sec == nfsd_master_start.tv_sec && procp->p_stats->p_start.tv_usec == @@ -4152,7 +4197,7 @@ strcmp(procp->p_comm, nfsd_master_comm) == 0) kern_psignal(procp, SIGUSR2); else - nfsd_master_proc = NULL; + VNET(nfsd_master_proc) = NULL; if (procp != NULL) PROC_UNLOCK(procp); @@ -7063,6 +7108,57 @@ free(devid, M_TEMP); } +#ifdef VNET_NFSD +/* + * Initialize everything that needs to be initialized for a vnet. + */ +static void +nfsrv_vnetinit(const void *unused __unused) +{ + + nfsrvd_initcache(); + nfsd_mntinit(); + nfsd_init(); +} +VNET_SYSINIT(nfsrv_vnetinit, SI_SUB_VNET_DONE, SI_ORDER_ANY, + nfsrv_vnetinit, NULL); +#endif + +/* + * Clean up everything that is in a vnet and needs to be + * done when the jail is destroyed or the module unloaded. + */ +static void +nfsrv_cleanup(struct prison *pr) +{ + int i; + + CURVNET_SET(pr->pr_vnet); + /* Clean out all NFSv4 state. */ + nfsrv_throwawayallstate(curthread); + + /* Clean the NFS server reply cache */ + nfsrvd_cleancache(); + + /* Free up the krpc server pool. 
*/ + if (VNET(nfsrvd_pool) != NULL) + svcpool_destroy(VNET(nfsrvd_pool)); + + /* and get rid of the locks */ + for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { + mtx_destroy(&VNET(nfsrchash_table)[i].mtx); + mtx_destroy(&VNET(nfsrcahash_table)[i].mtx); + } + mtx_destroy(&VNET(nfsv4root_mnt).mnt_mtx); + for (i = 0; i < nfsrv_sessionhashsize; i++) + mtx_destroy(&VNET(nfssessionhash)[i].mtx); + lockdestroy(&VNET(nfsv4root_mnt).mnt_explock); + free(VNET(nfsclienthash), M_NFSDCLIENT); + free(VNET(nfslockhash), M_NFSDLOCKFILE); + free(VNET(nfssessionhash), M_NFSDSESSION); + CURVNET_RESTORE(); +} + extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *); /* @@ -7072,6 +7168,11 @@ nfsd_modevent(module_t mod, int type, void *data) { int error = 0, i; +#ifdef VNET_NFSD + osd_method_t methods[PR_MAXMETHOD] = { + [PR_METHOD_REMOVE] = nfsrv_prison_cleanup, + }; +#endif static int loaded = 0; switch (type) { @@ -7079,30 +7180,20 @@ if (loaded) goto out; newnfs_portinit(); - for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { - mtx_init(&nfsrchash_table[i].mtx, "nfsrtc", NULL, - MTX_DEF); - mtx_init(&nfsrcahash_table[i].mtx, "nfsrtca", NULL, - MTX_DEF); - } mtx_init(&nfsrc_udpmtx, "nfsuc", NULL, MTX_DEF); mtx_init(&nfs_v4root_mutex, "nfs4rt", NULL, MTX_DEF); - mtx_init(&nfsv4root_mnt.mnt_mtx, "nfs4mnt", NULL, MTX_DEF); mtx_init(&nfsrv_dontlistlock_mtx, "nfs4dnl", NULL, MTX_DEF); mtx_init(&nfsrv_recalllock_mtx, "nfs4rec", NULL, MTX_DEF); - lockinit(&nfsv4root_mnt.mnt_explock, PVFS, "explock", 0, 0); - callout_init(&nfsd_callout, 1); - nfsrvd_initcache(); - nfsd_init(); - NFSD_LOCK(); - nfsrvd_init(0); - NFSD_UNLOCK(); - nfsd_mntinit(); + NFSDSTATS(nfsstatsv1).srvcache_tcppeak = 0; + NFSDSTATS(nfsstatsv1).srvcache_size = 0; #ifdef VV_DISABLEDELEG vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation; vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation; #endif nfsd_call_nfsd = nfssvc_nfsd; +#ifdef VNET_NFSD + nfsrv_osd_jail_slot = osd_jail_register(NULL, methods); +#endif loaded = 1; 
break; @@ -7117,39 +7208,21 @@ vn_deleg_ops.vndeleg_disable = NULL; #endif nfsd_call_nfsd = NULL; +#ifdef VNET_NFSD + osd_jail_deregister(nfsrv_osd_jail_slot); +#else callout_drain(&nfsd_callout); - - /* Clean out all NFSv4 state. */ - nfsrv_throwawayallstate(curthread); - - /* Clean the NFS server reply cache */ - nfsrvd_cleancache(); - - /* Free up the krpc server pool. */ - if (nfsrvd_pool != NULL) - svcpool_destroy(nfsrvd_pool); - - /* and get rid of the locks */ - for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { - mtx_destroy(&nfsrchash_table[i].mtx); - mtx_destroy(&nfsrcahash_table[i].mtx); - } +#endif + nfsrv_cleanup(&prison0); mtx_destroy(&nfsrc_udpmtx); mtx_destroy(&nfs_v4root_mutex); - mtx_destroy(&nfsv4root_mnt.mnt_mtx); mtx_destroy(&nfsrv_dontlistlock_mtx); mtx_destroy(&nfsrv_recalllock_mtx); - for (i = 0; i < nfsrv_sessionhashsize; i++) - mtx_destroy(&nfssessionhash[i].mtx); if (nfslayouthash != NULL) { for (i = 0; i < nfsrv_layouthashsize; i++) mtx_destroy(&nfslayouthash[i].mtx); free(nfslayouthash, M_NFSDSESSION); } - lockdestroy(&nfsv4root_mnt.mnt_explock); - free(nfsclienthash, M_NFSDCLIENT); - free(nfslockhash, M_NFSDLOCKFILE); - free(nfssessionhash, M_NFSDSESSION); loaded = 0; break; default: diff --git a/sys/fs/nfsserver/nfs_nfsdsocket.c.vnet b/sys/fs/nfsserver/nfs_nfsdsocket.c --- a/sys/fs/nfsserver/nfs_nfsdsocket.c.vnet +++ b/sys/fs/nfsserver/nfs_nfsdsocket.c @@ -41,21 +41,27 @@ */ #include +#include -extern struct nfsstatsv1 nfsstatsv1; -extern struct nfsrvfh nfs_pubfh, nfs_rootfh; -extern int nfs_pubfhset, nfs_rootfhset; +extern struct nfsrvfh nfs_pubfh; +extern int nfs_pubfhset; extern struct nfsv4lock nfsv4rootfs_lock; -extern struct nfsrv_stablefirst nfsrv_stablefirst; -extern struct nfsclienthashhead *nfsclienthash; extern int nfsrv_clienthashsize; -extern int nfsrc_floodlevel, nfsrc_tcpsavedreplies; extern int nfsd_debuglevel; extern int nfsrv_layouthighwater; extern volatile int nfsrv_layoutcnt; NFSV4ROOTLOCKMUTEX; NFSSTATESPINLOCK; 
+NFSDSTATSDECLARE(nfsstatsv1); + +VNET_DECLARE(struct nfsrv_stablefirst, nfsrv_stablefirst); +VNET_DECLARE(struct nfsclienthashhead *, nfsclienthash); +VNET_DECLARE(int, nfsrc_floodlevel); +VNET_DECLARE(int, nfsrc_tcpsavedreplies); +VNET_DECLARE(struct nfsrvfh, nfs_rootfh); +VNET_DECLARE(int, nfs_rootfhset); + int (*nfsrv3_procs0[NFS_V3NPROCS])(struct nfsrv_descript *, int, vnode_t , struct nfsexstuff *) = { (int (*)(struct nfsrv_descript *, int, vnode_t , struct nfsexstuff *))0, @@ -471,15 +477,15 @@ } mtx_lock(&nfsrvd_statmtx); - if (nfsstatsv1.srvstartcnt == nfsstatsv1.srvdonecnt) { + if (NFSDSTATS(nfsstatsv1).srvstartcnt == NFSDSTATS(nfsstatsv1).srvdonecnt) { if (now != NULL) - nfsstatsv1.busyfrom = *now; + NFSDSTATS(nfsstatsv1).busyfrom = *now; else - binuptime(&nfsstatsv1.busyfrom); + binuptime(&NFSDSTATS(nfsstatsv1).busyfrom); } - nfsstatsv1.srvrpccnt[op]++; - nfsstatsv1.srvstartcnt++; + NFSDSTATS(nfsstatsv1).srvrpccnt[op]++; + NFSDSTATS(nfsstatsv1).srvstartcnt++; mtx_unlock(&nfsrvd_statmtx); } @@ -502,21 +508,21 @@ mtx_lock(&nfsrvd_statmtx); - nfsstatsv1.srvbytes[op] += bytes; - nfsstatsv1.srvops[op]++; + NFSDSTATS(nfsstatsv1).srvbytes[op] += bytes; + NFSDSTATS(nfsstatsv1).srvops[op]++; if (then != NULL) { dt = *now; bintime_sub(&dt, then); - bintime_add(&nfsstatsv1.srvduration[op], &dt); + bintime_add(&NFSDSTATS(nfsstatsv1).srvduration[op], &dt); } dt = *now; - bintime_sub(&dt, &nfsstatsv1.busyfrom); - bintime_add(&nfsstatsv1.busytime, &dt); - nfsstatsv1.busyfrom = *now; + bintime_sub(&dt, &NFSDSTATS(nfsstatsv1).busyfrom); + bintime_add(&NFSDSTATS(nfsstatsv1).busytime, &dt); + NFSDSTATS(nfsstatsv1).busyfrom = *now; - nfsstatsv1.srvdonecnt++; + NFSDSTATS(nfsstatsv1).srvdonecnt++; mtx_unlock(&nfsrvd_statmtx); } @@ -753,7 +759,7 @@ */ igotlock = 0; NFSLOCKV4ROOTMUTEX(); - if (nfsrv_stablefirst.nsf_flags & NFSNSF_NEEDLOCK) + if (VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_NEEDLOCK) igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); 
else @@ -766,8 +772,8 @@ * Done when the grace period is over or a client has long * since expired. */ - nfsrv_stablefirst.nsf_flags &= ~NFSNSF_NEEDLOCK; - if ((nfsrv_stablefirst.nsf_flags & + VNET(nfsrv_stablefirst).nsf_flags &= ~NFSNSF_NEEDLOCK; + if ((VNET(nfsrv_stablefirst).nsf_flags & (NFSNSF_GRACEOVER | NFSNSF_UPDATEDONE)) == NFSNSF_GRACEOVER) nfsrv_updatestable(p); @@ -777,10 +783,10 @@ * stable storage file and then remove them from the client * list. */ - if (nfsrv_stablefirst.nsf_flags & NFSNSF_EXPIREDCLIENT) { - nfsrv_stablefirst.nsf_flags &= ~NFSNSF_EXPIREDCLIENT; + if (VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_EXPIREDCLIENT) { + VNET(nfsrv_stablefirst).nsf_flags &= ~NFSNSF_EXPIREDCLIENT; for (i = 0; i < nfsrv_clienthashsize; i++) { - LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, + LIST_FOREACH_SAFE(clp, &VNET(nfsclienthash)[i], lc_hash, nclp) { if (clp->lc_flags & LCL_EXPIREIT) { if (!LIST_EMPTY(&clp->lc_open) || @@ -814,7 +820,7 @@ * If flagged, search for open owners that haven't had any opens * for a long time. */ - if (nfsrv_stablefirst.nsf_flags & NFSNSF_NOOPENS) { + if (VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_NOOPENS) { nfsrv_throwawayopens(p); } @@ -941,8 +947,8 @@ if (i == 0 && (nd->nd_rp == NULL || nd->nd_rp->rc_refcnt == 0) && (nfsrv_mallocmget_limit() || - nfsrc_tcpsavedreplies > nfsrc_floodlevel)) { - if (nfsrc_tcpsavedreplies > nfsrc_floodlevel) + VNET(nfsrc_tcpsavedreplies) > VNET(nfsrc_floodlevel))) { + if (VNET(nfsrc_tcpsavedreplies) > VNET(nfsrc_floodlevel)) printf("nfsd server cache flooded, try " "increasing vfs.nfsd.tcphighwater\n"); nd->nd_repstat = NFSERR_RESOURCE; @@ -1033,7 +1039,7 @@ } break; case NFSV4OP_PUTROOTFH: - if (nfs_rootfhset) { + if (VNET(nfs_rootfhset)) { if ((nd->nd_flag & ND_LASTOP) == 0) { /* * Pre-parse the next op#. 
If it is @@ -1054,7 +1060,7 @@ } while (nextop == NFSV4OP_SAVEFH && i < numops - 1); } - nfsd_fhtovp(nd, &nfs_rootfh, LK_SHARED, &nvp, + nfsd_fhtovp(nd, &VNET(nfs_rootfh), LK_SHARED, &nvp, &nes, NULL, 0, nextop); if (!nd->nd_repstat) { if (vp) diff --git a/sys/fs/nfsserver/nfs_nfsdstate.c.vnetdcl b/sys/fs/nfsserver/nfs_nfsdstate.c --- a/sys/fs/nfsserver/nfs_nfsdstate.c.vnetdcl +++ b/sys/fs/nfsserver/nfs_nfsdstate.c @@ -34,18 +34,20 @@ #include "opt_inet6.h" #include #include +#include -struct nfsrv_stablefirst nfsrv_stablefirst; int nfsrv_issuedelegs = 0; int nfsrv_dolocallocks = 0; struct nfsv4lock nfsv4rootfs_lock; time_t nfsdev_time = 0; int nfsrv_layouthashsize; volatile int nfsrv_layoutcnt = 0; -extern uint32_t nfs_srvmaxio; -extern int newnfs_numnfsd; -extern struct nfsstatsv1 nfsstatsv1; +VNET_DEFINE(struct nfsrv_stablefirst, nfsrv_stablefirst); + +VNET_DECLARE(int, nfsrv_numnfsd); + +extern uint32_t nfs_srvmaxio; extern int nfsrv_lease; extern struct timeval nfsboottime; extern u_int32_t newnfs_true, newnfs_false; @@ -64,6 +66,8 @@ extern struct nfslayouthead nfsrv_recalllisthead; extern char *nfsrv_zeropnfsdat; +NFSDSTATSDECLARE(nfsstatsv1); + SYSCTL_DECL(_vfs_nfsd); int nfsrv_statehashsize = NFSSTATEHASHSIZE; SYSCTL_INT(_vfs_nfsd, OID_AUTO, statehashsize, CTLFLAG_RDTUN, @@ -118,20 +122,22 @@ /* * Hash lists for nfs V4. 
*/ -struct nfsclienthashhead *nfsclienthash; -struct nfslockhashhead *nfslockhash; -struct nfssessionhash *nfssessionhash; +VNET_DEFINE(struct nfsclienthashhead *, nfsclienthash); +VNET_DEFINE(struct nfslockhashhead *, nfslockhash); +VNET_DEFINE(struct nfssessionhash *, nfssessionhash); + struct nfslayouthash *nfslayouthash; volatile int nfsrv_dontlistlen = 0; static u_int32_t nfsrv_openpluslock = 0, nfsrv_delegatecnt = 0; -static time_t nfsrvboottime; static int nfsrv_returnoldstateid = 0, nfsrv_clients = 0; static int nfsrv_clienthighwater = NFSRV_CLIENTHIGHWATER; static int nfsrv_nogsscallback = 0; static volatile int nfsrv_writedelegcnt = 0; static int nfsrv_faildscnt; +VNET_DEFINE_STATIC(time_t, nfsrvboottime); + /* local functions */ static void nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp); @@ -298,7 +304,7 @@ */ gotit = i = 0; while (i < nfsrv_clienthashsize && !gotit) { - LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) { + LIST_FOREACH(clp, &VNET(nfsclienthash)[i], lc_hash) { if (new_clp->lc_idlen == clp->lc_idlen && !NFSBCMP(new_clp->lc_id, clp->lc_id, clp->lc_idlen)) { gotit = 1; @@ -343,7 +349,7 @@ confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index; clientidp->lval[0] = new_clp->lc_clientid.lval[0] = - (u_int32_t)nfsrvboottime; + VNET(nfsrvboottime); clientidp->lval[1] = new_clp->lc_clientid.lval[1] = nfsrv_nextclientindex(); new_clp->lc_stateindex = 0; @@ -359,7 +365,7 @@ LIST_INIT(&new_clp->lc_stateid[i]); LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp, lc_hash); - nfsstatsv1.srvclients++; + NFSDSTATS(nfsstatsv1).srvclients++; nfsrv_openpluslock++; nfsrv_clients++; NFSLOCKV4ROOTMUTEX(); @@ -457,7 +463,7 @@ confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index; clientidp->lval[0] = new_clp->lc_clientid.lval[0] = - nfsrvboottime; + VNET(nfsrvboottime); clientidp->lval[1] = new_clp->lc_clientid.lval[1] = nfsrv_nextclientindex(); new_clp->lc_stateindex = 0; @@ -488,7 +494,7 @@ 
LIST_INIT(&new_clp->lc_session); LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp, lc_hash); - nfsstatsv1.srvclients++; + NFSDSTATS(nfsstatsv1).srvclients++; nfsrv_openpluslock++; nfsrv_clients++; NFSLOCKV4ROOTMUTEX(); @@ -553,7 +559,7 @@ LIST_INIT(&new_clp->lc_session); LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp, lc_hash); - nfsstatsv1.srvclients++; + NFSDSTATS(nfsstatsv1).srvclients++; nfsrv_openpluslock++; nfsrv_clients++; } @@ -604,7 +610,7 @@ if (clpp) *clpp = NULL; if ((nd == NULL || (nd->nd_flag & ND_NFSV41) == 0 || - opflags != CLOPS_RENEW) && nfsrvboottime != clientid.lval[0]) { + opflags != CLOPS_RENEW) && VNET(nfsrvboottime) != clientid.lval[0]) { error = NFSERR_STALECLIENTID; goto out; } @@ -820,7 +826,7 @@ struct nfsclienthashhead *hp; int error = 0, i, igotlock; - if (nfsrvboottime != clientid.lval[0]) { + if (VNET(nfsrvboottime) != clientid.lval[0]) { error = NFSERR_STALECLIENTID; goto out; } @@ -912,7 +918,7 @@ */ gotit = i = 0; while (i < nfsrv_clienthashsize && !gotit) { - LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) { + LIST_FOREACH(clp, &VNET(nfsclienthash)[i], lc_hash) { if (revokep->nclid_idlen == clp->lc_idlen && !NFSBCMP(revokep->nclid_id, clp->lc_id, clp->lc_idlen)) { gotit = 1; @@ -974,8 +980,8 @@ * Rattle through the client lists until done. */ while (i < nfsrv_clienthashsize && cnt < maxcnt) { - clp = LIST_FIRST(&nfsclienthash[i]); - while (clp != LIST_END(&nfsclienthash[i]) && cnt < maxcnt) { + clp = LIST_FIRST(&VNET(nfsclienthash)[i]); + while (clp != LIST_END(&VNET(nfsclienthash)[i]) && cnt < maxcnt) { nfsrv_dumpaclient(clp, &dumpp[cnt]); cnt++; clp = LIST_NEXT(clp, lc_hash); @@ -1251,14 +1257,14 @@ * If server hasn't started yet, just return. 
*/ NFSLOCKSTATE(); - if (nfsrv_stablefirst.nsf_eograce == 0) { + if (VNET(nfsrv_stablefirst).nsf_eograce == 0) { NFSUNLOCKSTATE(); return; } - if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE)) { - if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) && - NFSD_MONOSEC > nfsrv_stablefirst.nsf_eograce) - nfsrv_stablefirst.nsf_flags |= + if (!(VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_UPDATEDONE)) { + if (!(VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_GRACEOVER) && + NFSD_MONOSEC > VNET(nfsrv_stablefirst).nsf_eograce) + VNET(nfsrv_stablefirst).nsf_flags |= (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK); NFSUNLOCKSTATE(); return; @@ -1281,8 +1287,8 @@ * For each client... */ for (i = 0; i < nfsrv_clienthashsize; i++) { - clp = LIST_FIRST(&nfsclienthash[i]); - while (clp != LIST_END(&nfsclienthash[i])) { + clp = LIST_FIRST(&VNET(nfsclienthash)[i]); + while (clp != LIST_END(&VNET(nfsclienthash)[i])) { nclp = LIST_NEXT(clp, lc_hash); if (!(clp->lc_flags & LCL_EXPIREIT)) { if (((clp->lc_expiry + NFSRV_STALELEASE) < NFSD_MONOSEC @@ -1313,7 +1319,7 @@ * by an nfsd sometime soon. 
*/ clp->lc_flags |= LCL_EXPIREIT; - nfsrv_stablefirst.nsf_flags |= + VNET(nfsrv_stablefirst).nsf_flags |= (NFSNSF_NEEDLOCK | NFSNSF_EXPIREDCLIENT); } else { /* @@ -1331,7 +1337,7 @@ if (stp->ls_noopens > NFSNOOPEN || (nfsrv_openpluslock * 2) > nfsrv_v4statelimit) - nfsrv_stablefirst.nsf_flags |= + VNET(nfsrv_stablefirst).nsf_flags |= NFSNSF_NOOPENS; } else { stp->ls_noopens = 0; @@ -1397,7 +1403,7 @@ free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); NFSLOCKSTATE(); - nfsstatsv1.srvclients--; + NFSDSTATS(nfsstatsv1).srvclients--; nfsrv_openpluslock--; nfsrv_clients--; NFSUNLOCKSTATE(); @@ -1440,7 +1446,7 @@ nfsv4_testlock(&lfp->lf_locallock_lck) == 0) nfsrv_freenfslockfile(lfp); free(stp, M_NFSDSTATE); - nfsstatsv1.srvdelegates--; + NFSDSTATS(nfsstatsv1).srvdelegates--; nfsrv_openpluslock--; nfsrv_delegatecnt--; } @@ -1466,7 +1472,7 @@ if (stp->ls_op) nfsrvd_derefcache(stp->ls_op); free(stp, M_NFSDSTATE); - nfsstatsv1.srvopenowners--; + NFSDSTATS(nfsstatsv1).srvopenowners--; nfsrv_openpluslock--; } @@ -1516,7 +1522,7 @@ if (cansleep != 0) NFSUNLOCKSTATE(); free(stp, M_NFSDSTATE); - nfsstatsv1.srvopens--; + NFSDSTATS(nfsstatsv1).srvopens--; nfsrv_openpluslock--; return (ret); } @@ -1535,7 +1541,7 @@ if (stp->ls_op) nfsrvd_derefcache(stp->ls_op); free(stp, M_NFSDSTATE); - nfsstatsv1.srvlockowners--; + NFSDSTATS(nfsstatsv1).srvlockowners--; nfsrv_openpluslock--; } @@ -1611,7 +1617,7 @@ if (lop->lo_lckfile.le_prev != NULL) { LIST_REMOVE(lop, lo_lckfile); - nfsstatsv1.srvlocks--; + NFSDSTATS(nfsstatsv1).srvlocks--; nfsrv_openpluslock--; } LIST_REMOVE(lop, lo_lckowner); @@ -2388,7 +2394,7 @@ LIST_INSERT_HEAD(&stp->ls_open, new_stp, ls_list); *new_lopp = NULL; *new_stpp = NULL; - nfsstatsv1.srvlockowners++; + NFSDSTATS(nfsstatsv1).srvlockowners++; nfsrv_openpluslock++; } if (filestruct_locked != 0) { @@ -3040,12 +3046,12 @@ LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list); *new_stpp = NULL; - 
nfsstatsv1.srvopenowners++; + NFSDSTATS(nfsstatsv1).srvopenowners++; nfsrv_openpluslock++; } openstp = new_open; new_open = NULL; - nfsstatsv1.srvopens++; + NFSDSTATS(nfsstatsv1).srvopens++; nfsrv_openpluslock++; break; } @@ -3106,7 +3112,7 @@ NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) || !NFSVNO_DELEGOK(vp)) *rflagsp |= NFSV4OPEN_RECALL; - nfsstatsv1.srvdelegates++; + NFSDSTATS(nfsstatsv1).srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; @@ -3146,12 +3152,12 @@ LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list); *new_stpp = NULL; - nfsstatsv1.srvopenowners++; + NFSDSTATS(nfsstatsv1).srvopenowners++; nfsrv_openpluslock++; } openstp = new_open; new_open = NULL; - nfsstatsv1.srvopens++; + NFSDSTATS(nfsstatsv1).srvopens++; nfsrv_openpluslock++; } else { error = NFSERR_RECLAIMCONFLICT; @@ -3223,7 +3229,7 @@ new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; - nfsstatsv1.srvdelegates++; + NFSDSTATS(nfsstatsv1).srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; } @@ -3245,7 +3251,7 @@ new_open, ls_hash); openstp = new_open; new_open = NULL; - nfsstatsv1.srvopens++; + NFSDSTATS(nfsstatsv1).srvopens++; nfsrv_openpluslock++; /* @@ -3293,7 +3299,7 @@ new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; - nfsstatsv1.srvdelegates++; + NFSDSTATS(nfsstatsv1).srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; } @@ -3374,7 +3380,7 @@ LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; - nfsstatsv1.srvdelegates++; + NFSDSTATS(nfsstatsv1).srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; } @@ -3402,9 +3408,9 @@ openstp = new_open; new_open = NULL; *new_stpp = NULL; - nfsstatsv1.srvopens++; + NFSDSTATS(nfsstatsv1).srvopens++; nfsrv_openpluslock++; - nfsstatsv1.srvopenowners++; + NFSDSTATS(nfsstatsv1).srvopenowners++; nfsrv_openpluslock++; } if (!error) 
{ @@ -3880,7 +3886,7 @@ else LIST_INSERT_AFTER(insert_lop, new_lop, lo_lckowner); if (stp != NULL) { - nfsstatsv1.srvlocks++; + NFSDSTATS(nfsstatsv1).srvlocks++; nfsrv_openpluslock++; } } @@ -4357,11 +4363,11 @@ */ if (flags & (NFSLCK_OPEN | NFSLCK_TEST | NFSLCK_RELEASE | NFSLCK_DELEGPURGE)) { - if (clientid.lval[0] != nfsrvboottime) { + if (clientid.lval[0] != VNET(nfsrvboottime)) { ret = NFSERR_STALECLIENTID; goto out; } - } else if (stateidp->other[0] != nfsrvboottime && + } else if (stateidp->other[0] != VNET(nfsrvboottime) && specialid == 0) { ret = NFSERR_STALESTATEID; goto out; @@ -4394,25 +4400,25 @@ int error = 0, notreclaimed; struct nfsrv_stable *sp; - if ((nfsrv_stablefirst.nsf_flags & (NFSNSF_UPDATEDONE | + if ((VNET(nfsrv_stablefirst).nsf_flags & (NFSNSF_UPDATEDONE | NFSNSF_GRACEOVER)) == 0) { /* * First, check to see if all of the clients have done a * ReclaimComplete. If so, grace can end now. */ notreclaimed = 0; - LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) { + LIST_FOREACH(sp, &VNET(nfsrv_stablefirst).nsf_head, nst_list) { if ((sp->nst_flag & NFSNST_RECLAIMED) == 0) { notreclaimed = 1; break; } } if (notreclaimed == 0) - nfsrv_stablefirst.nsf_flags |= (NFSNSF_GRACEOVER | + VNET(nfsrv_stablefirst).nsf_flags |= (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK); } - if ((nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) != 0) { + if ((VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_GRACEOVER) != 0) { if (flags & NFSLCK_RECLAIM) { error = NFSERR_NOGRACE; goto out; @@ -4434,8 +4440,8 @@ * extend grace a bit. 
*/ if ((NFSD_MONOSEC + NFSRV_LEASEDELTA) > - nfsrv_stablefirst.nsf_eograce) - nfsrv_stablefirst.nsf_eograce = NFSD_MONOSEC + + VNET(nfsrv_stablefirst).nsf_eograce) + VNET(nfsrv_stablefirst).nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA; } @@ -4870,7 +4876,7 @@ void nfsrv_setupstable(NFSPROC_T *p) { - struct nfsrv_stablefirst *sf = &nfsrv_stablefirst; + struct nfsrv_stablefirst *sf = &VNET(nfsrv_stablefirst); struct nfsrv_stable *sp, *nsp; struct nfst_rec *tsp; int error, i, tryagain; @@ -4886,7 +4892,7 @@ /* * Set Grace over just until the file reads successfully. */ - nfsrvboottime = time_second; + VNET(nfsrvboottime) = time_second; LIST_INIT(&sf->nsf_head); sf->nsf_flags = (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK); sf->nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA; @@ -4903,7 +4909,7 @@ * Now, read in the boottimes. */ sf->nsf_bootvals = (time_t *)malloc((sf->nsf_numboots + 1) * - sizeof (time_t), M_TEMP, M_WAITOK); + sizeof(time_t), M_TEMP, M_WAITOK); off = sizeof (struct nfsf_rec); error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp), (caddr_t)sf->nsf_bootvals, sf->nsf_numboots * sizeof (time_t), off, @@ -4921,8 +4927,8 @@ do { tryagain = 0; for (i = 0; i < sf->nsf_numboots; i++) { - if (nfsrvboottime == sf->nsf_bootvals[i]) { - nfsrvboottime++; + if (VNET(nfsrvboottime) == sf->nsf_bootvals[i]) { + VNET(nfsrvboottime)++; tryagain = 1; break; } @@ -5005,7 +5011,7 @@ void nfsrv_updatestable(NFSPROC_T *p) { - struct nfsrv_stablefirst *sf = &nfsrv_stablefirst; + struct nfsrv_stablefirst *sf = &VNET(nfsrv_stablefirst); struct nfsrv_stable *sp, *nsp; int i; struct nfsvattr nva; @@ -5031,10 +5037,10 @@ sf->nsf_bootvals[i + 1] = sf->nsf_bootvals[i]; } else { sf->nsf_numboots = 1; - sf->nsf_bootvals = (time_t *)malloc(sizeof (time_t), + sf->nsf_bootvals = (time_t *)malloc(sizeof(time_t), M_TEMP, M_WAITOK); } - sf->nsf_bootvals[0] = nfsrvboottime; + sf->nsf_bootvals[0] = VNET(nfsrvboottime); sf->nsf_lease = nfsrv_lease; NFSVNO_ATTRINIT(&nva); NFSVNO_SETATTRVAL(&nva, size, 0); 
@@ -5088,7 +5094,7 @@ void nfsrv_writestable(u_char *client, int len, int flag, NFSPROC_T *p) { - struct nfsrv_stablefirst *sf = &nfsrv_stablefirst; + struct nfsrv_stablefirst *sf = &VNET(nfsrv_stablefirst); struct nfst_rec *sp; int error; @@ -5121,12 +5127,12 @@ /* * First find the client structure. */ - LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) { + LIST_FOREACH(sp, &VNET(nfsrv_stablefirst).nsf_head, nst_list) { if (sp->nst_len == clp->lc_idlen && !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len)) break; } - if (sp == LIST_END(&nfsrv_stablefirst.nsf_head)) + if (sp == LIST_END(&VNET(nfsrv_stablefirst).nsf_head)) return; /* @@ -5148,12 +5154,12 @@ /* * First find the client structure. */ - LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) { + LIST_FOREACH(sp, &VNET(nfsrv_stablefirst).nsf_head, nst_list) { if (sp->nst_len == clp->lc_idlen && !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len)) break; } - if (sp == LIST_END(&nfsrv_stablefirst.nsf_head)) + if (sp == LIST_END(&VNET(nfsrv_stablefirst).nsf_head)) return; /* @@ -5174,7 +5180,7 @@ /* * First, find the entry for the client. */ - LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) { + LIST_FOREACH(sp, &VNET(nfsrv_stablefirst).nsf_head, nst_list) { if (sp->nst_len == clp->lc_idlen && !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len)) break; @@ -5184,9 +5190,9 @@ * If not in the list, state was revoked or no state was issued * since the previous reboot, a reclaim is denied. */ - if (sp == LIST_END(&nfsrv_stablefirst.nsf_head) || + if (sp == LIST_END(&VNET(nfsrv_stablefirst).nsf_head) || (sp->nst_flag & NFSNST_REVOKE) || - !(nfsrv_stablefirst.nsf_flags & NFSNSF_OK)) + !(VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_OK)) return (1); return (0); } @@ -5214,7 +5220,7 @@ * If lease hasn't expired, we can't fix it. 
*/ if (clp->lc_expiry >= NFSD_MONOSEC || - !(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE)) + !(VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_UPDATEDONE)) return (0); if (*haslockp == 0) { NFSUNLOCKSTATE(); @@ -5613,7 +5619,7 @@ * First, check to see if the server is currently running and it has * been called for a regular file when issuing delegations. */ - if (newnfs_numnfsd == 0 || vp->v_type != VREG || + if (VNET(nfsrv_numnfsd) == 0 || vp->v_type != VREG || nfsrv_issuedelegs == 0) return; @@ -5847,12 +5853,12 @@ int i; NFSLOCKSTATE(); - nfsrv_stablefirst.nsf_flags &= ~NFSNSF_NOOPENS; + VNET(nfsrv_stablefirst).nsf_flags &= ~NFSNSF_NOOPENS; /* * For each client... */ for (i = 0; i < nfsrv_clienthashsize; i++) { - LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) { + LIST_FOREACH_SAFE(clp, &VNET(nfsclienthash)[i], lc_hash, nclp) { LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) { if (LIST_EMPTY(&stp->ls_open) && (stp->ls_noopens > NFSNOOPEN || @@ -5908,7 +5914,7 @@ nfsrv_leaseexpiry(void) { - if (nfsrv_stablefirst.nsf_eograce > NFSD_MONOSEC) + if (VNET(nfsrv_stablefirst).nsf_eograce > NFSD_MONOSEC) return (NFSD_MONOSEC + 2 * (nfsrv_lease + NFSRV_LEASEDELTA)); return (NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA); } @@ -6229,7 +6235,7 @@ * For each client, clean out the state and then free the structure. */ for (i = 0; i < nfsrv_clienthashsize; i++) { - LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) { + LIST_FOREACH_SAFE(clp, &VNET(nfsclienthash)[i], lc_hash, nclp) { nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); @@ -6242,7 +6248,7 @@ * Also, free up any remaining lock file structures. 
*/ for (i = 0; i < nfsrv_lockhashsize; i++) { - LIST_FOREACH_SAFE(lfp, &nfslockhash[i], lf_hash, nlfp) { + LIST_FOREACH_SAFE(lfp, &VNET(nfslockhash)[i], lf_hash, nlfp) { printf("nfsd unload: fnd a lock file struct\n"); nfsrv_freenfslockfile(lfp); } @@ -6360,7 +6366,7 @@ sep = nfsrv_findsession(nd->nd_sessionid); if (sep == NULL) { NFSUNLOCKSESSION(shp); - if ((nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) != 0) { + if ((VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_GRACEOVER) != 0) { buf = malloc(INET6_ADDRSTRLEN, M_TEMP, M_WAITOK); switch (nd->nd_nam->sa_family) { #ifdef INET @@ -6722,7 +6728,7 @@ int i; for (i = 0; i < nfsrv_clienthashsize; i++) { - LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) { + LIST_FOREACH(clp, &VNET(nfsclienthash)[i], lc_hash) { LIST_FOREACH(sep, &clp->lc_session, sess_list) { xprt = sep->sess_cbsess.nfsess_xprt; sep->sess_cbsess.nfsess_xprt = NULL; @@ -7438,7 +7444,7 @@ /* Insert the new layout in the lists. */ *lypp = NULL; atomic_add_int(&nfsrv_layoutcnt, 1); - nfsstatsv1.srvlayouts++; + NFSDSTATS(nfsstatsv1).srvlayouts++; NFSBCOPY(lyp->lay_xdr, layp, lyp->lay_layoutlen); *layoutlenp = lyp->lay_layoutlen; TAILQ_INSERT_HEAD(&lhyp->list, lyp, lay_list); @@ -7531,7 +7537,7 @@ NFSD_DEBUG(4, "Freelayout=%p\n", lyp); atomic_add_int(&nfsrv_layoutcnt, -1); - nfsstatsv1.srvlayouts--; + NFSDSTATS(nfsstatsv1).srvlayouts--; TAILQ_REMOVE(lhp, lyp, lay_list); free(lyp, M_NFSDSTATE); } @@ -7681,6 +7687,10 @@ NFSD_DEBUG(4, "setdssrv path=%s\n", dspathp); *dsp = NULL; + if (jailed(p->td_ucred)) { + printf("A pNFS nfsd cannot run in a jail\n"); + return (EPERM); + } NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, dspathp); error = namei(&nd); diff --git a/sys/fs/nfsserver/nfs_nfsdsubs.c.vnetdcl b/sys/fs/nfsserver/nfs_nfsdsubs.c --- a/sys/fs/nfsserver/nfs_nfsdsubs.c.vnetdcl +++ b/sys/fs/nfsserver/nfs_nfsdsubs.c @@ -42,20 +42,25 @@ * copy data between mbuf chains and uio lists. 
*/ #include +#include extern u_int32_t newnfs_true, newnfs_false; -extern int nfs_rootfhset; extern int nfs_pubfhset; -extern struct nfsclienthashhead *nfsclienthash; extern int nfsrv_clienthashsize; -extern struct nfslockhashhead *nfslockhash; extern int nfsrv_lockhashsize; -extern struct nfssessionhash *nfssessionhash; extern int nfsrv_sessionhashsize; extern int nfsrv_useacl; extern uid_t nfsrv_defaultuid; extern gid_t nfsrv_defaultgid; +VNET_DECLARE(struct nfsclienthashhead *, nfsclienthash); +VNET_DECLARE(struct nfslockhashhead *, nfslockhash); +VNET_DECLARE(struct nfssessionhash *, nfssessionhash); +VNET_DECLARE(int, nfs_rootfhset); + +VNET_DEFINE(struct nfsdontlisthead, nfsrv_dontlisthead); + + char nfs_v2pubfh[NFSX_V2FH]; struct nfsdontlisthead nfsrv_dontlisthead; struct nfslayouthead nfsrv_recalllisthead; @@ -2080,29 +2085,25 @@ nfsd_init(void) { int i; - static int inited = 0; - if (inited) - return; - inited = 1; /* * Initialize client queues. Don't free/reinitialize * them when nfsds are restarted. 
*/ - nfsclienthash = malloc(sizeof(struct nfsclienthashhead) * + VNET(nfsclienthash) = malloc(sizeof(struct nfsclienthashhead) * nfsrv_clienthashsize, M_NFSDCLIENT, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_clienthashsize; i++) - LIST_INIT(&nfsclienthash[i]); - nfslockhash = malloc(sizeof(struct nfslockhashhead) * + LIST_INIT(&VNET(nfsclienthash)[i]); + VNET(nfslockhash) = malloc(sizeof(struct nfslockhashhead) * nfsrv_lockhashsize, M_NFSDLOCKFILE, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_lockhashsize; i++) - LIST_INIT(&nfslockhash[i]); - nfssessionhash = malloc(sizeof(struct nfssessionhash) * + LIST_INIT(&VNET(nfslockhash)[i]); + VNET(nfssessionhash) = malloc(sizeof(struct nfssessionhash) * nfsrv_sessionhashsize, M_NFSDSESSION, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_sessionhashsize; i++) { - mtx_init(&nfssessionhash[i].mtx, "nfssm", NULL, MTX_DEF); - LIST_INIT(&nfssessionhash[i].list); + mtx_init(&VNET(nfssessionhash)[i].mtx, "nfssm", NULL, MTX_DEF); + LIST_INIT(&VNET(nfssessionhash)[i].list); } LIST_INIT(&nfsrv_dontlisthead); TAILQ_INIT(&nfsrv_recalllisthead); @@ -2119,7 +2120,7 @@ nfsd_checkrootexp(struct nfsrv_descript *nd) { - if (nfs_rootfhset == 0) + if (VNET(nfs_rootfhset) == 0) return (NFSERR_AUTHERR | AUTH_FAILED); if ((nd->nd_flag & (ND_GSS | ND_EXAUTHSYS)) == ND_EXAUTHSYS) goto checktls; diff --git a/sys/kern/kern_jail.c.vnet b/sys/kern/kern_jail.c --- a/sys/kern/kern_jail.c.vnet +++ b/sys/kern/kern_jail.c @@ -34,6 +34,7 @@ #include "opt_ddb.h" #include "opt_inet.h" #include "opt_inet6.h" +#include "opt_nfs.h" #include #include @@ -218,6 +219,9 @@ {"allow.unprivileged_proc_debug", "allow.nounprivileged_proc_debug", PR_ALLOW_UNPRIV_DEBUG}, {"allow.suser", "allow.nosuser", PR_ALLOW_SUSER}, +#if defined(VNET_NFSD) && defined(VIMAGE) && defined(NFSD) + {"allow.nfsd", "allow.nonfsd", PR_ALLOW_NFSD}, +#endif }; static unsigned pr_allow_all = PR_ALLOW_ALL_STATIC; const size_t pr_flag_allow_size = sizeof(pr_flag_allow); @@ -2102,6 +2106,13 @@ } #endif 
+#ifdef VNET_NFSD + if (born && pr != &prison0 && (pr->pr_allow & PR_ALLOW_NFSD) != 0 && + (pr->pr_root->v_vflag & VV_ROOT) == 0) + printf("Warning jail jid=%d: mountd/nfsd requires a separate" + " file system\n", pr->pr_id); +#endif + drflags &= ~PD_KILL; td->td_retval[0] = pr->pr_id; @@ -3464,6 +3475,29 @@ } /* + * Return true only if cred is in a prison where mountd/nfsd may run: + * - It is a vnet prison. + * - PR_ALLOW_NFSD is set on it. + * - The root directory (pr_root) of the prison is + * a file system mount point, so that mountd can hang + * export information on it. + */ +bool +prison_check_nfsd(struct ucred *cred) +{ + + if (!jailed(cred)) + return (false); + if (jailed_without_vnet(cred)) + return (false); + if (!prison_allow(cred, PR_ALLOW_NFSD)) + return (false); + if ((cred->cr_prison->pr_root->v_vflag & VV_ROOT) == 0) + return (false); + return (true); +} + +/* * Return 1 if p2 is a child of p1, otherwise 0. */ int @@ -3717,11 +3751,20 @@ * is only granted conditionally in the legacy jail case. */ switch (priv) { -#ifdef notyet /* * NFS-specific privileges.
*/ case PRIV_NFS_DAEMON: + case PRIV_VFS_GETFH: +#ifdef VNET_NFSD + if (!prison_allow(cred, PR_ALLOW_NFSD) || + (cred->cr_prison->pr_root->v_vflag & VV_ROOT) == 0) +#else + printf("running nfsd in a prison requires a kernel " + "built with ''options VNET_NFSD''\n"); +#endif + return (EPERM); +#ifdef notyet case PRIV_NFS_LOCKD: #endif /* @@ -4472,6 +4515,10 @@ "B", "Unprivileged processes may use process debugging facilities"); SYSCTL_JAIL_PARAM(_allow, suser, CTLTYPE_INT | CTLFLAG_RW, "B", "Processes in jail with uid 0 have privilege"); +#if defined(VNET_NFSD) && defined(VIMAGE) && defined(NFSD) +SYSCTL_JAIL_PARAM(_allow, nfsd, CTLTYPE_INT | CTLFLAG_RW, + "B", "Mountd/nfsd may run in the jail"); +#endif SYSCTL_JAIL_PARAM_SUBNODE(allow, mount, "Jail mount/unmount permission flags"); SYSCTL_JAIL_PARAM(_allow_mount, , CTLTYPE_INT | CTLFLAG_RW, diff --git a/sys/kern/vfs_mount.c.vnetmnt b/sys/kern/vfs_mount.c --- a/sys/kern/vfs_mount.c.vnetmnt +++ b/sys/kern/vfs_mount.c @@ -924,6 +924,16 @@ fsflags |= MNT_SYNCHRONOUS; else if (strcmp(opt->name, "union") == 0) fsflags |= MNT_UNION; +#ifdef VNET_NFSD + else if (strcmp(opt->name, "export") == 0) { + /* + * Set MNT_EXPORTED for the specific case of a + * vnet jailed call with the "export" option, + * so that mountd can run within that jail. + */ + fsflags |= MNT_EXPORTED; + } +#endif else if (strcmp(opt->name, "automounted") == 0) { fsflags |= MNT_AUTOMOUNTED; do_freeopt = 1; @@ -1285,7 +1295,17 @@ * Only privileged root, or (if MNT_USER is set) the user that * did the original mount is permitted to update it. */ - error = vfs_suser(mp, td); +#ifdef VNET_NFSD + if ((fsflags & MNT_EXPORTED) != 0 && prison_check_nfsd(td->td_ucred)) { + /* For mountd running in a prison, just check uid == 0. 
*/ + error = 0; + if (td->td_ucred->cr_uid != 0) + error = EPERM; + } else +#endif + { + error = vfs_suser(mp, td); + } if (error != 0) { vput(vp); return (error); @@ -1330,7 +1350,18 @@ * XXX The final recipients of VFS_MOUNT just overwrite the ndp they * get. No freeing of cn_pnbuf. */ - error = VFS_MOUNT(mp); +#ifdef VNET_NFSD + error = 0; + /* + * For the case of mountd doing exports from within a vnet prison, + * "from" is not set correctly, so VFS_MOUNT() will return ENOENT. + * It is not obvious that VFS_MOUNT() ever needs to be called when + * mountd is doing exports, but this check only applies to the + * specific case where it is running inside a vnet prison, for now. + */ + if ((fsflags & MNT_EXPORTED) == 0 || !prison_check_nfsd(td->td_ucred)) +#endif + error = VFS_MOUNT(mp); export_error = 0; /* Process the export option. */ @@ -1485,18 +1516,37 @@ if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN) return (ENAMETOOLONG); - if (jailed(td->td_ucred) || usermount == 0) { - if ((error = priv_check(td, PRIV_VFS_MOUNT)) != 0) - return (error); - } - - /* - * Do not allow NFS export or MNT_SUIDDIR by unprivileged users. - */ - if (fsflags & MNT_EXPORTED) { - error = priv_check(td, PRIV_VFS_MOUNT_EXPORTED); +#ifdef VNET_NFSD + if ((fsflags & MNT_EXPORTED) != 0 && prison_check_nfsd(td->td_ucred)) { + error = priv_check(td, PRIV_NFS_DAEMON); if (error) return (error); + } else +#endif + { + if (jailed(td->td_ucred) || usermount == 0) { + if ((error = priv_check(td, PRIV_VFS_MOUNT)) != 0) + return (error); + } + + /* + * Do not allow NFS export or MNT_SUIDDIR by unprivileged users. + */ +#ifdef notnow + /* + * This test was never done before the nfsd in a vnet + * prison patch, since MNT_EXPORTED was never set. + * Since kern_priv.c never checks PRIV_VFS_MOUNT_EXPORTED, + * it is not obvious to me what the check was supposed + * to do. + * As such, I have #ifdef'd it out under notnow.
+ */ + if (fsflags & MNT_EXPORTED) { + error = priv_check(td, PRIV_VFS_MOUNT_EXPORTED); + if (error) + return (error); + } +#endif } if (fsflags & MNT_SUIDDIR) { error = priv_check(td, PRIV_VFS_MOUNT_SUIDDIR); diff --git a/sys/nfs/nfs_nfssvc.c.vnet b/sys/nfs/nfs_nfssvc.c --- a/sys/nfs/nfs_nfssvc.c.vnet +++ b/sys/nfs/nfs_nfssvc.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -52,6 +53,8 @@ #include #include +#include + #include #include @@ -90,6 +93,11 @@ if (error != 0) return (error); } + +#ifdef VNET_NFSD + CURVNET_SET(TD_TO_VNET(td)); +#endif + error = EINVAL; if ((uap->flag & (NFSSVC_ADDSOCK | NFSSVC_OLDNFSD | NFSSVC_NFSD)) && nfsd_call_nfsserver != NULL) @@ -111,6 +119,9 @@ error = (*nfsd_call_nfsd)(td, uap); if (error == EINTR || error == ERESTART) error = 0; +#ifdef VNET_NFSD + CURVNET_RESTORE(); +#endif return (error); } diff --git a/sys/rpc/rpcsec_gss/svc_rpcsec_gss.c.vnet b/sys/rpc/rpcsec_gss/svc_rpcsec_gss.c --- a/sys/rpc/rpcsec_gss/svc_rpcsec_gss.c.vnet +++ b/sys/rpc/rpcsec_gss/svc_rpcsec_gss.c @@ -478,7 +478,7 @@ cr->cr_uid = cr->cr_ruid = cr->cr_svuid = uc->uid; cr->cr_rgid = cr->cr_svgid = uc->gid; crsetgroups(cr, uc->gidlen, uc->gidlist); - cr->cr_prison = &prison0; + cr->cr_prison = curthread->td_ucred->cr_prison; prison_hold(cr->cr_prison); *crp = crhold(cr); diff --git a/sys/rpc/svc.c.vnetdcl b/sys/rpc/svc.c --- a/sys/rpc/svc.c.vnetdcl +++ b/sys/rpc/svc.c @@ -48,6 +48,7 @@ */ #include +#include #include #include #include @@ -126,7 +127,7 @@ pool->sp_space_low = (pool->sp_space_high / 3) * 2; sysctl_ctx_init(&pool->sp_sysctl); - if (sysctl_base) { + if (!jailed(curthread->td_ucred) && sysctl_base) { SYSCTL_ADD_PROC(&pool->sp_sysctl, sysctl_base, OID_AUTO, "minthreads", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, pool, 0, svcpool_minthread_sysctl, "I", diff --git a/sys/rpc/svc_auth.c.vnet b/sys/rpc/svc_auth.c --- a/sys/rpc/svc_auth.c.vnet +++ b/sys/rpc/svc_auth.c @@ -197,7 +197,7 @@ cr->cr_uid = cr->cr_ruid = 
cr->cr_svuid = xprt->xp_uid; crsetgroups(cr, xprt->xp_ngrps, xprt->xp_gidp); cr->cr_rgid = cr->cr_svgid = xprt->xp_gidp[0]; - cr->cr_prison = &prison0; + cr->cr_prison = curthread->td_ucred->cr_prison; prison_hold(cr->cr_prison); *crp = cr; return (TRUE); @@ -210,7 +210,7 @@ cr->cr_uid = cr->cr_ruid = cr->cr_svuid = xcr->cr_uid; crsetgroups(cr, xcr->cr_ngroups, xcr->cr_groups); cr->cr_rgid = cr->cr_svgid = cr->cr_groups[0]; - cr->cr_prison = &prison0; + cr->cr_prison = curthread->td_ucred->cr_prison; prison_hold(cr->cr_prison); *crp = cr; return (TRUE); diff --git a/sys/rpc/svc_dg.c.vnet b/sys/rpc/svc_dg.c --- a/sys/rpc/svc_dg.c.vnet +++ b/sys/rpc/svc_dg.c @@ -45,6 +45,7 @@ */ #include +#include #include #include #include @@ -104,6 +105,8 @@ struct sockaddr* sa; int error; + if (jailed(curthread->td_ucred)) + return (NULL); if (!__rpc_socket2sockinfo(so, &si)) { printf(svc_dg_str, svc_dg_err1); return (NULL); diff --git a/sys/sys/jail.h.vnet b/sys/sys/jail.h --- a/sys/sys/jail.h.vnet +++ b/sys/sys/jail.h @@ -253,7 +253,8 @@ #define PR_ALLOW_SUSER 0x00000400 #define PR_ALLOW_RESERVED_PORTS 0x00008000 #define PR_ALLOW_KMEM_ACCESS 0x00010000 /* reserved, not used yet */ -#define PR_ALLOW_ALL_STATIC 0x000187ff +#define PR_ALLOW_NFSD 0x00020000 +#define PR_ALLOW_ALL_STATIC 0x000387ff /* * PR_ALLOW_DIFFERENCES determines which flags are able to be @@ -420,6 +421,7 @@ void prison0_init(void); int prison_allow(struct ucred *, unsigned); int prison_check(struct ucred *cred1, struct ucred *cred2); +bool prison_check_nfsd(struct ucred *cred); int prison_owns_vnet(struct ucred *); int prison_canseemount(struct ucred *cred, struct mount *mp); void prison_enforce_statfs(struct ucred *cred, struct mount *mp,