Index: projects/nfs-over-tls/sys/fs/nfs/nfs_commonport.c =================================================================== --- projects/nfs-over-tls/sys/fs/nfs/nfs_commonport.c (revision 360215) +++ projects/nfs-over-tls/sys/fs/nfs/nfs_commonport.c (revision 360216) @@ -1,981 +1,981 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * Functions that need to be different for different versions of BSD * kernel should be kept here, along with any global storage specific * to this BSD variant. */ #include #include #include #include #include #include #include #include #include #include #include #include #include extern int nfscl_ticks; extern nfsuserd_state nfsrv_nfsuserd; extern struct nfssockreq nfsrv_nfsuserdsock; extern void (*nfsd_call_recall)(struct vnode *, int, struct ucred *, struct thread *); extern int nfsrv_useacl; struct mount nfsv4root_mnt; int newnfs_numnfsd = 0; struct nfsstatsv1 nfsstatsv1; int nfs_numnfscbd = 0; int nfscl_debuglevel = 0; char nfsv4_callbackaddr[INET6_ADDRSTRLEN]; struct callout newnfsd_callout; int nfsrv_lughashsize = 100; struct mtx nfsrv_dslock_mtx; struct nfsdevicehead nfsrv_devidhead; volatile int nfsrv_devidcnt = 0; void (*nfsd_call_servertimer)(void) = NULL; void (*ncl_call_invalcaches)(struct vnode *) = NULL; vop_advlock_t *nfs_advlock_p = NULL; vop_reclaim_t *nfs_reclaim_p = NULL; int nfs_pnfsio(task_fn_t *, void *); static int nfs_realign_test; static int nfs_realign_count; static struct ext_nfsstats oldnfsstats; static struct nfsstatsov1 nfsstatsov1; SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "NFS filesystem"); SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_test, CTLFLAG_RW, &nfs_realign_test, 0, "Number of realign tests done"); SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_count, CTLFLAG_RW, &nfs_realign_count, 0, "Number of mbuf realignments done"); SYSCTL_STRING(_vfs_nfs, OID_AUTO, callback_addr, CTLFLAG_RW, nfsv4_callbackaddr, sizeof(nfsv4_callbackaddr), "NFSv4 callback addr for server to use"); SYSCTL_INT(_vfs_nfs, OID_AUTO, debuglevel, CTLFLAG_RW, &nfscl_debuglevel, 0, "Debug level for NFS client"); SYSCTL_INT(_vfs_nfs, OID_AUTO, userhashsize, CTLFLAG_RDTUN, &nfsrv_lughashsize, 0, "Size of hash tables for uid/name mapping"); int nfs_pnfsiothreads = -1; SYSCTL_INT(_vfs_nfs, OID_AUTO, pnfsiothreads, CTLFLAG_RW, &nfs_pnfsiothreads, 0, "Number of pNFS mirror I/O threads"); /* * Defines for malloc * (Here for FreeBSD, since they allocate storage.) */ MALLOC_DEFINE(M_NEWNFSRVCACHE, "NFSD srvcache", "NFSD Server Request Cache"); MALLOC_DEFINE(M_NEWNFSDCLIENT, "NFSD V4client", "NFSD V4 Client Id"); MALLOC_DEFINE(M_NEWNFSDSTATE, "NFSD V4state", "NFSD V4 State (Openowner, Open, Lockowner, Delegation"); MALLOC_DEFINE(M_NEWNFSDLOCK, "NFSD V4lock", "NFSD V4 byte range lock"); MALLOC_DEFINE(M_NEWNFSDLOCKFILE, "NFSD lckfile", "NFSD Open/Lock file"); MALLOC_DEFINE(M_NEWNFSSTRING, "NFSD string", "NFSD V4 long string"); MALLOC_DEFINE(M_NEWNFSUSERGROUP, "NFSD usrgroup", "NFSD V4 User/group map"); MALLOC_DEFINE(M_NEWNFSDREQ, "NFS req", "NFS request header"); MALLOC_DEFINE(M_NEWNFSFH, "NFS fh", "NFS file handle"); MALLOC_DEFINE(M_NEWNFSCLOWNER, "NFSCL owner", "NFSCL Open Owner"); MALLOC_DEFINE(M_NEWNFSCLOPEN, "NFSCL open", "NFSCL Open"); MALLOC_DEFINE(M_NEWNFSCLDELEG, "NFSCL deleg", "NFSCL Delegation"); MALLOC_DEFINE(M_NEWNFSCLCLIENT, "NFSCL client", "NFSCL Client"); MALLOC_DEFINE(M_NEWNFSCLLOCKOWNER, "NFSCL lckown", "NFSCL Lock Owner"); MALLOC_DEFINE(M_NEWNFSCLLOCK, "NFSCL lck", "NFSCL Lock"); MALLOC_DEFINE(M_NEWNFSV4NODE, "NEWNFSnode", "NFS vnode"); MALLOC_DEFINE(M_NEWNFSDIRECTIO, "NEWdirectio", "NFS Direct IO buffer"); MALLOC_DEFINE(M_NEWNFSDIROFF, "NFSCL diroffdiroff", "NFS directory offset data"); MALLOC_DEFINE(M_NEWNFSDROLLBACK, "NFSD rollback", "NFS local lock rollback"); MALLOC_DEFINE(M_NEWNFSLAYOUT, "NFSCL layout", "NFSv4.1 Layout"); MALLOC_DEFINE(M_NEWNFSFLAYOUT, "NFSCL flayout", "NFSv4.1 File Layout"); MALLOC_DEFINE(M_NEWNFSDEVINFO, "NFSCL devinfo", "NFSv4.1 Device Info"); MALLOC_DEFINE(M_NEWNFSSOCKREQ, "NFSCL sockreq", "NFS Sock Req"); MALLOC_DEFINE(M_NEWNFSCLDS, "NFSCL session", "NFSv4.1 Session"); MALLOC_DEFINE(M_NEWNFSLAYRECALL, "NFSCL layrecall", "NFSv4.1 Layout Recall"); MALLOC_DEFINE(M_NEWNFSDSESSION, "NFSD session", "NFSD Session for a client"); /* * Definition of mutex locks. * newnfsd_mtx is used in nfsrvd_nfsd() to protect the nfs socket list * and assorted other nfsd structures. */ struct mtx newnfsd_mtx; struct mtx nfs_sockl_mutex; struct mtx nfs_state_mutex; struct mtx nfs_nameid_mutex; struct mtx nfs_req_mutex; struct mtx nfs_slock_mutex; struct mtx nfs_clstate_mutex; /* local functions */ static int nfssvc_call(struct thread *, struct nfssvc_args *, struct ucred *); #ifdef __NO_STRICT_ALIGNMENT /* * These architectures don't need re-alignment, so just return. */ int newnfs_realign(struct mbuf **pm, int how) { return (0); } #else /* !__NO_STRICT_ALIGNMENT */ /* * newnfs_realign: * * Check for badly aligned mbuf data and realign by copying the unaligned * portion of the data into a new mbuf chain and freeing the portions * of the old chain that were replaced. * * We cannot simply realign the data within the existing mbuf chain * because the underlying buffers may contain other rpc commands and * we cannot afford to overwrite them. * * We would prefer to avoid this situation entirely. The situation does * not occur with NFS/UDP and is supposed to only occasionally occur * with TCP. Use vfs.nfs.realign_count and realign_test to check this. * */ int newnfs_realign(struct mbuf **pm, int how) { struct mbuf *m, *n; int off, space; bool copyit; ++nfs_realign_test; /* * For ext_pgs mbufs, just copy the entire chain if there is an * alignment problem. */ copyit = false; m = *pm; while ((m->m_flags & M_NOMAP) != 0) { if ((m->m_len & 0x3) != 0 || - (m->m_ext.ext_pgs->first_pg_off & 0x3) != 0) { + (m->m_ext_pgs.first_pg_off & 0x3) != 0) { copyit = true; break; } m = m->m_next; if (m == NULL) return (0); } if (copyit) { m = mb_unmapped_to_ext(*pm); if (m == NULL) return (ENOMEM); *pm = m; return (0); } while ((m = *pm) != NULL) { if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) { /* * NB: we can't depend on m_pkthdr.len to help us * decide what to do here. May not be worth doing * the m_length calculation as m_copyback will * expand the mbuf chain below as needed. */ space = m_length(m, NULL); if (space >= MINCLSIZE) { /* NB: m_copyback handles space > MCLBYTES */ n = m_getcl(how, MT_DATA, 0); } else n = m_get(how, MT_DATA); if (n == NULL) return (ENOMEM); /* * Align the remainder of the mbuf chain. */ n->m_len = 0; off = 0; while (m != NULL) { m_copyback(n, off, m->m_len, mtod(m, caddr_t)); off += m->m_len; m = m->m_next; } m_freem(*pm); *pm = n; ++nfs_realign_count; break; } pm = &m->m_next; } return (0); } #endif /* __NO_STRICT_ALIGNMENT */ #ifdef notdef static void nfsrv_object_create(struct vnode *vp, struct thread *td) { if (vp == NULL || vp->v_type != VREG) return; (void) vfs_object_create(vp, td, td->td_ucred); } #endif /* * Look up a file name. Basically just initialize stuff and call namei(). */ int nfsrv_lookupfilename(struct nameidata *ndp, char *fname, NFSPROC_T *p) { int error; NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, fname, p); error = namei(ndp); if (!error) { NDFREE(ndp, NDF_ONLY_PNBUF); } return (error); } /* * Copy NFS uid, gids to the cred structure. */ void newnfs_copycred(struct nfscred *nfscr, struct ucred *cr) { KASSERT(nfscr->nfsc_ngroups >= 0, ("newnfs_copycred: negative nfsc_ngroups")); cr->cr_uid = nfscr->nfsc_uid; crsetgroups(cr, nfscr->nfsc_ngroups, nfscr->nfsc_groups); } /* * Map args from nfsmsleep() to msleep(). */ int nfsmsleep(void *chan, void *mutex, int prio, const char *wmesg, struct timespec *ts) { u_int64_t nsecval; int error, timeo; if (ts) { timeo = hz * ts->tv_sec; nsecval = (u_int64_t)ts->tv_nsec; nsecval = ((nsecval * ((u_int64_t)hz)) + 500000000) / 1000000000; timeo += (int)nsecval; } else { timeo = 0; } error = msleep(chan, (struct mtx *)mutex, prio, wmesg, timeo); return (error); } /* * Get the file system info for the server. For now, just assume FFS. */ void nfsvno_getfs(struct nfsfsinfo *sip, int isdgram) { int pref; /* * XXX * There should be file system VFS OP(s) to get this information. * For now, assume ufs. */ if (isdgram) pref = NFS_MAXDGRAMDATA; else pref = NFS_SRVMAXIO; sip->fs_rtmax = NFS_SRVMAXIO; sip->fs_rtpref = pref; sip->fs_rtmult = NFS_FABLKSIZE; sip->fs_wtmax = NFS_SRVMAXIO; sip->fs_wtpref = pref; sip->fs_wtmult = NFS_FABLKSIZE; sip->fs_dtpref = pref; sip->fs_maxfilesize = 0xffffffffffffffffull; sip->fs_timedelta.tv_sec = 0; sip->fs_timedelta.tv_nsec = 1; sip->fs_properties = (NFSV3FSINFO_LINK | NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS | NFSV3FSINFO_CANSETTIME); } /* * Do the pathconf vnode op. */ int nfsvno_pathconf(struct vnode *vp, int flag, long *retf, struct ucred *cred, struct thread *p) { int error; error = VOP_PATHCONF(vp, flag, retf); if (error == EOPNOTSUPP || error == EINVAL) { /* * Some file systems return EINVAL for name arguments not * supported and some return EOPNOTSUPP for this case. * So the NFSv3 Pathconf RPC doesn't fail for these cases, * just fake them. */ switch (flag) { case _PC_LINK_MAX: *retf = NFS_LINK_MAX; break; case _PC_NAME_MAX: *retf = NAME_MAX; break; case _PC_CHOWN_RESTRICTED: *retf = 1; break; case _PC_NO_TRUNC: *retf = 1; break; default: /* * Only happens if a _PC_xxx is added to the server, * but this isn't updated. */ *retf = 0; printf("nfsrvd pathconf flag=%d not supp\n", flag); } error = 0; } NFSEXITCODE(error); return (error); } /* Fake nfsrv_atroot. Just return 0 */ int nfsrv_atroot(struct vnode *vp, uint64_t *retp) { return (0); } /* * Set the credentials to refer to root. * If only the various BSDen could agree on whether cr_gid is a separate * field or cr_groups[0]... */ void newnfs_setroot(struct ucred *cred) { cred->cr_uid = 0; cred->cr_groups[0] = 0; cred->cr_ngroups = 1; } /* * Get the client credential. Used for Renew and recovery. */ struct ucred * newnfs_getcred(void) { struct ucred *cred; struct thread *td = curthread; cred = crdup(td->td_ucred); newnfs_setroot(cred); return (cred); } /* * Nfs timer routine * Call the nfsd's timer function once/sec. */ void newnfs_timer(void *arg) { static time_t lasttime = 0; /* * Call the server timer, if set up. * The argument indicates if it is the next second and therefore * leases should be checked. */ if (lasttime != NFSD_MONOSEC) { lasttime = NFSD_MONOSEC; if (nfsd_call_servertimer != NULL) (*nfsd_call_servertimer)(); } callout_reset(&newnfsd_callout, nfscl_ticks, newnfs_timer, NULL); } /* * Sleep for a short period of time unless errval == NFSERR_GRACE, where * the sleep should be for 5 seconds. * Since lbolt doesn't exist in FreeBSD-CURRENT, just use a timeout on * an event that never gets a wakeup. Only return EINTR or 0. */ int nfs_catnap(int prio, int errval, const char *wmesg) { static int non_event; int ret; if (errval == NFSERR_GRACE) ret = tsleep(&non_event, prio, wmesg, 5 * hz); else ret = tsleep(&non_event, prio, wmesg, 1); if (ret != EINTR) ret = 0; return (ret); } /* * Get referral. For now, just fail. */ struct nfsreferral * nfsv4root_getreferral(struct vnode *vp, struct vnode *dvp, u_int32_t fileno) { return (NULL); } static int nfssvc_nfscommon(struct thread *td, struct nfssvc_args *uap) { int error; error = nfssvc_call(td, uap, td->td_ucred); NFSEXITCODE(error); return (error); } static int nfssvc_call(struct thread *p, struct nfssvc_args *uap, struct ucred *cred) { int error = EINVAL, i, j; struct nfsd_idargs nid; struct nfsd_oidargs onid; struct { int vers; /* Just the first field of nfsstats. */ } nfsstatver; if (uap->flag & NFSSVC_IDNAME) { if ((uap->flag & NFSSVC_NEWSTRUCT) != 0) error = copyin(uap->argp, &nid, sizeof(nid)); else { error = copyin(uap->argp, &onid, sizeof(onid)); if (error == 0) { nid.nid_flag = onid.nid_flag; nid.nid_uid = onid.nid_uid; nid.nid_gid = onid.nid_gid; nid.nid_usermax = onid.nid_usermax; nid.nid_usertimeout = onid.nid_usertimeout; nid.nid_name = onid.nid_name; nid.nid_namelen = onid.nid_namelen; nid.nid_ngroup = 0; nid.nid_grps = NULL; } } if (error) goto out; error = nfssvc_idname(&nid); goto out; } else if (uap->flag & NFSSVC_GETSTATS) { if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) { /* Copy fields to the old ext_nfsstat structure. */ oldnfsstats.attrcache_hits = nfsstatsv1.attrcache_hits; oldnfsstats.attrcache_misses = nfsstatsv1.attrcache_misses; oldnfsstats.lookupcache_hits = nfsstatsv1.lookupcache_hits; oldnfsstats.lookupcache_misses = nfsstatsv1.lookupcache_misses; oldnfsstats.direofcache_hits = nfsstatsv1.direofcache_hits; oldnfsstats.direofcache_misses = nfsstatsv1.direofcache_misses; oldnfsstats.accesscache_hits = nfsstatsv1.accesscache_hits; oldnfsstats.accesscache_misses = nfsstatsv1.accesscache_misses; oldnfsstats.biocache_reads = nfsstatsv1.biocache_reads; oldnfsstats.read_bios = nfsstatsv1.read_bios; oldnfsstats.read_physios = nfsstatsv1.read_physios; oldnfsstats.biocache_writes = nfsstatsv1.biocache_writes; oldnfsstats.write_bios = nfsstatsv1.write_bios; oldnfsstats.write_physios = nfsstatsv1.write_physios; oldnfsstats.biocache_readlinks = nfsstatsv1.biocache_readlinks; oldnfsstats.readlink_bios = nfsstatsv1.readlink_bios; oldnfsstats.biocache_readdirs = nfsstatsv1.biocache_readdirs; oldnfsstats.readdir_bios = nfsstatsv1.readdir_bios; for (i = 0; i < NFSV4_NPROCS; i++) oldnfsstats.rpccnt[i] = nfsstatsv1.rpccnt[i]; oldnfsstats.rpcretries = nfsstatsv1.rpcretries; for (i = 0; i < NFSV4OP_NOPS; i++) oldnfsstats.srvrpccnt[i] = nfsstatsv1.srvrpccnt[i]; for (i = NFSV42_NOPS, j = NFSV4OP_NOPS; i < NFSV42_NOPS + NFSV4OP_FAKENOPS; i++, j++) oldnfsstats.srvrpccnt[j] = nfsstatsv1.srvrpccnt[i]; oldnfsstats.srvrpc_errs = nfsstatsv1.srvrpc_errs; oldnfsstats.srv_errs = nfsstatsv1.srv_errs; oldnfsstats.rpcrequests = nfsstatsv1.rpcrequests; oldnfsstats.rpctimeouts = nfsstatsv1.rpctimeouts; oldnfsstats.rpcunexpected = nfsstatsv1.rpcunexpected; oldnfsstats.rpcinvalid = nfsstatsv1.rpcinvalid; oldnfsstats.srvcache_inproghits = nfsstatsv1.srvcache_inproghits; oldnfsstats.srvcache_idemdonehits = nfsstatsv1.srvcache_idemdonehits; oldnfsstats.srvcache_nonidemdonehits = nfsstatsv1.srvcache_nonidemdonehits; oldnfsstats.srvcache_misses = nfsstatsv1.srvcache_misses; oldnfsstats.srvcache_tcppeak = nfsstatsv1.srvcache_tcppeak; oldnfsstats.srvcache_size = nfsstatsv1.srvcache_size; oldnfsstats.srvclients = nfsstatsv1.srvclients; oldnfsstats.srvopenowners = nfsstatsv1.srvopenowners; oldnfsstats.srvopens = nfsstatsv1.srvopens; oldnfsstats.srvlockowners = nfsstatsv1.srvlockowners; oldnfsstats.srvlocks = nfsstatsv1.srvlocks; oldnfsstats.srvdelegates = nfsstatsv1.srvdelegates; for (i = 0; i < NFSV4OP_CBNOPS; i++) oldnfsstats.cbrpccnt[i] = nfsstatsv1.cbrpccnt[i]; oldnfsstats.clopenowners = nfsstatsv1.clopenowners; oldnfsstats.clopens = nfsstatsv1.clopens; oldnfsstats.cllockowners = nfsstatsv1.cllockowners; oldnfsstats.cllocks = nfsstatsv1.cllocks; oldnfsstats.cldelegates = nfsstatsv1.cldelegates; oldnfsstats.cllocalopenowners = nfsstatsv1.cllocalopenowners; oldnfsstats.cllocalopens = nfsstatsv1.cllocalopens; oldnfsstats.cllocallockowners = nfsstatsv1.cllocallockowners; oldnfsstats.cllocallocks = nfsstatsv1.cllocallocks; error = copyout(&oldnfsstats, uap->argp, sizeof (oldnfsstats)); } else { error = copyin(uap->argp, &nfsstatver, sizeof(nfsstatver)); if (error == 0) { if (nfsstatver.vers == NFSSTATS_OV1) { /* Copy nfsstatsv1 to nfsstatsov1. */ nfsstatsov1.attrcache_hits = nfsstatsv1.attrcache_hits; nfsstatsov1.attrcache_misses = nfsstatsv1.attrcache_misses; nfsstatsov1.lookupcache_hits = nfsstatsv1.lookupcache_hits; nfsstatsov1.lookupcache_misses = nfsstatsv1.lookupcache_misses; nfsstatsov1.direofcache_hits = nfsstatsv1.direofcache_hits; nfsstatsov1.direofcache_misses = nfsstatsv1.direofcache_misses; nfsstatsov1.accesscache_hits = nfsstatsv1.accesscache_hits; nfsstatsov1.accesscache_misses = nfsstatsv1.accesscache_misses; nfsstatsov1.biocache_reads = nfsstatsv1.biocache_reads; nfsstatsov1.read_bios = nfsstatsv1.read_bios; nfsstatsov1.read_physios = nfsstatsv1.read_physios; nfsstatsov1.biocache_writes = nfsstatsv1.biocache_writes; nfsstatsov1.write_bios = nfsstatsv1.write_bios; nfsstatsov1.write_physios = nfsstatsv1.write_physios; nfsstatsov1.biocache_readlinks = nfsstatsv1.biocache_readlinks; nfsstatsov1.readlink_bios = nfsstatsv1.readlink_bios; nfsstatsov1.biocache_readdirs = nfsstatsv1.biocache_readdirs; nfsstatsov1.readdir_bios = nfsstatsv1.readdir_bios; for (i = 0; i < NFSV42_NPROCS; i++) nfsstatsov1.rpccnt[i] = nfsstatsv1.rpccnt[i]; nfsstatsov1.rpcretries = nfsstatsv1.rpcretries; for (i = 0; i < NFSV42_PURENOPS; i++) nfsstatsov1.srvrpccnt[i] = nfsstatsv1.srvrpccnt[i]; for (i = NFSV42_NOPS, j = NFSV42_PURENOPS; i < NFSV42_NOPS + NFSV4OP_FAKENOPS; i++, j++) nfsstatsov1.srvrpccnt[j] = nfsstatsv1.srvrpccnt[i]; nfsstatsov1.srvrpc_errs = nfsstatsv1.srvrpc_errs; nfsstatsov1.srv_errs = nfsstatsv1.srv_errs; nfsstatsov1.rpcrequests = nfsstatsv1.rpcrequests; nfsstatsov1.rpctimeouts = nfsstatsv1.rpctimeouts; nfsstatsov1.rpcunexpected = nfsstatsv1.rpcunexpected; nfsstatsov1.rpcinvalid = nfsstatsv1.rpcinvalid; nfsstatsov1.srvcache_inproghits = nfsstatsv1.srvcache_inproghits; nfsstatsov1.srvcache_idemdonehits = nfsstatsv1.srvcache_idemdonehits; nfsstatsov1.srvcache_nonidemdonehits = nfsstatsv1.srvcache_nonidemdonehits; nfsstatsov1.srvcache_misses = nfsstatsv1.srvcache_misses; nfsstatsov1.srvcache_tcppeak = nfsstatsv1.srvcache_tcppeak; nfsstatsov1.srvcache_size = nfsstatsv1.srvcache_size; nfsstatsov1.srvclients = nfsstatsv1.srvclients; nfsstatsov1.srvopenowners = nfsstatsv1.srvopenowners; nfsstatsov1.srvopens = nfsstatsv1.srvopens; nfsstatsov1.srvlockowners = nfsstatsv1.srvlockowners; nfsstatsov1.srvlocks = nfsstatsv1.srvlocks; nfsstatsov1.srvdelegates = nfsstatsv1.srvdelegates; for (i = 0; i < NFSV42_CBNOPS; i++) nfsstatsov1.cbrpccnt[i] = nfsstatsv1.cbrpccnt[i]; nfsstatsov1.clopenowners = nfsstatsv1.clopenowners; nfsstatsov1.clopens = nfsstatsv1.clopens; nfsstatsov1.cllockowners = nfsstatsv1.cllockowners; nfsstatsov1.cllocks = nfsstatsv1.cllocks; nfsstatsov1.cldelegates = nfsstatsv1.cldelegates; nfsstatsov1.cllocalopenowners = nfsstatsv1.cllocalopenowners; nfsstatsov1.cllocalopens = nfsstatsv1.cllocalopens; nfsstatsov1.cllocallockowners = nfsstatsv1.cllocallockowners; nfsstatsov1.cllocallocks = nfsstatsv1.cllocallocks; nfsstatsov1.srvstartcnt = nfsstatsv1.srvstartcnt; nfsstatsov1.srvdonecnt = nfsstatsv1.srvdonecnt; for (i = NFSV42_NOPS, j = NFSV42_PURENOPS; i < NFSV42_NOPS + NFSV4OP_FAKENOPS; i++, j++) { nfsstatsov1.srvbytes[j] = nfsstatsv1.srvbytes[i]; nfsstatsov1.srvops[j] = nfsstatsv1.srvops[i]; nfsstatsov1.srvduration[j] = nfsstatsv1.srvduration[i]; } nfsstatsov1.busyfrom = nfsstatsv1.busyfrom; nfsstatsov1.busyfrom = nfsstatsv1.busyfrom; error = copyout(&nfsstatsov1, uap->argp, sizeof(nfsstatsov1)); } else if (nfsstatver.vers != NFSSTATS_V1) error = EPERM; else error = copyout(&nfsstatsv1, uap->argp, sizeof(nfsstatsv1)); } } if (error == 0) { if ((uap->flag & NFSSVC_ZEROCLTSTATS) != 0) { nfsstatsv1.attrcache_hits = 0; nfsstatsv1.attrcache_misses = 0; nfsstatsv1.lookupcache_hits = 0; nfsstatsv1.lookupcache_misses = 0; nfsstatsv1.direofcache_hits = 0; nfsstatsv1.direofcache_misses = 0; nfsstatsv1.accesscache_hits = 0; nfsstatsv1.accesscache_misses = 0; nfsstatsv1.biocache_reads = 0; nfsstatsv1.read_bios = 0; nfsstatsv1.read_physios = 0; nfsstatsv1.biocache_writes = 0; nfsstatsv1.write_bios = 0; nfsstatsv1.write_physios = 0; nfsstatsv1.biocache_readlinks = 0; nfsstatsv1.readlink_bios = 0; nfsstatsv1.biocache_readdirs = 0; nfsstatsv1.readdir_bios = 0; nfsstatsv1.rpcretries = 0; nfsstatsv1.rpcrequests = 0; nfsstatsv1.rpctimeouts = 0; nfsstatsv1.rpcunexpected = 0; nfsstatsv1.rpcinvalid = 0; bzero(nfsstatsv1.rpccnt, sizeof(nfsstatsv1.rpccnt)); } if ((uap->flag & NFSSVC_ZEROSRVSTATS) != 0) { nfsstatsv1.srvrpc_errs = 0; nfsstatsv1.srv_errs = 0; nfsstatsv1.srvcache_inproghits = 0; nfsstatsv1.srvcache_idemdonehits = 0; nfsstatsv1.srvcache_nonidemdonehits = 0; nfsstatsv1.srvcache_misses = 0; nfsstatsv1.srvcache_tcppeak = 0; bzero(nfsstatsv1.srvrpccnt, sizeof(nfsstatsv1.srvrpccnt)); bzero(nfsstatsv1.cbrpccnt, sizeof(nfsstatsv1.cbrpccnt)); } } goto out; } else if (uap->flag & NFSSVC_NFSUSERDPORT) { u_short sockport; struct nfsuserd_args nargs; if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) { error = copyin(uap->argp, (caddr_t)&sockport, sizeof (u_short)); if (error == 0) { nargs.nuserd_family = AF_INET; nargs.nuserd_port = sockport; } } else { /* * New nfsuserd_args structure, which indicates * which IP version to use along with the port#. */ error = copyin(uap->argp, &nargs, sizeof(nargs)); } if (!error) error = nfsrv_nfsuserdport(&nargs, p); } else if (uap->flag & NFSSVC_NFSUSERDDELPORT) { nfsrv_nfsuserddelport(); error = 0; } out: NFSEXITCODE(error); return (error); } /* * called by all three modevent routines, so that it gets things * initialized soon enough. */ void newnfs_portinit(void) { static int inited = 0; if (inited) return; inited = 1; /* Initialize SMP locks used by both client and server. */ mtx_init(&newnfsd_mtx, "newnfsd_mtx", NULL, MTX_DEF); mtx_init(&nfs_state_mutex, "nfs_state_mutex", NULL, MTX_DEF); mtx_init(&nfs_clstate_mutex, "nfs_clstate_mutex", NULL, MTX_DEF); } /* * Determine if the file system supports NFSv4 ACLs. * Return 1 if it does, 0 otherwise. */ int nfs_supportsnfsv4acls(struct vnode *vp) { int error; long retval; ASSERT_VOP_LOCKED(vp, "nfs supports nfsv4acls"); if (nfsrv_useacl == 0) return (0); error = VOP_PATHCONF(vp, _PC_ACL_NFS4, &retval); if (error == 0 && retval != 0) return (1); return (0); } /* * These are the first fields of all the context structures passed into * nfs_pnfsio(). */ struct pnfsio { int done; int inprog; struct task tsk; }; /* * Do a mirror I/O on a pNFS thread. */ int nfs_pnfsio(task_fn_t *func, void *context) { struct pnfsio *pio; int ret; static struct taskqueue *pnfsioq = NULL; pio = (struct pnfsio *)context; if (pnfsioq == NULL) { if (nfs_pnfsiothreads == 0) return (EPERM); if (nfs_pnfsiothreads < 0) nfs_pnfsiothreads = mp_ncpus * 4; pnfsioq = taskqueue_create("pnfsioq", M_WAITOK, taskqueue_thread_enqueue, &pnfsioq); if (pnfsioq == NULL) return (ENOMEM); ret = taskqueue_start_threads(&pnfsioq, nfs_pnfsiothreads, 0, "pnfsiot"); if (ret != 0) { taskqueue_free(pnfsioq); pnfsioq = NULL; return (ret); } } pio->inprog = 1; TASK_INIT(&pio->tsk, 0, func, context); ret = taskqueue_enqueue(pnfsioq, &pio->tsk); if (ret != 0) pio->inprog = 0; return (ret); } extern int (*nfsd_call_nfscommon)(struct thread *, struct nfssvc_args *); /* * Called once to initialize data structures... */ static int nfscommon_modevent(module_t mod, int type, void *data) { int error = 0; static int loaded = 0; switch (type) { case MOD_LOAD: if (loaded) goto out; newnfs_portinit(); mtx_init(&nfs_nameid_mutex, "nfs_nameid_mutex", NULL, MTX_DEF); mtx_init(&nfs_sockl_mutex, "nfs_sockl_mutex", NULL, MTX_DEF); mtx_init(&nfs_slock_mutex, "nfs_slock_mutex", NULL, MTX_DEF); mtx_init(&nfs_req_mutex, "nfs_req_mutex", NULL, MTX_DEF); mtx_init(&nfsrv_nfsuserdsock.nr_mtx, "nfsuserd", NULL, MTX_DEF); mtx_init(&nfsrv_dslock_mtx, "nfs4ds", NULL, MTX_DEF); TAILQ_INIT(&nfsrv_devidhead); callout_init(&newnfsd_callout, 1); newnfs_init(); nfsd_call_nfscommon = nfssvc_nfscommon; loaded = 1; break; case MOD_UNLOAD: if (newnfs_numnfsd != 0 || nfsrv_nfsuserd != NOTRUNNING || nfs_numnfscbd != 0) { error = EBUSY; break; } nfsd_call_nfscommon = NULL; callout_drain(&newnfsd_callout); /* Clean out the name<-->id cache. */ nfsrv_cleanusergroup(); /* and get rid of the mutexes */ mtx_destroy(&nfs_nameid_mutex); mtx_destroy(&newnfsd_mtx); mtx_destroy(&nfs_state_mutex); mtx_destroy(&nfs_clstate_mutex); mtx_destroy(&nfs_sockl_mutex); mtx_destroy(&nfs_slock_mutex); mtx_destroy(&nfs_req_mutex); mtx_destroy(&nfsrv_nfsuserdsock.nr_mtx); mtx_destroy(&nfsrv_dslock_mtx); loaded = 0; break; default: error = EOPNOTSUPP; break; } out: NFSEXITCODE(error); return error; } static moduledata_t nfscommon_mod = { "nfscommon", nfscommon_modevent, NULL, }; DECLARE_MODULE(nfscommon, nfscommon_mod, SI_SUB_VFS, SI_ORDER_ANY); /* So that loader and kldload(2) can find us, wherever we are.. */ MODULE_VERSION(nfscommon, 1); MODULE_DEPEND(nfscommon, nfssvc, 1, 1, 1); MODULE_DEPEND(nfscommon, krpc, 1, 1, 1); Index: projects/nfs-over-tls/sys/fs/nfs/nfs_commonsubs.c =================================================================== --- projects/nfs-over-tls/sys/fs/nfs/nfs_commonsubs.c (revision 360215) +++ projects/nfs-over-tls/sys/fs/nfs/nfs_commonsubs.c (revision 360216) @@ -1,5159 +1,5158 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * These functions support the macros and help fiddle mbuf chains for * the nfs op functions. They do things like create the rpc header and * copy data between mbuf chains and uio lists. */ #ifndef APPLEKEXT #include "opt_inet.h" #include "opt_inet6.h" #include "opt_kern_tls.h" #include #include #include #include /* * Data items converted to xdr at startup, since they are constant * This is kinda hokey, but may save a little time doing byte swaps */ u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1; /* And other global data */ nfstype nfsv34_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK, NFFIFO, NFNON }; enum vtype newnv2tov_type[8] = { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON, VNON }; enum vtype nv34tov_type[8]={ VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO }; struct timeval nfsboottime; /* Copy boottime once, so it never changes */ int nfscl_ticks; int nfsrv_useacl = 1; struct nfssockreq nfsrv_nfsuserdsock; nfsuserd_state nfsrv_nfsuserd = NOTRUNNING; static int nfsrv_userdupcalls = 0; struct nfsreqhead nfsd_reqq; uid_t nfsrv_defaultuid = UID_NOBODY; gid_t nfsrv_defaultgid = GID_NOGROUP; int nfsrv_lease = NFSRV_LEASE; int ncl_mbuf_mlen = MLEN; int nfsd_enable_stringtouid = 0; int nfsrv_doflexfile = 0; static int nfs_enable_uidtostring = 0; NFSNAMEIDMUTEX; NFSSOCKMUTEX; extern int nfsrv_lughashsize; extern struct mtx nfsrv_dslock_mtx; extern volatile int nfsrv_devidcnt; extern int nfscl_debuglevel; extern struct nfsdevicehead nfsrv_devidhead; extern struct nfsstatsv1 nfsstatsv1; #ifdef KERN_TLS extern u_int ktls_maxlen; #endif SYSCTL_DECL(_vfs_nfs); SYSCTL_INT(_vfs_nfs, OID_AUTO, enable_uidtostring, CTLFLAG_RW, &nfs_enable_uidtostring, 0, "Make nfs always send numeric owner_names"); int nfsrv_maxpnfsmirror = 1; SYSCTL_INT(_vfs_nfs, OID_AUTO, pnfsmirror, CTLFLAG_RD, &nfsrv_maxpnfsmirror, 0, "Mirror level for pNFS service"); int nfs_maxcopyrange = 10 * 1024 * 1024; SYSCTL_INT(_vfs_nfs, OID_AUTO, maxcopyrange, CTLFLAG_RW, &nfs_maxcopyrange, 0, "Max size of a Copy so RPC times reasonable"); bool nfs_use_ext_pgs = false; SYSCTL_BOOL(_vfs_nfs, OID_AUTO, use_ext_pgs, CTLFLAG_RW, &nfs_use_ext_pgs, 0, "Set true to use TCP"); /* * This array of structures indicates, for V4: * retfh - which of 3 types of calling args are used * 0 - doesn't change cfh or use a sfh * 1 - replaces cfh with a new one (unless it returns an error status) * 2 - uses cfh and sfh * needscfh - if the op wants a cfh and premtime * 0 - doesn't use a cfh * 1 - uses a cfh, but doesn't want pre-op attributes * 2 - uses a cfh and wants pre-op attributes * savereply - indicates a non-idempotent Op * 0 - not non-idempotent * 1 - non-idempotent * Ops that are ordered via seqid# are handled separately from these * non-idempotent Ops. * Define it here, since it is used by both the client and server. */ struct nfsv4_opflag nfsv4_opflag[NFSV42_NOPS] = { { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* undef */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* undef */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* undef */ { 0, 1, 0, 0, LK_SHARED, 1, 1 }, /* Access */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Close */ { 0, 2, 0, 1, LK_EXCLUSIVE, 1, 1 }, /* Commit */ { 1, 2, 1, 1, LK_EXCLUSIVE, 1, 1 }, /* Create */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Delegpurge */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Delegreturn */ { 0, 1, 0, 0, LK_SHARED, 1, 1 }, /* Getattr */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* GetFH */ { 2, 1, 1, 1, LK_EXCLUSIVE, 1, 1 }, /* Link */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Lock */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* LockT */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* LockU */ { 1, 2, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Lookup */ { 1, 2, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Lookupp */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* NVerify */ { 1, 1, 0, 1, LK_EXCLUSIVE, 1, 0 }, /* Open */ { 1, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* OpenAttr */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* OpenConfirm */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* OpenDowngrade */ { 1, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* PutFH */ { 1, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* PutPubFH */ { 1, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* PutRootFH */ { 0, 1, 0, 0, LK_SHARED, 1, 0 }, /* Read */ { 0, 1, 0, 0, LK_SHARED, 1, 1 }, /* Readdir */ { 0, 1, 0, 0, LK_SHARED, 1, 1 }, /* ReadLink */ { 0, 2, 1, 1, LK_EXCLUSIVE, 1, 1 }, /* Remove */ { 2, 1, 1, 1, LK_EXCLUSIVE, 1, 1 }, /* Rename */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Renew */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* RestoreFH */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* SaveFH */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* SecInfo */ { 0, 2, 1, 1, LK_EXCLUSIVE, 1, 0 }, /* Setattr */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* SetClientID */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* SetClientIDConfirm */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Verify */ { 0, 2, 1, 1, LK_EXCLUSIVE, 1, 0 }, /* Write */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* ReleaseLockOwner */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Backchannel Ctrl */ { 0, 0, 0, 0, LK_EXCLUSIVE, 0, 0 }, /* Bind Conn to Sess */ { 0, 0, 0, 0, LK_EXCLUSIVE, 0, 0 }, /* Exchange ID */ { 0, 0, 0, 0, LK_EXCLUSIVE, 0, 0 }, /* Create Session */ { 0, 0, 0, 0, LK_EXCLUSIVE, 0, 0 }, /* Destroy Session */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Free StateID */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Get Dir Deleg */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Get Device Info */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Get Device List */ { 0, 1, 0, 1, LK_EXCLUSIVE, 1, 1 }, /* Layout Commit */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Layout Get */ { 0, 1, 0, 1, LK_EXCLUSIVE, 1, 0 }, /* Layout Return */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Secinfo No name */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Sequence */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Set SSV */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Test StateID */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Want Delegation */ { 0, 0, 0, 0, LK_EXCLUSIVE, 0, 0 }, /* Destroy ClientID */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Reclaim Complete */ { 0, 1, 1, 1, LK_EXCLUSIVE, 1, 0 }, /* Allocate */ { 2, 1, 1, 0, LK_SHARED, 1, 0 }, /* Copy */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Copy Notify */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Deallocate */ { 0, 1, 0, 0, LK_SHARED, 1, 0 }, /* IO Advise */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Layout Error */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Layout Stats */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Offload Cancel */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Offload Status */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Read Plus */ { 0, 1, 0, 0, LK_SHARED, 1, 0 }, /* Seek */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Write Same */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Clone */ { 0, 1, 0, 0, LK_SHARED, 1, 1 }, /* Getxattr */ { 0, 1, 1, 1, LK_EXCLUSIVE, 1, 1 }, /* Setxattr */ { 0, 1, 0, 0, LK_SHARED, 1, 1 }, /* Listxattrs */ { 0, 1, 1, 1, LK_EXCLUSIVE, 1, 1 }, /* Removexattr */ }; #endif /* !APPLEKEXT */ static int ncl_mbuf_mhlen = MHLEN; static int nfsrv_usercnt = 0; static int nfsrv_dnsnamelen; static u_char *nfsrv_dnsname = NULL; static int nfsrv_usermax = 999999999; struct nfsrv_lughash { struct mtx mtx; struct nfsuserhashhead lughead; }; static struct nfsrv_lughash *nfsuserhash; static struct nfsrv_lughash *nfsusernamehash; static struct nfsrv_lughash *nfsgrouphash; static struct nfsrv_lughash *nfsgroupnamehash; /* * This static array indicates whether or not the RPC generates a large * reply. This is used by nfs_reply() to decide whether or not an mbuf * cluster should be allocated. (If a cluster is required by an RPC * marked 0 in this array, the code will still work, just not quite as * efficiently.) */ static int nfs_bigreply[NFSV42_NPROCS] = { 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1 }; /* local functions */ static int nfsrv_skipace(struct nfsrv_descript *nd, int *acesizep); static void nfsv4_wanted(struct nfsv4lock *lp); static int nfsrv_cmpmixedcase(u_char *cp, u_char *cp2, int len); static int nfsrv_getuser(int procnum, uid_t uid, gid_t gid, char *name); static void nfsrv_removeuser(struct nfsusrgrp *usrp, int isuser); static int nfsrv_getrefstr(struct nfsrv_descript *, u_char **, u_char **, int *, int *); static void nfsrv_refstrbigenough(int, u_char **, u_char **, int *); static int nfsm_copyfrommbuf(struct nfsrv_descript *, char *, enum uio_seg, int); static int nfsm_copyfrommbuf_extpgs(struct nfsrv_descript *, char *, enum uio_seg, int); static struct mbuf *nfsm_splitatpgno(struct mbuf *, int, int); static struct { int op; int opcnt; const u_char *tag; int taglen; } nfsv4_opmap[NFSV42_NPROCS] = { { 0, 1, "Null", 4 }, { NFSV4OP_GETATTR, 1, "Getattr", 7, }, { NFSV4OP_SETATTR, 2, "Setattr", 7, }, { NFSV4OP_LOOKUP, 3, "Lookup", 6, }, { NFSV4OP_ACCESS, 2, "Access", 6, }, { NFSV4OP_READLINK, 2, "Readlink", 8, }, { NFSV4OP_READ, 1, "Read", 4, }, { NFSV4OP_WRITE, 2, "Write", 5, }, { NFSV4OP_OPEN, 5, "Open", 4, }, { NFSV4OP_CREATE, 5, "Create", 6, }, { NFSV4OP_CREATE, 1, "Create", 6, }, { NFSV4OP_CREATE, 3, "Create", 6, }, { NFSV4OP_REMOVE, 1, "Remove", 6, }, { NFSV4OP_REMOVE, 1, "Remove", 6, }, { NFSV4OP_SAVEFH, 5, "Rename", 6, }, { NFSV4OP_SAVEFH, 4, "Link", 4, }, { NFSV4OP_READDIR, 2, "Readdir", 7, }, { NFSV4OP_READDIR, 2, "Readdir", 7, }, { NFSV4OP_GETATTR, 1, "Getattr", 7, }, { NFSV4OP_GETATTR, 1, "Getattr", 7, }, { NFSV4OP_GETATTR, 1, "Getattr", 7, }, { NFSV4OP_COMMIT, 2, "Commit", 6, }, { NFSV4OP_LOOKUPP, 3, "Lookupp", 7, }, { NFSV4OP_SETCLIENTID, 1, "SetClientID", 11, }, { NFSV4OP_SETCLIENTIDCFRM, 1, "SetClientIDConfirm", 18, }, { NFSV4OP_LOCK, 1, "Lock", 4, }, { NFSV4OP_LOCKU, 1, "LockU", 5, }, { NFSV4OP_OPEN, 2, "Open", 4, }, { NFSV4OP_CLOSE, 1, "Close", 5, }, { NFSV4OP_OPENCONFIRM, 1, "Openconfirm", 11, }, { NFSV4OP_LOCKT, 1, "LockT", 5, }, { NFSV4OP_OPENDOWNGRADE, 1, "Opendowngrade", 13, }, { NFSV4OP_RENEW, 1, "Renew", 5, }, { NFSV4OP_PUTROOTFH, 1, "Dirpath", 7, }, { NFSV4OP_RELEASELCKOWN, 1, "Rellckown", 9, }, { NFSV4OP_DELEGRETURN, 1, "Delegret", 8, }, { NFSV4OP_DELEGRETURN, 3, "DelegRemove", 11, }, { NFSV4OP_DELEGRETURN, 7, "DelegRename1", 12, }, { NFSV4OP_DELEGRETURN, 9, "DelegRename2", 12, }, { NFSV4OP_GETATTR, 1, "Getacl", 6, }, { NFSV4OP_SETATTR, 1, "Setacl", 6, }, { NFSV4OP_EXCHANGEID, 1, "ExchangeID", 10, }, { NFSV4OP_CREATESESSION, 1, "CreateSession", 13, }, { NFSV4OP_DESTROYSESSION, 1, "DestroySession", 14, }, { NFSV4OP_DESTROYCLIENTID, 1, "DestroyClient", 13, }, { NFSV4OP_FREESTATEID, 1, "FreeStateID", 11, }, { NFSV4OP_LAYOUTGET, 1, "LayoutGet", 9, }, { NFSV4OP_GETDEVINFO, 1, "GetDeviceInfo", 13, }, { NFSV4OP_LAYOUTCOMMIT, 1, "LayoutCommit", 12, }, { NFSV4OP_LAYOUTRETURN, 1, "LayoutReturn", 12, }, { NFSV4OP_RECLAIMCOMPL, 1, "ReclaimComplete", 15, }, { NFSV4OP_WRITE, 1, "WriteDS", 7, }, { NFSV4OP_READ, 1, "ReadDS", 6, }, { NFSV4OP_COMMIT, 1, "CommitDS", 8, }, { NFSV4OP_OPEN, 3, "OpenLayoutGet", 13, }, { NFSV4OP_OPEN, 8, "CreateLayGet", 12, }, { NFSV4OP_IOADVISE, 1, "Advise", 6, }, { NFSV4OP_ALLOCATE, 2, "Allocate", 8, }, { NFSV4OP_SAVEFH, 5, "Copy", 4, }, { NFSV4OP_SEEK, 2, "Seek", 4, }, { NFSV4OP_SEEK, 1, "SeekDS", 6, }, { NFSV4OP_GETXATTR, 2, "Getxattr", 8, }, { NFSV4OP_SETXATTR, 2, "Setxattr", 8, }, { NFSV4OP_REMOVEXATTR, 2, "Rmxattr", 7, }, { NFSV4OP_LISTXATTRS, 2, "Listxattr", 9, }, }; /* * NFS RPCS that have large request message size. */ static int nfs_bigrequest[NFSV42_NPROCS] = { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }; /* * Start building a request. Mostly just put the first file handle in * place. */ APPLESTATIC void nfscl_reqstart(struct nfsrv_descript *nd, int procnum, struct nfsmount *nmp, u_int8_t *nfhp, int fhlen, u_int32_t **opcntpp, struct nfsclsession *sep, int vers, int minorvers, bool use_ext) { struct mbuf *mb; u_int32_t *tl; int opcnt; nfsattrbit_t attrbits; /* * First, fill in some of the fields of nd. */ nd->nd_slotseq = NULL; if (vers == NFS_VER4) { nd->nd_flag = ND_NFSV4 | ND_NFSCL; if (minorvers == NFSV41_MINORVERSION) nd->nd_flag |= ND_NFSV41; else if (minorvers == NFSV42_MINORVERSION) nd->nd_flag |= (ND_NFSV41 | ND_NFSV42); } else if (vers == NFS_VER3) nd->nd_flag = ND_NFSV3 | ND_NFSCL; else { if (NFSHASNFSV4(nmp)) { nd->nd_flag = ND_NFSV4 | ND_NFSCL; if (nmp->nm_minorvers == 1) nd->nd_flag |= ND_NFSV41; else if (nmp->nm_minorvers == 2) nd->nd_flag |= (ND_NFSV41 | ND_NFSV42); } else if (NFSHASNFSV3(nmp)) nd->nd_flag = ND_NFSV3 | ND_NFSCL; else nd->nd_flag = ND_NFSV2 | ND_NFSCL; } nd->nd_procnum = procnum; nd->nd_repstat = 0; nd->nd_maxextsiz = 16384; if (use_ext && PMAP_HAS_DMAP != 0) { nd->nd_flag |= ND_NOMAP; #ifdef KERN_TLS nd->nd_maxextsiz = min(TLS_MAX_MSG_SIZE_V10_2, ktls_maxlen); #endif } /* * Get the first mbuf for the request. */ if ((nd->nd_flag & ND_NOMAP) != 0) { - mb = mb_alloc_ext_plus_pages(PAGE_SIZE, M_WAITOK, false, + mb = mb_alloc_ext_plus_pages(PAGE_SIZE, M_WAITOK, mb_free_mext_pgs); nd->nd_mreq = nd->nd_mb = mb; nfsm_set(nd, 0, true); } else { if (nfs_bigrequest[procnum]) NFSMCLGET(mb, M_WAITOK); else NFSMGET(mb); mb->m_len = 0; nd->nd_mreq = nd->nd_mb = mb; nd->nd_bpos = mtod(mb, char *); } /* * And fill the first file handle into the request. */ if (nd->nd_flag & ND_NFSV4) { opcnt = nfsv4_opmap[procnum].opcnt + nfsv4_opflag[nfsv4_opmap[procnum].op].needscfh; if ((nd->nd_flag & ND_NFSV41) != 0) { opcnt += nfsv4_opflag[nfsv4_opmap[procnum].op].needsseq; if (procnum == NFSPROC_RENEW) /* * For the special case of Renew, just do a * Sequence Op. */ opcnt = 1; else if (procnum == NFSPROC_WRITEDS || procnum == NFSPROC_COMMITDS) /* * For the special case of a Writeor Commit to * a DS, the opcnt == 3, for Sequence, PutFH, * Write/Commit. */ opcnt = 3; } /* * What should the tag really be? */ (void) nfsm_strtom(nd, nfsv4_opmap[procnum].tag, nfsv4_opmap[procnum].taglen); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if ((nd->nd_flag & ND_NFSV42) != 0) *tl++ = txdr_unsigned(NFSV42_MINORVERSION); else if ((nd->nd_flag & ND_NFSV41) != 0) *tl++ = txdr_unsigned(NFSV41_MINORVERSION); else *tl++ = txdr_unsigned(NFSV4_MINORVERSION); if (opcntpp != NULL) *opcntpp = tl; *tl = txdr_unsigned(opcnt); if ((nd->nd_flag & ND_NFSV41) != 0 && nfsv4_opflag[nfsv4_opmap[procnum].op].needsseq > 0) { if (nfsv4_opflag[nfsv4_opmap[procnum].op].loopbadsess > 0) nd->nd_flag |= ND_LOOPBADSESS; NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_SEQUENCE); if (sep == NULL) { sep = nfsmnt_mdssession(nmp); nfsv4_setsequence(nmp, nd, sep, nfs_bigreply[procnum]); } else nfsv4_setsequence(nmp, nd, sep, nfs_bigreply[procnum]); } if (nfsv4_opflag[nfsv4_opmap[procnum].op].needscfh > 0) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); (void) nfsm_fhtom(nd, nfhp, fhlen, 0); if (nfsv4_opflag[nfsv4_opmap[procnum].op].needscfh == 2 && procnum != NFSPROC_WRITEDS && procnum != NFSPROC_COMMITDS) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); /* * For Lookup Ops, we want all the directory * attributes, so we can load the name cache. */ if (procnum == NFSPROC_LOOKUP || procnum == NFSPROC_LOOKUPP) NFSGETATTR_ATTRBIT(&attrbits); else { NFSWCCATTR_ATTRBIT(&attrbits); nd->nd_flag |= ND_V4WCCATTR; } (void) nfsrv_putattrbit(nd, &attrbits); } } if (procnum != NFSPROC_RENEW || (nd->nd_flag & ND_NFSV41) == 0) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(nfsv4_opmap[procnum].op); } } else { (void) nfsm_fhtom(nd, nfhp, fhlen, 0); } if (procnum < NFSV42_NPROCS) NFSINCRGLOBAL(nfsstatsv1.rpccnt[procnum]); } /* * Put a state Id in the mbuf list. */ APPLESTATIC void nfsm_stateidtom(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, int flag) { nfsv4stateid_t *st; NFSM_BUILD(st, nfsv4stateid_t *, NFSX_STATEID); if (flag == NFSSTATEID_PUTALLZERO) { st->seqid = 0; st->other[0] = 0; st->other[1] = 0; st->other[2] = 0; } else if (flag == NFSSTATEID_PUTALLONE) { st->seqid = 0xffffffff; st->other[0] = 0xffffffff; st->other[1] = 0xffffffff; st->other[2] = 0xffffffff; } else if (flag == NFSSTATEID_PUTSEQIDZERO) { st->seqid = 0; st->other[0] = stateidp->other[0]; st->other[1] = stateidp->other[1]; st->other[2] = stateidp->other[2]; } else { st->seqid = stateidp->seqid; st->other[0] = stateidp->other[0]; st->other[1] = stateidp->other[1]; st->other[2] = stateidp->other[2]; } } /* * Fill in the setable attributes. The full argument indicates whether * to fill in them all or just mode and time. */ void nfscl_fillsattr(struct nfsrv_descript *nd, struct vattr *vap, struct vnode *vp, int flags, u_int32_t rdev) { u_int32_t *tl; struct nfsv2_sattr *sp; nfsattrbit_t attrbits; switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) { case ND_NFSV2: NFSM_BUILD(sp, struct nfsv2_sattr *, NFSX_V2SATTR); if (vap->va_mode == (mode_t)VNOVAL) sp->sa_mode = newnfs_xdrneg1; else sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); if (vap->va_uid == (uid_t)VNOVAL) sp->sa_uid = newnfs_xdrneg1; else sp->sa_uid = txdr_unsigned(vap->va_uid); if (vap->va_gid == (gid_t)VNOVAL) sp->sa_gid = newnfs_xdrneg1; else sp->sa_gid = txdr_unsigned(vap->va_gid); if (flags & NFSSATTR_SIZE0) sp->sa_size = 0; else if (flags & NFSSATTR_SIZENEG1) sp->sa_size = newnfs_xdrneg1; else if (flags & NFSSATTR_SIZERDEV) sp->sa_size = txdr_unsigned(rdev); else sp->sa_size = txdr_unsigned(vap->va_size); txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); break; case ND_NFSV3: if (vap->va_mode != (mode_t)VNOVAL) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl = txdr_unsigned(vap->va_mode); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } if ((flags & NFSSATTR_FULL) && vap->va_uid != (uid_t)VNOVAL) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl = txdr_unsigned(vap->va_uid); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } if ((flags & NFSSATTR_FULL) && vap->va_gid != (gid_t)VNOVAL) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl = txdr_unsigned(vap->va_gid); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } if ((flags & NFSSATTR_FULL) && vap->va_size != VNOVAL) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = newnfs_true; txdr_hyper(vap->va_size, tl); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } if (vap->va_atime.tv_sec != VNOVAL) { if ((vap->va_vaflags & VA_UTIMES_NULL) == 0) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); txdr_nfsv3time(&vap->va_atime, tl); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); } } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); } if (vap->va_mtime.tv_sec != VNOVAL) { if ((vap->va_vaflags & VA_UTIMES_NULL) == 0) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); txdr_nfsv3time(&vap->va_mtime, tl); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); } } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); } break; case ND_NFSV4: NFSZERO_ATTRBIT(&attrbits); if (vap->va_mode != (mode_t)VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_MODE); if ((flags & NFSSATTR_FULL) && vap->va_uid != (uid_t)VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER); if ((flags & NFSSATTR_FULL) && vap->va_gid != (gid_t)VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNERGROUP); if ((flags & NFSSATTR_FULL) && vap->va_size != VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); if (vap->va_atime.tv_sec != VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET); if (vap->va_mtime.tv_sec != VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFYSET); (void) nfsv4_fillattr(nd, vp->v_mount, vp, NULL, vap, NULL, 0, &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0, NULL); break; } } #ifndef APPLE /* * copies mbuf chain to the uio scatter/gather list */ int nfsm_mbufuio(struct nfsrv_descript *nd, struct uio *uiop, int siz) { char *uiocp; int xfer, left, len; long uiosiz, rem; int error = 0; rem = NFSM_RNDUP(siz) - siz; while (siz > 0) { if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL) { error = EBADRPC; goto out; } left = uiop->uio_iov->iov_len; uiocp = uiop->uio_iov->iov_base; if (left > siz) left = siz; uiosiz = left; while (left > 0) { if ((nd->nd_md->m_flags & M_NOMAP) != 0) xfer = nfsm_copyfrommbuf_extpgs(nd, uiocp, uiop->uio_segflg, left); else xfer = nfsm_copyfrommbuf(nd, uiocp, uiop->uio_segflg, left); left -= xfer; uiocp += xfer; uiop->uio_offset += xfer; uiop->uio_resid -= xfer; if (left > 0 && !nfsm_shiftnext(nd, &len)) { error = EBADRPC; goto out; } } if (uiop->uio_iov->iov_len <= siz) { uiop->uio_iovcnt--; uiop->uio_iov++; } else { uiop->uio_iov->iov_base = (void *) ((char *)uiop->uio_iov->iov_base + uiosiz); uiop->uio_iov->iov_len -= uiosiz; } siz -= uiosiz; } if (rem > 0) error = nfsm_advance(nd, rem, -1); out: NFSEXITCODE2(error, nd); return (error); } #endif /* !APPLE */ /* * Help break down an mbuf chain by setting the first siz bytes contiguous * pointed to by returned val. * This is used by the macro NFSM_DISSECT for tough * cases. */ APPLESTATIC void * nfsm_dissct(struct nfsrv_descript *nd, int siz, int how) { struct mbuf *mp2; struct mbuf_ext_pgs *pgs; int siz2, xfer; caddr_t p; int left; caddr_t retp; retp = NULL; if ((nd->nd_md->m_flags & M_NOMAP) != 0) left = nd->nd_dextpgsiz; else left = mtod(nd->nd_md, char *) + nd->nd_md->m_len - nd->nd_dpos; while (left == 0) { if ((nd->nd_md->m_flags & M_NOMAP) != 0 && nd->nd_dextpg < - nd->nd_md->m_ext.ext_pgs->npgs - 1) { - pgs = nd->nd_md->m_ext.ext_pgs; + nd->nd_md->m_ext_pgs.npgs - 1) { + pgs = &nd->nd_md->m_ext_pgs; nd->nd_dextpg++; nd->nd_dpos = (char *)(void *) - PHYS_TO_DMAP(pgs->pa[nd->nd_dextpg]); + PHYS_TO_DMAP(nd->nd_md->m_epg_pa[nd->nd_dextpg]); left = nd->nd_dextpgsiz = mbuf_ext_pg_len(pgs, nd->nd_dextpg, 0); } else if (!nfsm_shiftnext(nd, &left)) return (NULL); } if (left >= siz) { retp = nd->nd_dpos; nd->nd_dpos += siz; if ((nd->nd_md->m_flags & M_NOMAP) != 0) nd->nd_dextpgsiz -= siz; } else if (siz > ncl_mbuf_mhlen) { panic("nfs S too big"); } else { /* Make sure an ext_pgs mbuf is at the last page. */ if ((nd->nd_md->m_flags & M_NOMAP) != 0) { if (nd->nd_dextpg < - nd->nd_md->m_ext.ext_pgs->npgs - 1) { + nd->nd_md->m_ext_pgs.npgs - 1) { mp2 = nfsm_splitatpgno(nd->nd_md, nd->nd_dextpg, how); if (mp2 == NULL) return (NULL); } - nd->nd_md->m_ext.ext_pgs->last_pg_len -= - left; + nd->nd_md->m_ext_pgs.last_pg_len -= left; } if (nd->nd_md->m_next == NULL) return (NULL); /* Allocate a new mbuf for the "siz" bytes of data. */ MGET(mp2, MT_DATA, how); if (mp2 == NULL) return (NULL); /* * Link the new mp2 mbuf into the list then copy left * bytes from the mbuf before it and siz - left bytes * from the mbuf after it. */ mp2->m_next = nd->nd_md->m_next; nd->nd_md->m_next = mp2; nd->nd_md->m_len -= left; retp = p = mtod(mp2, char *); memcpy(p, nd->nd_dpos, left); /* Copy what was left */ siz2 = siz - left; p += left; mp2->m_len = siz; nd->nd_md = mp2->m_next; /* Loop around copying up the siz2 bytes */ while (siz2 > 0) { if (nd->nd_md == NULL) return (NULL); nfsm_set(nd, 0, false); if ((nd->nd_md->m_flags & M_NOMAP) != 0) xfer = nfsm_copyfrommbuf_extpgs(nd, p, UIO_SYSSPACE, siz2); else xfer = nfsm_copyfrommbuf(nd, p, UIO_SYSSPACE, siz2); p += xfer; siz2 -= xfer; if (siz2 > 0) nd->nd_md = nd->nd_md->m_next; } } return (retp); } /* * Advance the position in the mbuf chain. * If offs == 0, this is a no-op, but it is simpler to just return from * here than check for offs > 0 for all calls to nfsm_advance. * If left == -1, it should be calculated here. */ APPLESTATIC int nfsm_advance(struct nfsrv_descript *nd, int offs, int left) { int error = 0, xfer; if (offs == 0) goto out; /* * A negative offs might indicate a corrupted mbuf chain and, * as such, a printf is logged. */ if (offs < 0) { printf("nfsrv_advance: negative offs\n"); error = EBADRPC; goto out; } /* * If left == -1, calculate it here. */ if (left == -1) { if ((nd->nd_md->m_flags & M_NOMAP) != 0) left = nd->nd_dextpgsiz; else left = mtod(nd->nd_md, char *) + nd->nd_md->m_len - nd->nd_dpos; } /* * Loop around, advancing over the mbuf data. */ while (offs > left) { if ((nd->nd_md->m_flags & M_NOMAP) != 0 && nd->nd_dextpg < - nd->nd_md->m_ext.ext_pgs->npgs - 1) { + nd->nd_md->m_ext_pgs.npgs - 1) { xfer = nfsm_copyfrommbuf_extpgs(nd, NULL, UIO_SYSSPACE, offs); offs -= xfer; } else offs -= left; left = 0; if (offs > 0 && !nfsm_shiftnext(nd, &left)) { error = EBADRPC; goto out; } } if (offs > 0) { if ((nd->nd_md->m_flags & M_NOMAP) != 0) nfsm_copyfrommbuf_extpgs(nd, NULL, UIO_SYSSPACE, offs); else nd->nd_dpos += offs; } out: NFSEXITCODE(error); return (error); } /* * Copy a string into mbuf(s). * Return the number of bytes output, including XDR overheads. */ APPLESTATIC int nfsm_strtom(struct nfsrv_descript *nd, const char *cp, int siz) { struct mbuf *m2; int xfer, left; struct mbuf *m1; int rem, bytesize; u_int32_t *tl; char *cp2; NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(siz); rem = NFSM_RNDUP(siz) - siz; bytesize = NFSX_UNSIGNED + siz + rem; m2 = nd->nd_mb; cp2 = nd->nd_bpos; if ((nd->nd_flag & ND_NOMAP) != 0) left = nd->nd_bextpgsiz; else left = M_TRAILINGSPACE(m2); KASSERT(((m2->m_flags & (M_EXT | M_NOMAP)) == (M_EXT | M_NOMAP) && (nd->nd_flag & ND_NOMAP) != 0) || ((m2->m_flags & (M_EXT | M_NOMAP)) != (M_EXT | M_NOMAP) && (nd->nd_flag & ND_NOMAP) == 0), ("nfsm_strtom: ext_pgs and non-ext_pgs mbufs mixed")); /* * Loop around copying the string to mbuf(s). */ while (siz > 0) { if (left == 0) { if ((nd->nd_flag & ND_NOMAP) != 0) { m2 = nfsm_add_ext_pgs(m2, nd->nd_maxextsiz, &nd->nd_bextpg); cp2 = (char *)(void *)PHYS_TO_DMAP( - m2->m_ext.ext_pgs->pa[nd->nd_bextpg]); + m2->m_epg_pa[nd->nd_bextpg]); nd->nd_bextpgsiz = left = PAGE_SIZE; } else { if (siz > ncl_mbuf_mlen) NFSMCLGET(m1, M_WAITOK); else NFSMGET(m1); m1->m_len = 0; cp2 = mtod(m1, char *); left = M_TRAILINGSPACE(m1); m2->m_next = m1; m2 = m1; } } if (left >= siz) xfer = siz; else xfer = left; NFSBCOPY(cp, cp2, xfer); cp += xfer; cp2 += xfer; m2->m_len += xfer; siz -= xfer; left -= xfer; if ((nd->nd_flag & ND_NOMAP) != 0) { nd->nd_bextpgsiz -= xfer; - m2->m_ext.ext_pgs->last_pg_len += xfer; + m2->m_ext_pgs.last_pg_len += xfer; } if (siz == 0 && rem) { if (left < rem) panic("nfsm_strtom"); NFSBZERO(cp2, rem); m2->m_len += rem; cp2 += rem; if ((nd->nd_flag & ND_NOMAP) != 0) { nd->nd_bextpgsiz -= rem; - m2->m_ext.ext_pgs->last_pg_len += rem; + m2->m_ext_pgs.last_pg_len += rem; } } } nd->nd_mb = m2; if ((nd->nd_flag & ND_NOMAP) != 0) nd->nd_bpos = cp2; else nd->nd_bpos = mtod(m2, char *) + m2->m_len; return (bytesize); } /* * Called once to initialize data structures... */ APPLESTATIC void newnfs_init(void) { static int nfs_inited = 0; if (nfs_inited) return; nfs_inited = 1; newnfs_true = txdr_unsigned(TRUE); newnfs_false = txdr_unsigned(FALSE); newnfs_xdrneg1 = txdr_unsigned(-1); nfscl_ticks = (hz * NFS_TICKINTVL + 500) / 1000; if (nfscl_ticks < 1) nfscl_ticks = 1; NFSSETBOOTTIME(nfsboottime); /* * Initialize reply list and start timer */ TAILQ_INIT(&nfsd_reqq); NFS_TIMERINIT; } /* * Put a file handle in an mbuf list. * If the size argument == 0, just use the default size. * set_true == 1 if there should be an newnfs_true prepended on the file handle. * Return the number of bytes output, including XDR overhead. */ APPLESTATIC int nfsm_fhtom(struct nfsrv_descript *nd, u_int8_t *fhp, int size, int set_true) { u_int32_t *tl; u_int8_t *cp; int fullsiz, rem, bytesize = 0; if (size == 0) size = NFSX_MYFH; switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) { case ND_NFSV2: if (size > NFSX_V2FH) panic("fh size > NFSX_V2FH for NFSv2"); NFSM_BUILD(cp, u_int8_t *, NFSX_V2FH); NFSBCOPY(fhp, cp, size); if (size < NFSX_V2FH) NFSBZERO(cp + size, NFSX_V2FH - size); bytesize = NFSX_V2FH; break; case ND_NFSV3: case ND_NFSV4: fullsiz = NFSM_RNDUP(size); rem = fullsiz - size; if (set_true) { bytesize = 2 * NFSX_UNSIGNED + fullsiz; NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_true; } else { bytesize = NFSX_UNSIGNED + fullsiz; } (void) nfsm_strtom(nd, fhp, size); break; } return (bytesize); } /* * This function compares two net addresses by family and returns TRUE * if they are the same host. * If there is any doubt, return FALSE. * The AF_INET family is handled as a special case so that address mbufs * don't need to be saved to store "struct in_addr", which is only 4 bytes. */ APPLESTATIC int nfsaddr_match(int family, union nethostaddr *haddr, NFSSOCKADDR_T nam) { #ifdef INET struct sockaddr_in *inetaddr; #endif switch (family) { #ifdef INET case AF_INET: inetaddr = NFSSOCKADDR(nam, struct sockaddr_in *); if (inetaddr->sin_family == AF_INET && inetaddr->sin_addr.s_addr == haddr->had_inet.s_addr) return (1); break; #endif #ifdef INET6 case AF_INET6: { struct sockaddr_in6 *inetaddr6; inetaddr6 = NFSSOCKADDR(nam, struct sockaddr_in6 *); /* XXX - should test sin6_scope_id ? */ if (inetaddr6->sin6_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(&inetaddr6->sin6_addr, &haddr->had_inet6)) return (1); } break; #endif } return (0); } /* * Similar to the above, but takes to NFSSOCKADDR_T args. */ APPLESTATIC int nfsaddr2_match(NFSSOCKADDR_T nam1, NFSSOCKADDR_T nam2) { struct sockaddr_in *addr1, *addr2; struct sockaddr *inaddr; inaddr = NFSSOCKADDR(nam1, struct sockaddr *); switch (inaddr->sa_family) { case AF_INET: addr1 = NFSSOCKADDR(nam1, struct sockaddr_in *); addr2 = NFSSOCKADDR(nam2, struct sockaddr_in *); if (addr2->sin_family == AF_INET && addr1->sin_addr.s_addr == addr2->sin_addr.s_addr) return (1); break; #ifdef INET6 case AF_INET6: { struct sockaddr_in6 *inet6addr1, *inet6addr2; inet6addr1 = NFSSOCKADDR(nam1, struct sockaddr_in6 *); inet6addr2 = NFSSOCKADDR(nam2, struct sockaddr_in6 *); /* XXX - should test sin6_scope_id ? */ if (inet6addr2->sin6_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(&inet6addr1->sin6_addr, &inet6addr2->sin6_addr)) return (1); } break; #endif } return (0); } /* * Trim trailing data off the mbuf list being built. */ APPLESTATIC void nfsm_trimtrailing(struct nfsrv_descript *nd, struct mbuf *mb, char *bpos, int bextpg, int bextpgsiz) { struct mbuf_ext_pgs *pgs; vm_page_t pg; int fullpgsiz, i; if (mb->m_next != NULL) { m_freem(mb->m_next); mb->m_next = NULL; } if ((mb->m_flags & M_NOMAP) != 0) { - pgs = mb->m_ext.ext_pgs; + pgs = &mb->m_ext_pgs; /* First, get rid of any pages after this position. */ for (i = pgs->npgs - 1; i > bextpg; i--) { - pg = PHYS_TO_VM_PAGE(pgs->pa[i]); + pg = PHYS_TO_VM_PAGE(mb->m_epg_pa[i]); vm_page_unwire_noq(pg); vm_page_free(pg); } pgs->npgs = bextpg + 1; if (bextpg == 0) fullpgsiz = PAGE_SIZE - pgs->first_pg_off; else fullpgsiz = PAGE_SIZE; pgs->last_pg_len = fullpgsiz - bextpgsiz; mb->m_len = mbuf_ext_pg_len(pgs, 0, pgs->first_pg_off); for (i = 1; i < pgs->npgs; i++) mb->m_len += mbuf_ext_pg_len(pgs, i, 0); nd->nd_bextpgsiz = bextpgsiz; nd->nd_bextpg = bextpg; } else mb->m_len = bpos - mtod(mb, char *); nd->nd_mb = mb; nd->nd_bpos = bpos; } /* * Dissect a file handle on the client. */ APPLESTATIC int nfsm_getfh(struct nfsrv_descript *nd, struct nfsfh **nfhpp) { u_int32_t *tl; struct nfsfh *nfhp; int error, len; *nfhpp = NULL; if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if ((len = fxdr_unsigned(int, *tl)) <= 0 || len > NFSX_FHMAX) { error = EBADRPC; goto nfsmout; } } else len = NFSX_V2FH; nfhp = malloc(sizeof (struct nfsfh) + len, M_NFSFH, M_WAITOK); error = nfsrv_mtostr(nd, nfhp->nfh_fh, len); if (error) { free(nfhp, M_NFSFH); goto nfsmout; } nfhp->nfh_len = len; *nfhpp = nfhp; nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Break down the nfsv4 acl. * If the aclp == NULL or won't fit in an acl, just discard the acl info. */ APPLESTATIC int nfsrv_dissectacl(struct nfsrv_descript *nd, NFSACL_T *aclp, int *aclerrp, int *aclsizep, __unused NFSPROC_T *p) { u_int32_t *tl; int i, aclsize; int acecnt, error = 0, aceerr = 0, acesize; *aclerrp = 0; if (aclp) aclp->acl_cnt = 0; /* * Parse out the ace entries and expect them to conform to * what can be supported by R/W/X bits. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); aclsize = NFSX_UNSIGNED; acecnt = fxdr_unsigned(int, *tl); if (acecnt > ACL_MAX_ENTRIES) aceerr = NFSERR_ATTRNOTSUPP; if (nfsrv_useacl == 0) aceerr = NFSERR_ATTRNOTSUPP; for (i = 0; i < acecnt; i++) { if (aclp && !aceerr) error = nfsrv_dissectace(nd, &aclp->acl_entry[i], &aceerr, &acesize, p); else error = nfsrv_skipace(nd, &acesize); if (error) goto nfsmout; aclsize += acesize; } if (aclp && !aceerr) aclp->acl_cnt = acecnt; if (aceerr) *aclerrp = aceerr; if (aclsizep) *aclsizep = aclsize; nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Skip over an NFSv4 ace entry. Just dissect the xdr and discard it. */ static int nfsrv_skipace(struct nfsrv_descript *nd, int *acesizep) { u_int32_t *tl; int error, len = 0; NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); len = fxdr_unsigned(int, *(tl + 3)); error = nfsm_advance(nd, NFSM_RNDUP(len), -1); nfsmout: *acesizep = NFSM_RNDUP(len) + (4 * NFSX_UNSIGNED); NFSEXITCODE2(error, nd); return (error); } /* * Get attribute bits from an mbuf list. * Returns EBADRPC for a parsing error, 0 otherwise. * If the clearinvalid flag is set, clear the bits not supported. */ APPLESTATIC int nfsrv_getattrbits(struct nfsrv_descript *nd, nfsattrbit_t *attrbitp, int *cntp, int *retnotsupp) { u_int32_t *tl; int cnt, i, outcnt; int error = 0; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); cnt = fxdr_unsigned(int, *tl); if (cnt < 0) { error = NFSERR_BADXDR; goto nfsmout; } if (cnt > NFSATTRBIT_MAXWORDS) outcnt = NFSATTRBIT_MAXWORDS; else outcnt = cnt; NFSZERO_ATTRBIT(attrbitp); if (outcnt > 0) { NFSM_DISSECT(tl, u_int32_t *, outcnt * NFSX_UNSIGNED); for (i = 0; i < outcnt; i++) attrbitp->bits[i] = fxdr_unsigned(u_int32_t, *tl++); } for (i = 0; i < (cnt - outcnt); i++) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (retnotsupp != NULL && *tl != 0) *retnotsupp = NFSERR_ATTRNOTSUPP; } if (cntp) *cntp = NFSX_UNSIGNED + (cnt * NFSX_UNSIGNED); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Get the attributes for V4. * If the compare flag is true, test for any attribute changes, * otherwise return the attribute values. * These attributes cover fields in "struct vattr", "struct statfs", * "struct nfsfsinfo", the file handle and the lease duration. * The value of retcmpp is set to 1 if all attributes are the same, * and 0 otherwise. * Returns EBADRPC if it can't be parsed, 0 otherwise. */ APPLESTATIC int nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nap, struct nfsfh **nfhpp, fhandle_t *fhp, int fhsize, struct nfsv3_pathconf *pc, struct statfs *sbp, struct nfsstatfs *sfp, struct nfsfsinfo *fsp, NFSACL_T *aclp, int compare, int *retcmpp, u_int32_t *leasep, u_int32_t *rderrp, NFSPROC_T *p, struct ucred *cred) { u_int32_t *tl; int i = 0, j, k, l = 0, m, bitpos, attrsum = 0; int error, tfhsize, aceerr, attrsize, cnt, retnotsup; u_char *cp, *cp2, namestr[NFSV4_SMALLSTR + 1]; nfsattrbit_t attrbits, retattrbits, checkattrbits; struct nfsfh *tnfhp; struct nfsreferral *refp; u_quad_t tquad; nfsquad_t tnfsquad; struct timespec temptime; uid_t uid; gid_t gid; u_int32_t freenum = 0, tuint; u_int64_t uquad = 0, thyp, thyp2; #ifdef QUOTA struct dqblk dqb; uid_t savuid; #endif CTASSERT(sizeof(ino_t) == sizeof(uint64_t)); if (compare) { retnotsup = 0; error = nfsrv_getattrbits(nd, &attrbits, NULL, &retnotsup); } else { error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); } if (error) goto nfsmout; if (compare) { *retcmpp = retnotsup; } else { /* * Just set default values to some of the important ones. */ if (nap != NULL) { nap->na_type = VREG; nap->na_mode = 0; nap->na_rdev = (NFSDEV_T)0; nap->na_mtime.tv_sec = 0; nap->na_mtime.tv_nsec = 0; nap->na_gen = 0; nap->na_flags = 0; nap->na_blocksize = NFS_FABLKSIZE; } if (sbp != NULL) { sbp->f_bsize = NFS_FABLKSIZE; sbp->f_blocks = 0; sbp->f_bfree = 0; sbp->f_bavail = 0; sbp->f_files = 0; sbp->f_ffree = 0; } if (fsp != NULL) { fsp->fs_rtmax = 8192; fsp->fs_rtpref = 8192; fsp->fs_maxname = NFS_MAXNAMLEN; fsp->fs_wtmax = 8192; fsp->fs_wtpref = 8192; fsp->fs_wtmult = NFS_FABLKSIZE; fsp->fs_dtpref = 8192; fsp->fs_maxfilesize = 0xffffffffffffffffull; fsp->fs_timedelta.tv_sec = 0; fsp->fs_timedelta.tv_nsec = 1; fsp->fs_properties = (NFSV3_FSFLINK | NFSV3_FSFSYMLINK | NFSV3_FSFHOMOGENEOUS | NFSV3_FSFCANSETTIME); } if (pc != NULL) { pc->pc_linkmax = NFS_LINK_MAX; pc->pc_namemax = NAME_MAX; pc->pc_notrunc = 0; pc->pc_chownrestricted = 0; pc->pc_caseinsensitive = 0; pc->pc_casepreserving = 1; } if (sfp != NULL) { sfp->sf_ffiles = UINT64_MAX; sfp->sf_tfiles = UINT64_MAX; sfp->sf_afiles = UINT64_MAX; sfp->sf_fbytes = UINT64_MAX; sfp->sf_tbytes = UINT64_MAX; sfp->sf_abytes = UINT64_MAX; } } /* * Loop around getting the attributes. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsize = fxdr_unsigned(int, *tl); for (bitpos = 0; bitpos < NFSATTRBIT_MAX; bitpos++) { if (attrsum > attrsize) { error = NFSERR_BADXDR; goto nfsmout; } if (NFSISSET_ATTRBIT(&attrbits, bitpos)) switch (bitpos) { case NFSATTRBIT_SUPPORTEDATTRS: retnotsup = 0; if (compare || nap == NULL) error = nfsrv_getattrbits(nd, &retattrbits, &cnt, &retnotsup); else error = nfsrv_getattrbits(nd, &nap->na_suppattr, &cnt, &retnotsup); if (error) goto nfsmout; if (compare && !(*retcmpp)) { NFSSETSUPP_ATTRBIT(&checkattrbits, nd); /* Some filesystem do not support NFSv4ACL */ if (nfsrv_useacl == 0 || nfs_supportsnfsv4acls(vp) == 0) { NFSCLRBIT_ATTRBIT(&checkattrbits, NFSATTRBIT_ACL); NFSCLRBIT_ATTRBIT(&checkattrbits, NFSATTRBIT_ACLSUPPORT); } if (!NFSEQUAL_ATTRBIT(&retattrbits, &checkattrbits) || retnotsup) *retcmpp = NFSERR_NOTSAME; } attrsum += cnt; break; case NFSATTRBIT_TYPE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (nap->na_type != nfsv34tov_type(*tl)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_type = nfsv34tov_type(*tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_FHEXPIRETYPE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) { if (fxdr_unsigned(int, *tl) != NFSV4FHTYPE_PERSISTENT) *retcmpp = NFSERR_NOTSAME; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_CHANGE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp)) { if (nap->na_filerev != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_filerev = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_SIZE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp)) { if (nap->na_size != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_size = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_LINKSUPPORT: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (fsp->fs_properties & NFSV3_FSFLINK) { if (*tl == newnfs_false) *retcmpp = NFSERR_NOTSAME; } else { if (*tl == newnfs_true) *retcmpp = NFSERR_NOTSAME; } } } else if (fsp != NULL) { if (*tl == newnfs_true) fsp->fs_properties |= NFSV3_FSFLINK; else fsp->fs_properties &= ~NFSV3_FSFLINK; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_SYMLINKSUPPORT: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (fsp->fs_properties & NFSV3_FSFSYMLINK) { if (*tl == newnfs_false) *retcmpp = NFSERR_NOTSAME; } else { if (*tl == newnfs_true) *retcmpp = NFSERR_NOTSAME; } } } else if (fsp != NULL) { if (*tl == newnfs_true) fsp->fs_properties |= NFSV3_FSFSYMLINK; else fsp->fs_properties &= ~NFSV3_FSFSYMLINK; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_NAMEDATTR: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) { if (*tl != newnfs_false) *retcmpp = NFSERR_NOTSAME; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_FSID: NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); thyp = fxdr_hyper(tl); tl += 2; thyp2 = fxdr_hyper(tl); if (compare) { if (*retcmpp == 0) { if (thyp != (u_int64_t) vfs_statfs(vnode_mount(vp))->f_fsid.val[0] || thyp2 != (u_int64_t) vfs_statfs(vnode_mount(vp))->f_fsid.val[1]) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_filesid[0] = thyp; nap->na_filesid[1] = thyp2; } attrsum += (4 * NFSX_UNSIGNED); break; case NFSATTRBIT_UNIQUEHANDLES: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) { if (*tl != newnfs_true) *retcmpp = NFSERR_NOTSAME; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_LEASETIME: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (fxdr_unsigned(int, *tl) != nfsrv_lease && !(*retcmpp)) *retcmpp = NFSERR_NOTSAME; } else if (leasep != NULL) { *leasep = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_RDATTRERROR: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) *retcmpp = NFSERR_INVAL; } else if (rderrp != NULL) { *rderrp = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_ACL: if (compare) { if (!(*retcmpp)) { if (nfsrv_useacl && nfs_supportsnfsv4acls(vp)) { NFSACL_T *naclp; naclp = acl_alloc(M_WAITOK); error = nfsrv_dissectacl(nd, naclp, &aceerr, &cnt, p); if (error) { acl_free(naclp); goto nfsmout; } if (aceerr || aclp == NULL || nfsrv_compareacl(aclp, naclp)) *retcmpp = NFSERR_NOTSAME; acl_free(naclp); } else { error = nfsrv_dissectacl(nd, NULL, &aceerr, &cnt, p); *retcmpp = NFSERR_ATTRNOTSUPP; } } } else { if (vp != NULL && aclp != NULL) error = nfsrv_dissectacl(nd, aclp, &aceerr, &cnt, p); else error = nfsrv_dissectacl(nd, NULL, &aceerr, &cnt, p); if (error) goto nfsmout; } attrsum += cnt; break; case NFSATTRBIT_ACLSUPPORT: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) { if (nfsrv_useacl && nfs_supportsnfsv4acls(vp)) { if (fxdr_unsigned(u_int32_t, *tl) != NFSV4ACE_SUPTYPES) *retcmpp = NFSERR_NOTSAME; } else { *retcmpp = NFSERR_ATTRNOTSUPP; } } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_ARCHIVE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_CANSETTIME: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (fsp->fs_properties & NFSV3_FSFCANSETTIME) { if (*tl == newnfs_false) *retcmpp = NFSERR_NOTSAME; } else { if (*tl == newnfs_true) *retcmpp = NFSERR_NOTSAME; } } } else if (fsp != NULL) { if (*tl == newnfs_true) fsp->fs_properties |= NFSV3_FSFCANSETTIME; else fsp->fs_properties &= ~NFSV3_FSFCANSETTIME; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_CASEINSENSITIVE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (*tl != newnfs_false) *retcmpp = NFSERR_NOTSAME; } } else if (pc != NULL) { pc->pc_caseinsensitive = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_CASEPRESERVING: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (*tl != newnfs_true) *retcmpp = NFSERR_NOTSAME; } } else if (pc != NULL) { pc->pc_casepreserving = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_CHOWNRESTRICTED: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (*tl != newnfs_true) *retcmpp = NFSERR_NOTSAME; } } else if (pc != NULL) { pc->pc_chownrestricted = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_FILEHANDLE: error = nfsm_getfh(nd, &tnfhp); if (error) goto nfsmout; tfhsize = tnfhp->nfh_len; if (compare) { if (!(*retcmpp) && !NFSRV_CMPFH(tnfhp->nfh_fh, tfhsize, fhp, fhsize)) *retcmpp = NFSERR_NOTSAME; free(tnfhp, M_NFSFH); } else if (nfhpp != NULL) { *nfhpp = tnfhp; } else { free(tnfhp, M_NFSFH); } attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(tfhsize)); break; case NFSATTRBIT_FILEID: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); thyp = fxdr_hyper(tl); if (compare) { if (!(*retcmpp)) { if (nap->na_fileid != thyp) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) nap->na_fileid = thyp; attrsum += NFSX_HYPER; break; case NFSATTRBIT_FILESAVAIL: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp) && sfp->sf_afiles != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } else if (sfp != NULL) { sfp->sf_afiles = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_FILESFREE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp) && sfp->sf_ffiles != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } else if (sfp != NULL) { sfp->sf_ffiles = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_FILESTOTAL: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp) && sfp->sf_tfiles != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } else if (sfp != NULL) { sfp->sf_tfiles = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_FSLOCATIONS: error = nfsrv_getrefstr(nd, &cp, &cp2, &l, &m); if (error) goto nfsmout; attrsum += l; if (compare && !(*retcmpp)) { refp = nfsv4root_getreferral(vp, NULL, 0); if (refp != NULL) { if (cp == NULL || cp2 == NULL || strcmp(cp, "/") || strcmp(cp2, refp->nfr_srvlist)) *retcmpp = NFSERR_NOTSAME; } else if (m == 0) { *retcmpp = NFSERR_NOTSAME; } } if (cp != NULL) free(cp, M_NFSSTRING); if (cp2 != NULL) free(cp2, M_NFSSTRING); break; case NFSATTRBIT_HIDDEN: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_HOMOGENEOUS: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (fsp->fs_properties & NFSV3_FSFHOMOGENEOUS) { if (*tl == newnfs_false) *retcmpp = NFSERR_NOTSAME; } else { if (*tl == newnfs_true) *retcmpp = NFSERR_NOTSAME; } } } else if (fsp != NULL) { if (*tl == newnfs_true) fsp->fs_properties |= NFSV3_FSFHOMOGENEOUS; else fsp->fs_properties &= ~NFSV3_FSFHOMOGENEOUS; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_MAXFILESIZE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); tnfsquad.qval = fxdr_hyper(tl); if (compare) { if (!(*retcmpp)) { tquad = NFSRV_MAXFILESIZE; if (tquad != tnfsquad.qval) *retcmpp = NFSERR_NOTSAME; } } else if (fsp != NULL) { fsp->fs_maxfilesize = tnfsquad.qval; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_MAXLINK: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (fxdr_unsigned(int, *tl) != NFS_LINK_MAX) *retcmpp = NFSERR_NOTSAME; } } else if (pc != NULL) { pc->pc_linkmax = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_MAXNAME: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (fsp->fs_maxname != fxdr_unsigned(u_int32_t, *tl)) *retcmpp = NFSERR_NOTSAME; } } else { tuint = fxdr_unsigned(u_int32_t, *tl); /* * Some Linux NFSv4 servers report this * as 0 or 4billion, so I'll set it to * NFS_MAXNAMLEN. If a server actually creates * a name longer than NFS_MAXNAMLEN, it will * get an error back. */ if (tuint == 0 || tuint > NFS_MAXNAMLEN) tuint = NFS_MAXNAMLEN; if (fsp != NULL) fsp->fs_maxname = tuint; if (pc != NULL) pc->pc_namemax = tuint; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_MAXREAD: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp)) { if (fsp->fs_rtmax != fxdr_unsigned(u_int32_t, *(tl + 1)) || *tl != 0) *retcmpp = NFSERR_NOTSAME; } } else if (fsp != NULL) { fsp->fs_rtmax = fxdr_unsigned(u_int32_t, *++tl); fsp->fs_rtpref = fsp->fs_rtmax; fsp->fs_dtpref = fsp->fs_rtpref; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_MAXWRITE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp)) { if (fsp->fs_wtmax != fxdr_unsigned(u_int32_t, *(tl + 1)) || *tl != 0) *retcmpp = NFSERR_NOTSAME; } } else if (fsp != NULL) { fsp->fs_wtmax = fxdr_unsigned(int, *++tl); fsp->fs_wtpref = fsp->fs_wtmax; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_MIMETYPE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i)); error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; break; case NFSATTRBIT_MODE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (nap->na_mode != nfstov_mode(*tl)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_mode = nfstov_mode(*tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_NOTRUNC: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (*tl != newnfs_true) *retcmpp = NFSERR_NOTSAME; } } else if (pc != NULL) { pc->pc_notrunc = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_NUMLINKS: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); tuint = fxdr_unsigned(u_int32_t, *tl); if (compare) { if (!(*retcmpp)) { if ((u_int32_t)nap->na_nlink != tuint) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_nlink = tuint; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_OWNER: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); j = fxdr_unsigned(int, *tl); if (j < 0) { error = NFSERR_BADXDR; goto nfsmout; } attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); if (j > NFSV4_SMALLSTR) cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); else cp = namestr; error = nfsrv_mtostr(nd, cp, j); if (error) { if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); goto nfsmout; } if (compare) { if (!(*retcmpp)) { if (nfsv4_strtouid(nd, cp, j, &uid) || nap->na_uid != uid) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { if (nfsv4_strtouid(nd, cp, j, &uid)) nap->na_uid = nfsrv_defaultuid; else nap->na_uid = uid; } if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); break; case NFSATTRBIT_OWNERGROUP: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); j = fxdr_unsigned(int, *tl); if (j < 0) { error = NFSERR_BADXDR; goto nfsmout; } attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); if (j > NFSV4_SMALLSTR) cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); else cp = namestr; error = nfsrv_mtostr(nd, cp, j); if (error) { if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); goto nfsmout; } if (compare) { if (!(*retcmpp)) { if (nfsv4_strtogid(nd, cp, j, &gid) || nap->na_gid != gid) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { if (nfsv4_strtogid(nd, cp, j, &gid)) nap->na_gid = nfsrv_defaultgid; else nap->na_gid = gid; } if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); break; case NFSATTRBIT_QUOTAHARD: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (sbp != NULL) { if (priv_check_cred(cred, PRIV_VFS_EXCEEDQUOTA)) freenum = sbp->f_bfree; else freenum = sbp->f_bavail; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(vnode_mount(vp),QCMD(Q_GETQUOTA, USRQUOTA), cred->cr_uid, (caddr_t)&dqb)) freenum = min(dqb.dqb_bhardlimit, freenum); p->p_cred->p_ruid = savuid; #endif /* QUOTA */ uquad = (u_int64_t)freenum; NFSQUOTABLKTOBYTE(uquad, sbp->f_bsize); } if (compare && !(*retcmpp)) { if (uquad != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_QUOTASOFT: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (sbp != NULL) { if (priv_check_cred(cred, PRIV_VFS_EXCEEDQUOTA)) freenum = sbp->f_bfree; else freenum = sbp->f_bavail; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(vnode_mount(vp),QCMD(Q_GETQUOTA, USRQUOTA), cred->cr_uid, (caddr_t)&dqb)) freenum = min(dqb.dqb_bsoftlimit, freenum); p->p_cred->p_ruid = savuid; #endif /* QUOTA */ uquad = (u_int64_t)freenum; NFSQUOTABLKTOBYTE(uquad, sbp->f_bsize); } if (compare && !(*retcmpp)) { if (uquad != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_QUOTAUSED: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (sbp != NULL) { freenum = 0; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(vnode_mount(vp),QCMD(Q_GETQUOTA, USRQUOTA), cred->cr_uid, (caddr_t)&dqb)) freenum = dqb.dqb_curblocks; p->p_cred->p_ruid = savuid; #endif /* QUOTA */ uquad = (u_int64_t)freenum; NFSQUOTABLKTOBYTE(uquad, sbp->f_bsize); } if (compare && !(*retcmpp)) { if (uquad != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_RAWDEV: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4SPECDATA); j = fxdr_unsigned(int, *tl++); k = fxdr_unsigned(int, *tl); if (compare) { if (!(*retcmpp)) { if (nap->na_rdev != NFSMAKEDEV(j, k)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_rdev = NFSMAKEDEV(j, k); } attrsum += NFSX_V4SPECDATA; break; case NFSATTRBIT_SPACEAVAIL: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp) && sfp->sf_abytes != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } else if (sfp != NULL) { sfp->sf_abytes = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_SPACEFREE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp) && sfp->sf_fbytes != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } else if (sfp != NULL) { sfp->sf_fbytes = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_SPACETOTAL: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp) && sfp->sf_tbytes != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } else if (sfp != NULL) { sfp->sf_tbytes = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_SPACEUSED: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); thyp = fxdr_hyper(tl); if (compare) { if (!(*retcmpp)) { if ((u_int64_t)nap->na_bytes != thyp) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_bytes = thyp; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_SYSTEM: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_TIMEACCESS: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); fxdr_nfsv4time(tl, &temptime); if (compare) { if (!(*retcmpp)) { if (!NFS_CMPTIME(temptime, nap->na_atime)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_atime = temptime; } attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEACCESSSET: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsum += NFSX_UNSIGNED; i = fxdr_unsigned(int, *tl); if (i == NFSV4SATTRTIME_TOCLIENT) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); attrsum += NFSX_V4TIME; } if (compare && !(*retcmpp)) *retcmpp = NFSERR_INVAL; break; case NFSATTRBIT_TIMEBACKUP: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMECREATE: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEDELTA: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); if (fsp != NULL) { if (compare) { if (!(*retcmpp)) { if ((u_int32_t)fsp->fs_timedelta.tv_sec != fxdr_unsigned(u_int32_t, *(tl + 1)) || (u_int32_t)fsp->fs_timedelta.tv_nsec != (fxdr_unsigned(u_int32_t, *(tl + 2)) % 1000000000) || *tl != 0) *retcmpp = NFSERR_NOTSAME; } } else { fxdr_nfsv4time(tl, &fsp->fs_timedelta); } } attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMETADATA: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); fxdr_nfsv4time(tl, &temptime); if (compare) { if (!(*retcmpp)) { if (!NFS_CMPTIME(temptime, nap->na_ctime)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_ctime = temptime; } attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMODIFY: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); fxdr_nfsv4time(tl, &temptime); if (compare) { if (!(*retcmpp)) { if (!NFS_CMPTIME(temptime, nap->na_mtime)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_mtime = temptime; } attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMODIFYSET: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsum += NFSX_UNSIGNED; i = fxdr_unsigned(int, *tl); if (i == NFSV4SATTRTIME_TOCLIENT) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); attrsum += NFSX_V4TIME; } if (compare && !(*retcmpp)) *retcmpp = NFSERR_INVAL; break; case NFSATTRBIT_MOUNTEDONFILEID: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); thyp = fxdr_hyper(tl); if (compare) { if (!(*retcmpp)) { if (!vp || !nfsrv_atroot(vp, &thyp2)) thyp2 = nap->na_fileid; if (thyp2 != thyp) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) nap->na_mntonfileno = thyp; attrsum += NFSX_HYPER; break; case NFSATTRBIT_SUPPATTREXCLCREAT: retnotsup = 0; error = nfsrv_getattrbits(nd, &retattrbits, &cnt, &retnotsup); if (error) goto nfsmout; if (compare && !(*retcmpp)) { NFSSETSUPP_ATTRBIT(&checkattrbits, nd); NFSCLRNOTSETABLE_ATTRBIT(&checkattrbits, nd); NFSCLRBIT_ATTRBIT(&checkattrbits, NFSATTRBIT_TIMEACCESSSET); if (!NFSEQUAL_ATTRBIT(&retattrbits, &checkattrbits) || retnotsup) *retcmpp = NFSERR_NOTSAME; } attrsum += cnt; break; case NFSATTRBIT_FSLAYOUTTYPE: case NFSATTRBIT_LAYOUTTYPE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsum += NFSX_UNSIGNED; i = fxdr_unsigned(int, *tl); if (i > 0) { NFSM_DISSECT(tl, u_int32_t *, i * NFSX_UNSIGNED); attrsum += i * NFSX_UNSIGNED; j = fxdr_unsigned(int, *tl); if (i == 1 && compare && !(*retcmpp) && (((nfsrv_doflexfile != 0 || nfsrv_maxpnfsmirror > 1) && j != NFSLAYOUT_FLEXFILE) || (nfsrv_doflexfile == 0 && j != NFSLAYOUT_NFSV4_1_FILES))) *retcmpp = NFSERR_NOTSAME; } if (nfsrv_devidcnt == 0) { if (compare && !(*retcmpp) && i > 0) *retcmpp = NFSERR_NOTSAME; } else { if (compare && !(*retcmpp) && i != 1) *retcmpp = NFSERR_NOTSAME; } break; case NFSATTRBIT_LAYOUTALIGNMENT: case NFSATTRBIT_LAYOUTBLKSIZE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsum += NFSX_UNSIGNED; i = fxdr_unsigned(int, *tl); if (compare && !(*retcmpp) && i != NFS_SRVMAXIO) *retcmpp = NFSERR_NOTSAME; break; default: printf("EEK! nfsv4_loadattr unknown attr=%d\n", bitpos); if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; /* * and get out of the loop, since we can't parse * the unknown attrbute data. */ bitpos = NFSATTRBIT_MAX; break; } } /* * some clients pad the attrlist, so we need to skip over the * padding. */ if (attrsum > attrsize) { error = NFSERR_BADXDR; } else { attrsize = NFSM_RNDUP(attrsize); if (attrsum < attrsize) error = nfsm_advance(nd, attrsize - attrsum, -1); } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Implement sleep locks for newnfs. The nfslock_usecnt allows for a * shared lock and the NFSXXX_LOCK flag permits an exclusive lock. * The first argument is a pointer to an nfsv4lock structure. * The second argument is 1 iff a blocking lock is wanted. * If this argument is 0, the call waits until no thread either wants nor * holds an exclusive lock. * It returns 1 if the lock was acquired, 0 otherwise. * If several processes call this function concurrently wanting the exclusive * lock, one will get the lock and the rest will return without getting the * lock. (If the caller must have the lock, it simply calls this function in a * loop until the function returns 1 to indicate the lock was acquired.) * Any usecnt must be decremented by calling nfsv4_relref() before * calling nfsv4_lock(). It was done this way, so nfsv4_lock() could * be called in a loop. * The isleptp argument is set to indicate if the call slept, iff not NULL * and the mp argument indicates to check for a forced dismount, iff not * NULL. */ APPLESTATIC int nfsv4_lock(struct nfsv4lock *lp, int iwantlock, int *isleptp, void *mutex, struct mount *mp) { if (isleptp) *isleptp = 0; /* * If a lock is wanted, loop around until the lock is acquired by * someone and then released. If I want the lock, try to acquire it. * For a lock to be issued, no lock must be in force and the usecnt * must be zero. */ if (iwantlock) { if (!(lp->nfslock_lock & NFSV4LOCK_LOCK) && lp->nfslock_usecnt == 0) { lp->nfslock_lock &= ~NFSV4LOCK_LOCKWANTED; lp->nfslock_lock |= NFSV4LOCK_LOCK; return (1); } lp->nfslock_lock |= NFSV4LOCK_LOCKWANTED; } while (lp->nfslock_lock & (NFSV4LOCK_LOCK | NFSV4LOCK_LOCKWANTED)) { if (mp != NULL && NFSCL_FORCEDISM(mp)) { lp->nfslock_lock &= ~NFSV4LOCK_LOCKWANTED; return (0); } lp->nfslock_lock |= NFSV4LOCK_WANTED; if (isleptp) *isleptp = 1; (void) nfsmsleep(&lp->nfslock_lock, mutex, PZERO - 1, "nfsv4lck", NULL); if (iwantlock && !(lp->nfslock_lock & NFSV4LOCK_LOCK) && lp->nfslock_usecnt == 0) { lp->nfslock_lock &= ~NFSV4LOCK_LOCKWANTED; lp->nfslock_lock |= NFSV4LOCK_LOCK; return (1); } } return (0); } /* * Release the lock acquired by nfsv4_lock(). * The second argument is set to 1 to indicate the nfslock_usecnt should be * incremented, as well. */ APPLESTATIC void nfsv4_unlock(struct nfsv4lock *lp, int incref) { lp->nfslock_lock &= ~NFSV4LOCK_LOCK; if (incref) lp->nfslock_usecnt++; nfsv4_wanted(lp); } /* * Release a reference cnt. */ APPLESTATIC void nfsv4_relref(struct nfsv4lock *lp) { if (lp->nfslock_usecnt <= 0) panic("nfsv4root ref cnt"); lp->nfslock_usecnt--; if (lp->nfslock_usecnt == 0) nfsv4_wanted(lp); } /* * Get a reference cnt. * This function will wait for any exclusive lock to be released, but will * not wait for threads that want the exclusive lock. If priority needs * to be given to threads that need the exclusive lock, a call to nfsv4_lock() * with the 2nd argument == 0 should be done before calling nfsv4_getref(). * If the mp argument is not NULL, check for NFSCL_FORCEDISM() being set and * return without getting a refcnt for that case. */ APPLESTATIC void nfsv4_getref(struct nfsv4lock *lp, int *isleptp, void *mutex, struct mount *mp) { if (isleptp) *isleptp = 0; /* * Wait for a lock held. */ while (lp->nfslock_lock & NFSV4LOCK_LOCK) { if (mp != NULL && NFSCL_FORCEDISM(mp)) return; lp->nfslock_lock |= NFSV4LOCK_WANTED; if (isleptp) *isleptp = 1; (void) nfsmsleep(&lp->nfslock_lock, mutex, PZERO - 1, "nfsv4gr", NULL); } if (mp != NULL && NFSCL_FORCEDISM(mp)) return; lp->nfslock_usecnt++; } /* * Get a reference as above, but return failure instead of sleeping if * an exclusive lock is held. */ APPLESTATIC int nfsv4_getref_nonblock(struct nfsv4lock *lp) { if ((lp->nfslock_lock & NFSV4LOCK_LOCK) != 0) return (0); lp->nfslock_usecnt++; return (1); } /* * Test for a lock. Return 1 if locked, 0 otherwise. */ APPLESTATIC int nfsv4_testlock(struct nfsv4lock *lp) { if ((lp->nfslock_lock & NFSV4LOCK_LOCK) == 0 && lp->nfslock_usecnt == 0) return (0); return (1); } /* * Wake up anyone sleeping, waiting for this lock. */ static void nfsv4_wanted(struct nfsv4lock *lp) { if (lp->nfslock_lock & NFSV4LOCK_WANTED) { lp->nfslock_lock &= ~NFSV4LOCK_WANTED; wakeup((caddr_t)&lp->nfslock_lock); } } /* * Copy a string from an mbuf list into a character array. * Return EBADRPC if there is an mbuf error, * 0 otherwise. */ APPLESTATIC int nfsrv_mtostr(struct nfsrv_descript *nd, char *str, int siz) { int rem, error = 0, xfer; rem = NFSM_RNDUP(siz) - siz; while (siz > 0) { if ((nd->nd_md->m_flags & M_NOMAP) != 0) xfer = nfsm_copyfrommbuf_extpgs(nd, str, UIO_SYSSPACE, siz); else xfer = nfsm_copyfrommbuf(nd, str, UIO_SYSSPACE, siz); str += xfer; siz -= xfer; if (siz > 0 && !nfsm_shiftnext(nd, &xfer)) { error = EBADRPC; goto out; } } *str = '\0'; if (rem > 0) error = nfsm_advance(nd, rem, -1); out: NFSEXITCODE2(error, nd); return (error); } /* * Fill in the attributes as marked by the bitmap (V4). */ APPLESTATIC int nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp, NFSACL_T *saclp, struct vattr *vap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp, struct ucred *cred, NFSPROC_T *p, int isdgram, int reterr, int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno, struct statfs *pnfssf) { int bitpos, retnum = 0; u_int32_t *tl; int siz, prefixnum, error; u_char *cp, namestr[NFSV4_SMALLSTR]; nfsattrbit_t attrbits, retbits; nfsattrbit_t *retbitp = &retbits; u_int32_t freenum, *retnump; u_int64_t uquad; struct statfs *fs; struct nfsfsinfo fsinf; struct timespec temptime; NFSACL_T *aclp, *naclp = NULL; size_t atsiz; bool xattrsupp; #ifdef QUOTA struct dqblk dqb; uid_t savuid; #endif /* * First, set the bits that can be filled and get fsinfo. */ NFSSET_ATTRBIT(retbitp, attrbitp); /* * If both p and cred are NULL, it is a client side setattr call. * If both p and cred are not NULL, it is a server side reply call. * If p is not NULL and cred is NULL, it is a client side callback * reply call. */ if (p == NULL && cred == NULL) { NFSCLRNOTSETABLE_ATTRBIT(retbitp, nd); aclp = saclp; } else { NFSCLRNOTFILLABLE_ATTRBIT(retbitp, nd); naclp = acl_alloc(M_WAITOK); aclp = naclp; } nfsvno_getfs(&fsinf, isdgram); #ifndef APPLE /* * Get the VFS_STATFS(), since some attributes need them. */ fs = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); if (NFSISSETSTATFS_ATTRBIT(retbitp)) { error = VFS_STATFS(mp, fs); if (error != 0) { if (reterr) { nd->nd_repstat = NFSERR_ACCES; free(fs, M_STATFS); return (0); } NFSCLRSTATFS_ATTRBIT(retbitp); } } #endif /* * And the NFSv4 ACL... */ if (NFSISSET_ATTRBIT(retbitp, NFSATTRBIT_ACLSUPPORT) && (nfsrv_useacl == 0 || ((cred != NULL || p != NULL) && supports_nfsv4acls == 0))) { NFSCLRBIT_ATTRBIT(retbitp, NFSATTRBIT_ACLSUPPORT); } if (NFSISSET_ATTRBIT(retbitp, NFSATTRBIT_ACL)) { if (nfsrv_useacl == 0 || ((cred != NULL || p != NULL) && supports_nfsv4acls == 0)) { NFSCLRBIT_ATTRBIT(retbitp, NFSATTRBIT_ACL); } else if (naclp != NULL) { if (NFSVOPLOCK(vp, LK_SHARED) == 0) { error = VOP_ACCESSX(vp, VREAD_ACL, cred, p); if (error == 0) error = VOP_GETACL(vp, ACL_TYPE_NFS4, naclp, cred, p); NFSVOPUNLOCK(vp); } else error = NFSERR_PERM; if (error != 0) { if (reterr) { nd->nd_repstat = NFSERR_ACCES; free(fs, M_STATFS); return (0); } NFSCLRBIT_ATTRBIT(retbitp, NFSATTRBIT_ACL); } } } /* Check to see if Extended Attributes are supported. */ xattrsupp = false; if (NFSISSET_ATTRBIT(retbitp, NFSATTRBIT_XATTRSUPPORT)) { if (NFSVOPLOCK(vp, LK_SHARED) == 0) { error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, "xxx", NULL, &atsiz, cred, p); NFSVOPUNLOCK(vp); if (error != EOPNOTSUPP) xattrsupp = true; } } /* * Put out the attribute bitmap for the ones being filled in * and get the field for the number of attributes returned. */ prefixnum = nfsrv_putattrbit(nd, retbitp); NFSM_BUILD(retnump, u_int32_t *, NFSX_UNSIGNED); prefixnum += NFSX_UNSIGNED; /* * Now, loop around filling in the attributes for each bit set. */ for (bitpos = 0; bitpos < NFSATTRBIT_MAX; bitpos++) { if (NFSISSET_ATTRBIT(retbitp, bitpos)) { switch (bitpos) { case NFSATTRBIT_SUPPORTEDATTRS: NFSSETSUPP_ATTRBIT(&attrbits, nd); if (nfsrv_useacl == 0 || ((cred != NULL || p != NULL) && supports_nfsv4acls == 0)) { NFSCLRBIT_ATTRBIT(&attrbits,NFSATTRBIT_ACLSUPPORT); NFSCLRBIT_ATTRBIT(&attrbits,NFSATTRBIT_ACL); } retnum += nfsrv_putattrbit(nd, &attrbits); break; case NFSATTRBIT_TYPE: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = vtonfsv34_type(vap->va_type); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_FHEXPIRETYPE: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4FHTYPE_PERSISTENT); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_CHANGE: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); txdr_hyper(vap->va_filerev, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_SIZE: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); txdr_hyper(vap->va_size, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_LINKSUPPORT: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (fsinf.fs_properties & NFSV3FSINFO_LINK) *tl = newnfs_true; else *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_SYMLINKSUPPORT: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (fsinf.fs_properties & NFSV3FSINFO_SYMLINK) *tl = newnfs_true; else *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_NAMEDATTR: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_FSID: NFSM_BUILD(tl, u_int32_t *, NFSX_V4FSID); *tl++ = 0; *tl++ = txdr_unsigned(mp->mnt_stat.f_fsid.val[0]); *tl++ = 0; *tl = txdr_unsigned(mp->mnt_stat.f_fsid.val[1]); retnum += NFSX_V4FSID; break; case NFSATTRBIT_UNIQUEHANDLES: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_true; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_LEASETIME: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(nfsrv_lease); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_RDATTRERROR: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(rderror); retnum += NFSX_UNSIGNED; break; /* * Recommended Attributes. (Only the supported ones.) */ case NFSATTRBIT_ACL: retnum += nfsrv_buildacl(nd, aclp, vnode_vtype(vp), p); break; case NFSATTRBIT_ACLSUPPORT: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4ACE_SUPTYPES); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_CANSETTIME: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (fsinf.fs_properties & NFSV3FSINFO_CANSETTIME) *tl = newnfs_true; else *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_CASEINSENSITIVE: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_CASEPRESERVING: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_true; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_CHOWNRESTRICTED: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_true; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_FILEHANDLE: retnum += nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 0); break; case NFSATTRBIT_FILEID: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); uquad = vap->va_fileid; txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_FILESAVAIL: /* * Check quota and use min(quota, f_ffree). */ freenum = fs->f_ffree; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(mp, QCMD(Q_GETQUOTA,USRQUOTA), cred->cr_uid, (caddr_t)&dqb)) freenum = min(dqb.dqb_isoftlimit-dqb.dqb_curinodes, freenum); p->p_cred->p_ruid = savuid; #endif /* QUOTA */ NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); *tl++ = 0; *tl = txdr_unsigned(freenum); retnum += NFSX_HYPER; break; case NFSATTRBIT_FILESFREE: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); *tl++ = 0; *tl = txdr_unsigned(fs->f_ffree); retnum += NFSX_HYPER; break; case NFSATTRBIT_FILESTOTAL: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); *tl++ = 0; *tl = txdr_unsigned(fs->f_files); retnum += NFSX_HYPER; break; case NFSATTRBIT_FSLOCATIONS: NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = 0; *tl = 0; retnum += 2 * NFSX_UNSIGNED; break; case NFSATTRBIT_HOMOGENEOUS: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (fsinf.fs_properties & NFSV3FSINFO_HOMOGENEOUS) *tl = newnfs_true; else *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_MAXFILESIZE: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); uquad = NFSRV_MAXFILESIZE; txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_MAXLINK: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFS_LINK_MAX); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_MAXNAME: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFS_MAXNAMLEN); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_MAXREAD: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); *tl++ = 0; *tl = txdr_unsigned(fsinf.fs_rtmax); retnum += NFSX_HYPER; break; case NFSATTRBIT_MAXWRITE: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); *tl++ = 0; *tl = txdr_unsigned(fsinf.fs_wtmax); retnum += NFSX_HYPER; break; case NFSATTRBIT_MODE: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = vtonfsv34_mode(vap->va_mode); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_NOTRUNC: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_true; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_NUMLINKS: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(vap->va_nlink); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_OWNER: cp = namestr; nfsv4_uidtostr(vap->va_uid, &cp, &siz); retnum += nfsm_strtom(nd, cp, siz); if (cp != namestr) free(cp, M_NFSSTRING); break; case NFSATTRBIT_OWNERGROUP: cp = namestr; nfsv4_gidtostr(vap->va_gid, &cp, &siz); retnum += nfsm_strtom(nd, cp, siz); if (cp != namestr) free(cp, M_NFSSTRING); break; case NFSATTRBIT_QUOTAHARD: if (priv_check_cred(cred, PRIV_VFS_EXCEEDQUOTA)) freenum = fs->f_bfree; else freenum = fs->f_bavail; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(mp, QCMD(Q_GETQUOTA,USRQUOTA), cred->cr_uid, (caddr_t)&dqb)) freenum = min(dqb.dqb_bhardlimit, freenum); p->p_cred->p_ruid = savuid; #endif /* QUOTA */ NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); uquad = (u_int64_t)freenum; NFSQUOTABLKTOBYTE(uquad, fs->f_bsize); txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_QUOTASOFT: if (priv_check_cred(cred, PRIV_VFS_EXCEEDQUOTA)) freenum = fs->f_bfree; else freenum = fs->f_bavail; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(mp, QCMD(Q_GETQUOTA,USRQUOTA), cred->cr_uid, (caddr_t)&dqb)) freenum = min(dqb.dqb_bsoftlimit, freenum); p->p_cred->p_ruid = savuid; #endif /* QUOTA */ NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); uquad = (u_int64_t)freenum; NFSQUOTABLKTOBYTE(uquad, fs->f_bsize); txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_QUOTAUSED: freenum = 0; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(mp, QCMD(Q_GETQUOTA,USRQUOTA), cred->cr_uid, (caddr_t)&dqb)) freenum = dqb.dqb_curblocks; p->p_cred->p_ruid = savuid; #endif /* QUOTA */ NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); uquad = (u_int64_t)freenum; NFSQUOTABLKTOBYTE(uquad, fs->f_bsize); txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_RAWDEV: NFSM_BUILD(tl, u_int32_t *, NFSX_V4SPECDATA); *tl++ = txdr_unsigned(NFSMAJOR(vap->va_rdev)); *tl = txdr_unsigned(NFSMINOR(vap->va_rdev)); retnum += NFSX_V4SPECDATA; break; case NFSATTRBIT_SPACEAVAIL: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); if (priv_check_cred(cred, PRIV_VFS_BLOCKRESERVE)) { if (pnfssf != NULL) uquad = (u_int64_t)pnfssf->f_bfree; else uquad = (u_int64_t)fs->f_bfree; } else { if (pnfssf != NULL) uquad = (u_int64_t)pnfssf->f_bavail; else uquad = (u_int64_t)fs->f_bavail; } if (pnfssf != NULL) uquad *= pnfssf->f_bsize; else uquad *= fs->f_bsize; txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_SPACEFREE: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); if (pnfssf != NULL) { uquad = (u_int64_t)pnfssf->f_bfree; uquad *= pnfssf->f_bsize; } else { uquad = (u_int64_t)fs->f_bfree; uquad *= fs->f_bsize; } txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_SPACETOTAL: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); if (pnfssf != NULL) { uquad = (u_int64_t)pnfssf->f_blocks; uquad *= pnfssf->f_bsize; } else { uquad = (u_int64_t)fs->f_blocks; uquad *= fs->f_bsize; } txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_SPACEUSED: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); txdr_hyper(vap->va_bytes, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_TIMEACCESS: NFSM_BUILD(tl, u_int32_t *, NFSX_V4TIME); txdr_nfsv4time(&vap->va_atime, tl); retnum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEACCESSSET: if ((vap->va_vaflags & VA_UTIMES_NULL) == 0) { NFSM_BUILD(tl, u_int32_t *, NFSX_V4SETTIME); *tl++ = txdr_unsigned(NFSV4SATTRTIME_TOCLIENT); txdr_nfsv4time(&vap->va_atime, tl); retnum += NFSX_V4SETTIME; } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4SATTRTIME_TOSERVER); retnum += NFSX_UNSIGNED; } break; case NFSATTRBIT_TIMEDELTA: NFSM_BUILD(tl, u_int32_t *, NFSX_V4TIME); temptime.tv_sec = 0; temptime.tv_nsec = 1000000000 / hz; txdr_nfsv4time(&temptime, tl); retnum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMETADATA: NFSM_BUILD(tl, u_int32_t *, NFSX_V4TIME); txdr_nfsv4time(&vap->va_ctime, tl); retnum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMODIFY: NFSM_BUILD(tl, u_int32_t *, NFSX_V4TIME); txdr_nfsv4time(&vap->va_mtime, tl); retnum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMODIFYSET: if ((vap->va_vaflags & VA_UTIMES_NULL) == 0) { NFSM_BUILD(tl, u_int32_t *, NFSX_V4SETTIME); *tl++ = txdr_unsigned(NFSV4SATTRTIME_TOCLIENT); txdr_nfsv4time(&vap->va_mtime, tl); retnum += NFSX_V4SETTIME; } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4SATTRTIME_TOSERVER); retnum += NFSX_UNSIGNED; } break; case NFSATTRBIT_MOUNTEDONFILEID: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); if (at_root != 0) uquad = mounted_on_fileno; else uquad = vap->va_fileid; txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_SUPPATTREXCLCREAT: NFSSETSUPP_ATTRBIT(&attrbits, nd); NFSCLRNOTSETABLE_ATTRBIT(&attrbits, nd); NFSCLRBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET); retnum += nfsrv_putattrbit(nd, &attrbits); break; case NFSATTRBIT_FSLAYOUTTYPE: case NFSATTRBIT_LAYOUTTYPE: if (nfsrv_devidcnt == 0) siz = 1; else siz = 2; if (siz == 2) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(1); /* One entry. */ if (nfsrv_doflexfile != 0 || nfsrv_maxpnfsmirror > 1) *tl = txdr_unsigned(NFSLAYOUT_FLEXFILE); else *tl = txdr_unsigned( NFSLAYOUT_NFSV4_1_FILES); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = 0; } retnum += siz * NFSX_UNSIGNED; break; case NFSATTRBIT_LAYOUTALIGNMENT: case NFSATTRBIT_LAYOUTBLKSIZE: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFS_SRVMAXIO); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_XATTRSUPPORT: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (xattrsupp) *tl = newnfs_true; else *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; default: printf("EEK! Bad V4 attribute bitpos=%d\n", bitpos); } } } if (naclp != NULL) acl_free(naclp); free(fs, M_STATFS); *retnump = txdr_unsigned(retnum); return (retnum + prefixnum); } /* * Put the attribute bits onto an mbuf list. * Return the number of bytes of output generated. */ APPLESTATIC int nfsrv_putattrbit(struct nfsrv_descript *nd, nfsattrbit_t *attrbitp) { u_int32_t *tl; int cnt, i, bytesize; for (cnt = NFSATTRBIT_MAXWORDS; cnt > 0; cnt--) if (attrbitp->bits[cnt - 1]) break; bytesize = (cnt + 1) * NFSX_UNSIGNED; NFSM_BUILD(tl, u_int32_t *, bytesize); *tl++ = txdr_unsigned(cnt); for (i = 0; i < cnt; i++) *tl++ = txdr_unsigned(attrbitp->bits[i]); return (bytesize); } /* * Convert a uid to a string. * If the lookup fails, just output the digits. * uid - the user id * cpp - points to a buffer of size NFSV4_SMALLSTR * (malloc a larger one, as required) * retlenp - pointer to length to be returned */ APPLESTATIC void nfsv4_uidtostr(uid_t uid, u_char **cpp, int *retlenp) { int i; struct nfsusrgrp *usrp; u_char *cp = *cpp; uid_t tmp; int cnt, hasampersand, len = NFSV4_SMALLSTR, ret; struct nfsrv_lughash *hp; cnt = 0; tryagain: if (nfsrv_dnsnamelen > 0 && !nfs_enable_uidtostring) { /* * Always map nfsrv_defaultuid to "nobody". */ if (uid == nfsrv_defaultuid) { i = nfsrv_dnsnamelen + 7; if (i > len) { if (len > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); cp = malloc(i, M_NFSSTRING, M_WAITOK); *cpp = cp; len = i; goto tryagain; } *retlenp = i; NFSBCOPY("nobody@", cp, 7); cp += 7; NFSBCOPY(nfsrv_dnsname, cp, nfsrv_dnsnamelen); return; } hasampersand = 0; hp = NFSUSERHASH(uid); mtx_lock(&hp->mtx); TAILQ_FOREACH(usrp, &hp->lughead, lug_numhash) { if (usrp->lug_uid == uid) { if (usrp->lug_expiry < NFSD_MONOSEC) break; /* * If the name doesn't already have an '@' * in it, append @domainname to it. */ for (i = 0; i < usrp->lug_namelen; i++) { if (usrp->lug_name[i] == '@') { hasampersand = 1; break; } } if (hasampersand) i = usrp->lug_namelen; else i = usrp->lug_namelen + nfsrv_dnsnamelen + 1; if (i > len) { mtx_unlock(&hp->mtx); if (len > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); cp = malloc(i, M_NFSSTRING, M_WAITOK); *cpp = cp; len = i; goto tryagain; } *retlenp = i; NFSBCOPY(usrp->lug_name, cp, usrp->lug_namelen); if (!hasampersand) { cp += usrp->lug_namelen; *cp++ = '@'; NFSBCOPY(nfsrv_dnsname, cp, nfsrv_dnsnamelen); } TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); TAILQ_INSERT_TAIL(&hp->lughead, usrp, lug_numhash); mtx_unlock(&hp->mtx); return; } } mtx_unlock(&hp->mtx); cnt++; ret = nfsrv_getuser(RPCNFSUSERD_GETUID, uid, (gid_t)0, NULL); if (ret == 0 && cnt < 2) goto tryagain; } /* * No match, just return a string of digits. */ tmp = uid; i = 0; while (tmp || i == 0) { tmp /= 10; i++; } len = (i > len) ? len : i; *retlenp = len; cp += (len - 1); tmp = uid; for (i = 0; i < len; i++) { *cp-- = '0' + (tmp % 10); tmp /= 10; } return; } /* * Get a credential for the uid with the server's group list. * If none is found, just return the credential passed in after * logging a warning message. */ struct ucred * nfsrv_getgrpscred(struct ucred *oldcred) { struct nfsusrgrp *usrp; struct ucred *newcred; int cnt, ret; uid_t uid; struct nfsrv_lughash *hp; cnt = 0; uid = oldcred->cr_uid; tryagain: if (nfsrv_dnsnamelen > 0) { hp = NFSUSERHASH(uid); mtx_lock(&hp->mtx); TAILQ_FOREACH(usrp, &hp->lughead, lug_numhash) { if (usrp->lug_uid == uid) { if (usrp->lug_expiry < NFSD_MONOSEC) break; if (usrp->lug_cred != NULL) { newcred = crhold(usrp->lug_cred); crfree(oldcred); } else newcred = oldcred; TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); TAILQ_INSERT_TAIL(&hp->lughead, usrp, lug_numhash); mtx_unlock(&hp->mtx); return (newcred); } } mtx_unlock(&hp->mtx); cnt++; ret = nfsrv_getuser(RPCNFSUSERD_GETUID, uid, (gid_t)0, NULL); if (ret == 0 && cnt < 2) goto tryagain; } return (oldcred); } /* * Convert a string to a uid. * If no conversion is possible return NFSERR_BADOWNER, otherwise * return 0. * If this is called from a client side mount using AUTH_SYS and the * string is made up entirely of digits, just convert the string to * a number. */ APPLESTATIC int nfsv4_strtouid(struct nfsrv_descript *nd, u_char *str, int len, uid_t *uidp) { int i; char *cp, *endstr, *str0; struct nfsusrgrp *usrp; int cnt, ret; int error = 0; uid_t tuid; struct nfsrv_lughash *hp, *hp2; if (len == 0) { error = NFSERR_BADOWNER; goto out; } /* If a string of digits and an AUTH_SYS mount, just convert it. */ str0 = str; tuid = (uid_t)strtoul(str0, &endstr, 10); if ((endstr - str0) == len) { /* A numeric string. */ if ((nd->nd_flag & ND_KERBV) == 0 && ((nd->nd_flag & ND_NFSCL) != 0 || nfsd_enable_stringtouid != 0)) *uidp = tuid; else error = NFSERR_BADOWNER; goto out; } /* * Look for an '@'. */ cp = strchr(str0, '@'); if (cp != NULL) i = (int)(cp++ - str0); else i = len; cnt = 0; tryagain: if (nfsrv_dnsnamelen > 0) { /* * If an '@' is found and the domain name matches, search for * the name with dns stripped off. * Mixed case alpahbetics will match for the domain name, but * all upper case will not. */ if (cnt == 0 && i < len && i > 0 && (len - 1 - i) == nfsrv_dnsnamelen && !nfsrv_cmpmixedcase(cp, nfsrv_dnsname, nfsrv_dnsnamelen)) { len -= (nfsrv_dnsnamelen + 1); *(cp - 1) = '\0'; } /* * Check for the special case of "nobody". */ if (len == 6 && !NFSBCMP(str, "nobody", 6)) { *uidp = nfsrv_defaultuid; error = 0; goto out; } hp = NFSUSERNAMEHASH(str, len); mtx_lock(&hp->mtx); TAILQ_FOREACH(usrp, &hp->lughead, lug_namehash) { if (usrp->lug_namelen == len && !NFSBCMP(usrp->lug_name, str, len)) { if (usrp->lug_expiry < NFSD_MONOSEC) break; hp2 = NFSUSERHASH(usrp->lug_uid); mtx_lock(&hp2->mtx); TAILQ_REMOVE(&hp2->lughead, usrp, lug_numhash); TAILQ_INSERT_TAIL(&hp2->lughead, usrp, lug_numhash); *uidp = usrp->lug_uid; mtx_unlock(&hp2->mtx); mtx_unlock(&hp->mtx); error = 0; goto out; } } mtx_unlock(&hp->mtx); cnt++; ret = nfsrv_getuser(RPCNFSUSERD_GETUSER, (uid_t)0, (gid_t)0, str); if (ret == 0 && cnt < 2) goto tryagain; } error = NFSERR_BADOWNER; out: NFSEXITCODE(error); return (error); } /* * Convert a gid to a string. * gid - the group id * cpp - points to a buffer of size NFSV4_SMALLSTR * (malloc a larger one, as required) * retlenp - pointer to length to be returned */ APPLESTATIC void nfsv4_gidtostr(gid_t gid, u_char **cpp, int *retlenp) { int i; struct nfsusrgrp *usrp; u_char *cp = *cpp; gid_t tmp; int cnt, hasampersand, len = NFSV4_SMALLSTR, ret; struct nfsrv_lughash *hp; cnt = 0; tryagain: if (nfsrv_dnsnamelen > 0 && !nfs_enable_uidtostring) { /* * Always map nfsrv_defaultgid to "nogroup". */ if (gid == nfsrv_defaultgid) { i = nfsrv_dnsnamelen + 8; if (i > len) { if (len > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); cp = malloc(i, M_NFSSTRING, M_WAITOK); *cpp = cp; len = i; goto tryagain; } *retlenp = i; NFSBCOPY("nogroup@", cp, 8); cp += 8; NFSBCOPY(nfsrv_dnsname, cp, nfsrv_dnsnamelen); return; } hasampersand = 0; hp = NFSGROUPHASH(gid); mtx_lock(&hp->mtx); TAILQ_FOREACH(usrp, &hp->lughead, lug_numhash) { if (usrp->lug_gid == gid) { if (usrp->lug_expiry < NFSD_MONOSEC) break; /* * If the name doesn't already have an '@' * in it, append @domainname to it. */ for (i = 0; i < usrp->lug_namelen; i++) { if (usrp->lug_name[i] == '@') { hasampersand = 1; break; } } if (hasampersand) i = usrp->lug_namelen; else i = usrp->lug_namelen + nfsrv_dnsnamelen + 1; if (i > len) { mtx_unlock(&hp->mtx); if (len > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); cp = malloc(i, M_NFSSTRING, M_WAITOK); *cpp = cp; len = i; goto tryagain; } *retlenp = i; NFSBCOPY(usrp->lug_name, cp, usrp->lug_namelen); if (!hasampersand) { cp += usrp->lug_namelen; *cp++ = '@'; NFSBCOPY(nfsrv_dnsname, cp, nfsrv_dnsnamelen); } TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); TAILQ_INSERT_TAIL(&hp->lughead, usrp, lug_numhash); mtx_unlock(&hp->mtx); return; } } mtx_unlock(&hp->mtx); cnt++; ret = nfsrv_getuser(RPCNFSUSERD_GETGID, (uid_t)0, gid, NULL); if (ret == 0 && cnt < 2) goto tryagain; } /* * No match, just return a string of digits. */ tmp = gid; i = 0; while (tmp || i == 0) { tmp /= 10; i++; } len = (i > len) ? len : i; *retlenp = len; cp += (len - 1); tmp = gid; for (i = 0; i < len; i++) { *cp-- = '0' + (tmp % 10); tmp /= 10; } return; } /* * Convert a string to a gid. * If no conversion is possible return NFSERR_BADOWNER, otherwise * return 0. * If this is called from a client side mount using AUTH_SYS and the * string is made up entirely of digits, just convert the string to * a number. */ APPLESTATIC int nfsv4_strtogid(struct nfsrv_descript *nd, u_char *str, int len, gid_t *gidp) { int i; char *cp, *endstr, *str0; struct nfsusrgrp *usrp; int cnt, ret; int error = 0; gid_t tgid; struct nfsrv_lughash *hp, *hp2; if (len == 0) { error = NFSERR_BADOWNER; goto out; } /* If a string of digits and an AUTH_SYS mount, just convert it. */ str0 = str; tgid = (gid_t)strtoul(str0, &endstr, 10); if ((endstr - str0) == len) { /* A numeric string. */ if ((nd->nd_flag & ND_KERBV) == 0 && ((nd->nd_flag & ND_NFSCL) != 0 || nfsd_enable_stringtouid != 0)) *gidp = tgid; else error = NFSERR_BADOWNER; goto out; } /* * Look for an '@'. */ cp = strchr(str0, '@'); if (cp != NULL) i = (int)(cp++ - str0); else i = len; cnt = 0; tryagain: if (nfsrv_dnsnamelen > 0) { /* * If an '@' is found and the dns name matches, search for the * name with the dns stripped off. */ if (cnt == 0 && i < len && i > 0 && (len - 1 - i) == nfsrv_dnsnamelen && !nfsrv_cmpmixedcase(cp, nfsrv_dnsname, nfsrv_dnsnamelen)) { len -= (nfsrv_dnsnamelen + 1); *(cp - 1) = '\0'; } /* * Check for the special case of "nogroup". */ if (len == 7 && !NFSBCMP(str, "nogroup", 7)) { *gidp = nfsrv_defaultgid; error = 0; goto out; } hp = NFSGROUPNAMEHASH(str, len); mtx_lock(&hp->mtx); TAILQ_FOREACH(usrp, &hp->lughead, lug_namehash) { if (usrp->lug_namelen == len && !NFSBCMP(usrp->lug_name, str, len)) { if (usrp->lug_expiry < NFSD_MONOSEC) break; hp2 = NFSGROUPHASH(usrp->lug_gid); mtx_lock(&hp2->mtx); TAILQ_REMOVE(&hp2->lughead, usrp, lug_numhash); TAILQ_INSERT_TAIL(&hp2->lughead, usrp, lug_numhash); *gidp = usrp->lug_gid; mtx_unlock(&hp2->mtx); mtx_unlock(&hp->mtx); error = 0; goto out; } } mtx_unlock(&hp->mtx); cnt++; ret = nfsrv_getuser(RPCNFSUSERD_GETGROUP, (uid_t)0, (gid_t)0, str); if (ret == 0 && cnt < 2) goto tryagain; } error = NFSERR_BADOWNER; out: NFSEXITCODE(error); return (error); } /* * Cmp len chars, allowing mixed case in the first argument to match lower * case in the second, but not if the first argument is all upper case. * Return 0 for a match, 1 otherwise. */ static int nfsrv_cmpmixedcase(u_char *cp, u_char *cp2, int len) { int i; u_char tmp; int fndlower = 0; for (i = 0; i < len; i++) { if (*cp >= 'A' && *cp <= 'Z') { tmp = *cp++ + ('a' - 'A'); } else { tmp = *cp++; if (tmp >= 'a' && tmp <= 'z') fndlower = 1; } if (tmp != *cp2++) return (1); } if (fndlower) return (0); else return (1); } /* * Set the port for the nfsuserd. */ APPLESTATIC int nfsrv_nfsuserdport(struct nfsuserd_args *nargs, NFSPROC_T *p) { struct nfssockreq *rp; #ifdef INET struct sockaddr_in *ad; #endif #ifdef INET6 struct sockaddr_in6 *ad6; const struct in6_addr in6loopback = IN6ADDR_LOOPBACK_INIT; #endif int error; NFSLOCKNAMEID(); if (nfsrv_nfsuserd != NOTRUNNING) { NFSUNLOCKNAMEID(); error = EPERM; goto out; } nfsrv_nfsuserd = STARTSTOP; /* * Set up the socket record and connect. * Set nr_client NULL before unlocking, just to ensure that no other * process/thread/core will use a bogus old value. This could only * occur if the use of the nameid lock to protect nfsrv_nfsuserd is * broken. */ rp = &nfsrv_nfsuserdsock; rp->nr_client = NULL; NFSUNLOCKNAMEID(); rp->nr_sotype = SOCK_DGRAM; rp->nr_soproto = IPPROTO_UDP; rp->nr_lock = (NFSR_RESERVEDPORT | NFSR_LOCALHOST); rp->nr_cred = NULL; rp->nr_prog = RPCPROG_NFSUSERD; error = 0; switch (nargs->nuserd_family) { #ifdef INET case AF_INET: rp->nr_nam = malloc(sizeof(struct sockaddr_in), M_SONAME, M_WAITOK | M_ZERO); ad = (struct sockaddr_in *)rp->nr_nam; ad->sin_len = sizeof(struct sockaddr_in); ad->sin_family = AF_INET; ad->sin_addr.s_addr = htonl(INADDR_LOOPBACK); ad->sin_port = nargs->nuserd_port; break; #endif #ifdef INET6 case AF_INET6: rp->nr_nam = malloc(sizeof(struct sockaddr_in6), M_SONAME, M_WAITOK | M_ZERO); ad6 = (struct sockaddr_in6 *)rp->nr_nam; ad6->sin6_len = sizeof(struct sockaddr_in6); ad6->sin6_family = AF_INET6; ad6->sin6_addr = in6loopback; ad6->sin6_port = nargs->nuserd_port; break; #endif default: error = ENXIO; } rp->nr_vers = RPCNFSUSERD_VERS; if (error == 0) error = newnfs_connect(NULL, rp, NFSPROCCRED(p), p, 0); if (error == 0) { NFSLOCKNAMEID(); nfsrv_nfsuserd = RUNNING; NFSUNLOCKNAMEID(); } else { free(rp->nr_nam, M_SONAME); NFSLOCKNAMEID(); nfsrv_nfsuserd = NOTRUNNING; NFSUNLOCKNAMEID(); } out: NFSEXITCODE(error); return (error); } /* * Delete the nfsuserd port. */ APPLESTATIC void nfsrv_nfsuserddelport(void) { NFSLOCKNAMEID(); if (nfsrv_nfsuserd != RUNNING) { NFSUNLOCKNAMEID(); return; } nfsrv_nfsuserd = STARTSTOP; /* Wait for all upcalls to complete. */ while (nfsrv_userdupcalls > 0) msleep(&nfsrv_userdupcalls, NFSNAMEIDMUTEXPTR, PVFS, "nfsupcalls", 0); NFSUNLOCKNAMEID(); newnfs_disconnect(&nfsrv_nfsuserdsock); free(nfsrv_nfsuserdsock.nr_nam, M_SONAME); NFSLOCKNAMEID(); nfsrv_nfsuserd = NOTRUNNING; NFSUNLOCKNAMEID(); } /* * Do upcalls to the nfsuserd, for cache misses of the owner/ownergroup * name<-->id cache. * Returns 0 upon success, non-zero otherwise. */ static int nfsrv_getuser(int procnum, uid_t uid, gid_t gid, char *name) { u_int32_t *tl; struct nfsrv_descript *nd; int len; struct nfsrv_descript nfsd; struct ucred *cred; int error; NFSLOCKNAMEID(); if (nfsrv_nfsuserd != RUNNING) { NFSUNLOCKNAMEID(); error = EPERM; goto out; } /* * Maintain a count of upcalls in progress, so that nfsrv_X() * can wait until no upcalls are in progress. */ nfsrv_userdupcalls++; NFSUNLOCKNAMEID(); KASSERT(nfsrv_userdupcalls > 0, ("nfsrv_getuser: non-positive upcalls")); nd = &nfsd; cred = newnfs_getcred(); nd->nd_flag = ND_GSSINITREPLY; nfsrvd_rephead(nd); nd->nd_procnum = procnum; if (procnum == RPCNFSUSERD_GETUID || procnum == RPCNFSUSERD_GETGID) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (procnum == RPCNFSUSERD_GETUID) *tl = txdr_unsigned(uid); else *tl = txdr_unsigned(gid); } else { len = strlen(name); (void) nfsm_strtom(nd, name, len); } error = newnfs_request(nd, NULL, NULL, &nfsrv_nfsuserdsock, NULL, NULL, cred, RPCPROG_NFSUSERD, RPCNFSUSERD_VERS, NULL, 0, NULL, NULL); NFSLOCKNAMEID(); if (--nfsrv_userdupcalls == 0 && nfsrv_nfsuserd == STARTSTOP) wakeup(&nfsrv_userdupcalls); NFSUNLOCKNAMEID(); NFSFREECRED(cred); if (!error) { m_freem(nd->nd_mrep); error = nd->nd_repstat; } out: NFSEXITCODE(error); return (error); } /* * This function is called from the nfssvc(2) system call, to update the * kernel user/group name list(s) for the V4 owner and ownergroup attributes. */ APPLESTATIC int nfssvc_idname(struct nfsd_idargs *nidp) { struct nfsusrgrp *nusrp, *usrp, *newusrp; struct nfsrv_lughash *hp_name, *hp_idnum, *thp; int i, group_locked, groupname_locked, user_locked, username_locked; int error = 0; u_char *cp; gid_t *grps; struct ucred *cr; static int onethread = 0; static time_t lasttime = 0; if (nidp->nid_namelen <= 0 || nidp->nid_namelen > MAXHOSTNAMELEN) { error = EINVAL; goto out; } if (nidp->nid_flag & NFSID_INITIALIZE) { cp = malloc(nidp->nid_namelen + 1, M_NFSSTRING, M_WAITOK); error = copyin(CAST_USER_ADDR_T(nidp->nid_name), cp, nidp->nid_namelen); if (error != 0) { free(cp, M_NFSSTRING); goto out; } if (atomic_cmpset_acq_int(&nfsrv_dnsnamelen, 0, 0) == 0) { /* * Free up all the old stuff and reinitialize hash * lists. All mutexes for both lists must be locked, * with the user/group name ones before the uid/gid * ones, to avoid a LOR. */ for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsusernamehash[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsuserhash[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) TAILQ_FOREACH_SAFE(usrp, &nfsuserhash[i].lughead, lug_numhash, nusrp) nfsrv_removeuser(usrp, 1); for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsuserhash[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsusernamehash[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsgroupnamehash[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsgrouphash[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) TAILQ_FOREACH_SAFE(usrp, &nfsgrouphash[i].lughead, lug_numhash, nusrp) nfsrv_removeuser(usrp, 0); for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsgrouphash[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsgroupnamehash[i].mtx); free(nfsrv_dnsname, M_NFSSTRING); nfsrv_dnsname = NULL; } if (nfsuserhash == NULL) { /* Allocate the hash tables. */ nfsuserhash = malloc(sizeof(struct nfsrv_lughash) * nfsrv_lughashsize, M_NFSUSERGROUP, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_lughashsize; i++) mtx_init(&nfsuserhash[i].mtx, "nfsuidhash", NULL, MTX_DEF | MTX_DUPOK); nfsusernamehash = malloc(sizeof(struct nfsrv_lughash) * nfsrv_lughashsize, M_NFSUSERGROUP, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_lughashsize; i++) mtx_init(&nfsusernamehash[i].mtx, "nfsusrhash", NULL, MTX_DEF | MTX_DUPOK); nfsgrouphash = malloc(sizeof(struct nfsrv_lughash) * nfsrv_lughashsize, M_NFSUSERGROUP, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_lughashsize; i++) mtx_init(&nfsgrouphash[i].mtx, "nfsgidhash", NULL, MTX_DEF | MTX_DUPOK); nfsgroupnamehash = malloc(sizeof(struct nfsrv_lughash) * nfsrv_lughashsize, M_NFSUSERGROUP, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_lughashsize; i++) mtx_init(&nfsgroupnamehash[i].mtx, "nfsgrphash", NULL, MTX_DEF | MTX_DUPOK); } /* (Re)initialize the list heads. */ for (i = 0; i < nfsrv_lughashsize; i++) TAILQ_INIT(&nfsuserhash[i].lughead); for (i = 0; i < nfsrv_lughashsize; i++) TAILQ_INIT(&nfsusernamehash[i].lughead); for (i = 0; i < nfsrv_lughashsize; i++) TAILQ_INIT(&nfsgrouphash[i].lughead); for (i = 0; i < nfsrv_lughashsize; i++) TAILQ_INIT(&nfsgroupnamehash[i].lughead); /* * Put name in "DNS" string. */ nfsrv_dnsname = cp; nfsrv_defaultuid = nidp->nid_uid; nfsrv_defaultgid = nidp->nid_gid; nfsrv_usercnt = 0; nfsrv_usermax = nidp->nid_usermax; atomic_store_rel_int(&nfsrv_dnsnamelen, nidp->nid_namelen); goto out; } /* * malloc the new one now, so any potential sleep occurs before * manipulation of the lists. */ newusrp = malloc(sizeof(struct nfsusrgrp) + nidp->nid_namelen, M_NFSUSERGROUP, M_WAITOK | M_ZERO); error = copyin(CAST_USER_ADDR_T(nidp->nid_name), newusrp->lug_name, nidp->nid_namelen); if (error == 0 && nidp->nid_ngroup > 0 && (nidp->nid_flag & NFSID_ADDUID) != 0) { grps = malloc(sizeof(gid_t) * nidp->nid_ngroup, M_TEMP, M_WAITOK); error = copyin(CAST_USER_ADDR_T(nidp->nid_grps), grps, sizeof(gid_t) * nidp->nid_ngroup); if (error == 0) { /* * Create a credential just like svc_getcred(), * but using the group list provided. */ cr = crget(); cr->cr_uid = cr->cr_ruid = cr->cr_svuid = nidp->nid_uid; crsetgroups(cr, nidp->nid_ngroup, grps); cr->cr_rgid = cr->cr_svgid = cr->cr_groups[0]; cr->cr_prison = &prison0; prison_hold(cr->cr_prison); #ifdef MAC mac_cred_associate_nfsd(cr); #endif newusrp->lug_cred = cr; } free(grps, M_TEMP); } if (error) { free(newusrp, M_NFSUSERGROUP); goto out; } newusrp->lug_namelen = nidp->nid_namelen; /* * The lock order is username[0]->[nfsrv_lughashsize - 1] followed * by uid[0]->[nfsrv_lughashsize - 1], with the same for group. * The flags user_locked, username_locked, group_locked and * groupname_locked are set to indicate all of those hash lists are * locked. hp_name != NULL and hp_idnum != NULL indicates that * the respective one mutex is locked. */ user_locked = username_locked = group_locked = groupname_locked = 0; hp_name = hp_idnum = NULL; /* * Delete old entries, as required. */ if (nidp->nid_flag & (NFSID_DELUID | NFSID_ADDUID)) { /* Must lock all username hash lists first, to avoid a LOR. */ for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsusernamehash[i].mtx); username_locked = 1; hp_idnum = NFSUSERHASH(nidp->nid_uid); mtx_lock(&hp_idnum->mtx); TAILQ_FOREACH_SAFE(usrp, &hp_idnum->lughead, lug_numhash, nusrp) { if (usrp->lug_uid == nidp->nid_uid) nfsrv_removeuser(usrp, 1); } } else if (nidp->nid_flag & (NFSID_DELUSERNAME | NFSID_ADDUSERNAME)) { hp_name = NFSUSERNAMEHASH(newusrp->lug_name, newusrp->lug_namelen); mtx_lock(&hp_name->mtx); TAILQ_FOREACH_SAFE(usrp, &hp_name->lughead, lug_namehash, nusrp) { if (usrp->lug_namelen == newusrp->lug_namelen && !NFSBCMP(usrp->lug_name, newusrp->lug_name, usrp->lug_namelen)) { thp = NFSUSERHASH(usrp->lug_uid); mtx_lock(&thp->mtx); nfsrv_removeuser(usrp, 1); mtx_unlock(&thp->mtx); } } hp_idnum = NFSUSERHASH(nidp->nid_uid); mtx_lock(&hp_idnum->mtx); } else if (nidp->nid_flag & (NFSID_DELGID | NFSID_ADDGID)) { /* Must lock all groupname hash lists first, to avoid a LOR. */ for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsgroupnamehash[i].mtx); groupname_locked = 1; hp_idnum = NFSGROUPHASH(nidp->nid_gid); mtx_lock(&hp_idnum->mtx); TAILQ_FOREACH_SAFE(usrp, &hp_idnum->lughead, lug_numhash, nusrp) { if (usrp->lug_gid == nidp->nid_gid) nfsrv_removeuser(usrp, 0); } } else if (nidp->nid_flag & (NFSID_DELGROUPNAME | NFSID_ADDGROUPNAME)) { hp_name = NFSGROUPNAMEHASH(newusrp->lug_name, newusrp->lug_namelen); mtx_lock(&hp_name->mtx); TAILQ_FOREACH_SAFE(usrp, &hp_name->lughead, lug_namehash, nusrp) { if (usrp->lug_namelen == newusrp->lug_namelen && !NFSBCMP(usrp->lug_name, newusrp->lug_name, usrp->lug_namelen)) { thp = NFSGROUPHASH(usrp->lug_gid); mtx_lock(&thp->mtx); nfsrv_removeuser(usrp, 0); mtx_unlock(&thp->mtx); } } hp_idnum = NFSGROUPHASH(nidp->nid_gid); mtx_lock(&hp_idnum->mtx); } /* * Now, we can add the new one. */ if (nidp->nid_usertimeout) newusrp->lug_expiry = NFSD_MONOSEC + nidp->nid_usertimeout; else newusrp->lug_expiry = NFSD_MONOSEC + 5; if (nidp->nid_flag & (NFSID_ADDUID | NFSID_ADDUSERNAME)) { newusrp->lug_uid = nidp->nid_uid; thp = NFSUSERHASH(newusrp->lug_uid); mtx_assert(&thp->mtx, MA_OWNED); TAILQ_INSERT_TAIL(&thp->lughead, newusrp, lug_numhash); thp = NFSUSERNAMEHASH(newusrp->lug_name, newusrp->lug_namelen); mtx_assert(&thp->mtx, MA_OWNED); TAILQ_INSERT_TAIL(&thp->lughead, newusrp, lug_namehash); atomic_add_int(&nfsrv_usercnt, 1); } else if (nidp->nid_flag & (NFSID_ADDGID | NFSID_ADDGROUPNAME)) { newusrp->lug_gid = nidp->nid_gid; thp = NFSGROUPHASH(newusrp->lug_gid); mtx_assert(&thp->mtx, MA_OWNED); TAILQ_INSERT_TAIL(&thp->lughead, newusrp, lug_numhash); thp = NFSGROUPNAMEHASH(newusrp->lug_name, newusrp->lug_namelen); mtx_assert(&thp->mtx, MA_OWNED); TAILQ_INSERT_TAIL(&thp->lughead, newusrp, lug_namehash); atomic_add_int(&nfsrv_usercnt, 1); } else { if (newusrp->lug_cred != NULL) crfree(newusrp->lug_cred); free(newusrp, M_NFSUSERGROUP); } /* * Once per second, allow one thread to trim the cache. */ if (lasttime < NFSD_MONOSEC && atomic_cmpset_acq_int(&onethread, 0, 1) != 0) { /* * First, unlock the single mutexes, so that all entries * can be locked and any LOR is avoided. */ if (hp_name != NULL) { mtx_unlock(&hp_name->mtx); hp_name = NULL; } if (hp_idnum != NULL) { mtx_unlock(&hp_idnum->mtx); hp_idnum = NULL; } if ((nidp->nid_flag & (NFSID_DELUID | NFSID_ADDUID | NFSID_DELUSERNAME | NFSID_ADDUSERNAME)) != 0) { if (username_locked == 0) { for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsusernamehash[i].mtx); username_locked = 1; } KASSERT(user_locked == 0, ("nfssvc_idname: user_locked")); for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsuserhash[i].mtx); user_locked = 1; for (i = 0; i < nfsrv_lughashsize; i++) { TAILQ_FOREACH_SAFE(usrp, &nfsuserhash[i].lughead, lug_numhash, nusrp) if (usrp->lug_expiry < NFSD_MONOSEC) nfsrv_removeuser(usrp, 1); } for (i = 0; i < nfsrv_lughashsize; i++) { /* * Trim the cache using an approximate LRU * algorithm. This code deletes the least * recently used entry on each hash list. */ if (nfsrv_usercnt <= nfsrv_usermax) break; usrp = TAILQ_FIRST(&nfsuserhash[i].lughead); if (usrp != NULL) nfsrv_removeuser(usrp, 1); } } else { if (groupname_locked == 0) { for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsgroupnamehash[i].mtx); groupname_locked = 1; } KASSERT(group_locked == 0, ("nfssvc_idname: group_locked")); for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsgrouphash[i].mtx); group_locked = 1; for (i = 0; i < nfsrv_lughashsize; i++) { TAILQ_FOREACH_SAFE(usrp, &nfsgrouphash[i].lughead, lug_numhash, nusrp) if (usrp->lug_expiry < NFSD_MONOSEC) nfsrv_removeuser(usrp, 0); } for (i = 0; i < nfsrv_lughashsize; i++) { /* * Trim the cache using an approximate LRU * algorithm. This code deletes the least * recently user entry on each hash list. */ if (nfsrv_usercnt <= nfsrv_usermax) break; usrp = TAILQ_FIRST(&nfsgrouphash[i].lughead); if (usrp != NULL) nfsrv_removeuser(usrp, 0); } } lasttime = NFSD_MONOSEC; atomic_store_rel_int(&onethread, 0); } /* Now, unlock all locked mutexes. */ if (hp_idnum != NULL) mtx_unlock(&hp_idnum->mtx); if (hp_name != NULL) mtx_unlock(&hp_name->mtx); if (user_locked != 0) for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsuserhash[i].mtx); if (username_locked != 0) for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsusernamehash[i].mtx); if (group_locked != 0) for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsgrouphash[i].mtx); if (groupname_locked != 0) for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsgroupnamehash[i].mtx); out: NFSEXITCODE(error); return (error); } /* * Remove a user/group name element. */ static void nfsrv_removeuser(struct nfsusrgrp *usrp, int isuser) { struct nfsrv_lughash *hp; if (isuser != 0) { hp = NFSUSERHASH(usrp->lug_uid); mtx_assert(&hp->mtx, MA_OWNED); TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); hp = NFSUSERNAMEHASH(usrp->lug_name, usrp->lug_namelen); mtx_assert(&hp->mtx, MA_OWNED); TAILQ_REMOVE(&hp->lughead, usrp, lug_namehash); } else { hp = NFSGROUPHASH(usrp->lug_gid); mtx_assert(&hp->mtx, MA_OWNED); TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); hp = NFSGROUPNAMEHASH(usrp->lug_name, usrp->lug_namelen); mtx_assert(&hp->mtx, MA_OWNED); TAILQ_REMOVE(&hp->lughead, usrp, lug_namehash); } atomic_add_int(&nfsrv_usercnt, -1); if (usrp->lug_cred != NULL) crfree(usrp->lug_cred); free(usrp, M_NFSUSERGROUP); } /* * Free up all the allocations related to the name<-->id cache. * This function should only be called when the nfsuserd daemon isn't * running, since it doesn't do any locking. * This function is meant to be used when the nfscommon module is unloaded. */ APPLESTATIC void nfsrv_cleanusergroup(void) { struct nfsrv_lughash *hp, *hp2; struct nfsusrgrp *nusrp, *usrp; int i; if (nfsuserhash == NULL) return; for (i = 0; i < nfsrv_lughashsize; i++) { hp = &nfsuserhash[i]; TAILQ_FOREACH_SAFE(usrp, &hp->lughead, lug_numhash, nusrp) { TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); hp2 = NFSUSERNAMEHASH(usrp->lug_name, usrp->lug_namelen); TAILQ_REMOVE(&hp2->lughead, usrp, lug_namehash); if (usrp->lug_cred != NULL) crfree(usrp->lug_cred); free(usrp, M_NFSUSERGROUP); } hp = &nfsgrouphash[i]; TAILQ_FOREACH_SAFE(usrp, &hp->lughead, lug_numhash, nusrp) { TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); hp2 = NFSGROUPNAMEHASH(usrp->lug_name, usrp->lug_namelen); TAILQ_REMOVE(&hp2->lughead, usrp, lug_namehash); if (usrp->lug_cred != NULL) crfree(usrp->lug_cred); free(usrp, M_NFSUSERGROUP); } mtx_destroy(&nfsuserhash[i].mtx); mtx_destroy(&nfsusernamehash[i].mtx); mtx_destroy(&nfsgroupnamehash[i].mtx); mtx_destroy(&nfsgrouphash[i].mtx); } free(nfsuserhash, M_NFSUSERGROUP); free(nfsusernamehash, M_NFSUSERGROUP); free(nfsgrouphash, M_NFSUSERGROUP); free(nfsgroupnamehash, M_NFSUSERGROUP); free(nfsrv_dnsname, M_NFSSTRING); } /* * This function scans a byte string and checks for UTF-8 compliance. * It returns 0 if it conforms and NFSERR_INVAL if not. */ APPLESTATIC int nfsrv_checkutf8(u_int8_t *cp, int len) { u_int32_t val = 0x0; int cnt = 0, gotd = 0, shift = 0; u_int8_t byte; static int utf8_shift[5] = { 7, 11, 16, 21, 26 }; int error = 0; /* * Here are what the variables are used for: * val - the calculated value of a multibyte char, used to check * that it was coded with the correct range * cnt - the number of 10xxxxxx bytes to follow * gotd - set for a char of Dxxx, so D800<->DFFF can be checked for * shift - lower order bits of range (ie. "val >> shift" should * not be 0, in other words, dividing by the lower bound * of the range should get a non-zero value) * byte - used to calculate cnt */ while (len > 0) { if (cnt > 0) { /* This handles the 10xxxxxx bytes */ if ((*cp & 0xc0) != 0x80 || (gotd && (*cp & 0x20))) { error = NFSERR_INVAL; goto out; } gotd = 0; val <<= 6; val |= (*cp & 0x3f); cnt--; if (cnt == 0 && (val >> shift) == 0x0) { error = NFSERR_INVAL; goto out; } } else if (*cp & 0x80) { /* first byte of multi byte char */ byte = *cp; while ((byte & 0x40) && cnt < 6) { cnt++; byte <<= 1; } if (cnt == 0 || cnt == 6) { error = NFSERR_INVAL; goto out; } val = (*cp & (0x3f >> cnt)); shift = utf8_shift[cnt - 1]; if (cnt == 2 && val == 0xd) /* Check for the 0xd800-0xdfff case */ gotd = 1; } cp++; len--; } if (cnt > 0) error = NFSERR_INVAL; out: NFSEXITCODE(error); return (error); } /* * Parse the xdr for an NFSv4 FsLocations attribute. Return two malloc'd * strings, one with the root path in it and the other with the list of * locations. The list is in the same format as is found in nfr_refs. * It is a "," separated list of entries, where each of them is of the * form :. For example * "nfsv4-test:/sub2,nfsv4-test2:/user/mnt,nfsv4-test2:/user/mnt2" * The nilp argument is set to 1 for the special case of a null fs_root * and an empty server list. * It returns NFSERR_BADXDR, if the xdr can't be parsed and returns the * number of xdr bytes parsed in sump. */ static int nfsrv_getrefstr(struct nfsrv_descript *nd, u_char **fsrootp, u_char **srvp, int *sump, int *nilp) { u_int32_t *tl; u_char *cp = NULL, *cp2 = NULL, *cp3, *str; int i, j, len, stringlen, cnt, slen, siz, xdrsum, error = 0, nsrv; struct list { SLIST_ENTRY(list) next; int len; u_char host[1]; } *lsp, *nlsp; SLIST_HEAD(, list) head; *fsrootp = NULL; *srvp = NULL; *nilp = 0; /* * Get the fs_root path and check for the special case of null path * and 0 length server list. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len < 0 || len > 10240) { error = NFSERR_BADXDR; goto nfsmout; } if (len == 0) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl != 0) { error = NFSERR_BADXDR; goto nfsmout; } *nilp = 1; *sump = 2 * NFSX_UNSIGNED; error = 0; goto nfsmout; } cp = malloc(len + 1, M_NFSSTRING, M_WAITOK); error = nfsrv_mtostr(nd, cp, len); if (!error) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); cnt = fxdr_unsigned(int, *tl); if (cnt <= 0) error = NFSERR_BADXDR; } if (error) goto nfsmout; /* * Now, loop through the location list and make up the srvlist. */ xdrsum = (2 * NFSX_UNSIGNED) + NFSM_RNDUP(len); cp2 = cp3 = malloc(1024, M_NFSSTRING, M_WAITOK); slen = 1024; siz = 0; for (i = 0; i < cnt; i++) { SLIST_INIT(&head); NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nsrv = fxdr_unsigned(int, *tl); if (nsrv <= 0) { error = NFSERR_BADXDR; goto nfsmout; } /* * Handle the first server by putting it in the srvstr. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len <= 0 || len > 1024) { error = NFSERR_BADXDR; goto nfsmout; } nfsrv_refstrbigenough(siz + len + 3, &cp2, &cp3, &slen); if (cp3 != cp2) { *cp3++ = ','; siz++; } error = nfsrv_mtostr(nd, cp3, len); if (error) goto nfsmout; cp3 += len; *cp3++ = ':'; siz += (len + 1); xdrsum += (2 * NFSX_UNSIGNED) + NFSM_RNDUP(len); for (j = 1; j < nsrv; j++) { /* * Yuck, put them in an slist and process them later. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len <= 0 || len > 1024) { error = NFSERR_BADXDR; goto nfsmout; } lsp = (struct list *)malloc(sizeof (struct list) + len, M_TEMP, M_WAITOK); error = nfsrv_mtostr(nd, lsp->host, len); if (error) goto nfsmout; xdrsum += NFSX_UNSIGNED + NFSM_RNDUP(len); lsp->len = len; SLIST_INSERT_HEAD(&head, lsp, next); } /* * Finally, we can get the path. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len <= 0 || len > 1024) { error = NFSERR_BADXDR; goto nfsmout; } nfsrv_refstrbigenough(siz + len + 1, &cp2, &cp3, &slen); error = nfsrv_mtostr(nd, cp3, len); if (error) goto nfsmout; xdrsum += NFSX_UNSIGNED + NFSM_RNDUP(len); str = cp3; stringlen = len; cp3 += len; siz += len; SLIST_FOREACH_SAFE(lsp, &head, next, nlsp) { nfsrv_refstrbigenough(siz + lsp->len + stringlen + 3, &cp2, &cp3, &slen); *cp3++ = ','; NFSBCOPY(lsp->host, cp3, lsp->len); cp3 += lsp->len; *cp3++ = ':'; NFSBCOPY(str, cp3, stringlen); cp3 += stringlen; *cp3 = '\0'; siz += (lsp->len + stringlen + 2); free(lsp, M_TEMP); } } *fsrootp = cp; *srvp = cp2; *sump = xdrsum; NFSEXITCODE2(0, nd); return (0); nfsmout: if (cp != NULL) free(cp, M_NFSSTRING); if (cp2 != NULL) free(cp2, M_NFSSTRING); NFSEXITCODE2(error, nd); return (error); } /* * Make the malloc'd space large enough. This is a pain, but the xdr * doesn't set an upper bound on the side, so... */ static void nfsrv_refstrbigenough(int siz, u_char **cpp, u_char **cpp2, int *slenp) { u_char *cp; int i; if (siz <= *slenp) return; cp = malloc(siz + 1024, M_NFSSTRING, M_WAITOK); NFSBCOPY(*cpp, cp, *slenp); free(*cpp, M_NFSSTRING); i = *cpp2 - *cpp; *cpp = cp; *cpp2 = cp + i; *slenp = siz + 1024; } /* * Initialize the reply header data structures. */ APPLESTATIC void nfsrvd_rephead(struct nfsrv_descript *nd) { struct mbuf *mreq; if ((nd->nd_flag & ND_NOMAP) != 0) { - mreq = mb_alloc_ext_plus_pages(PAGE_SIZE, M_WAITOK, false, + mreq = mb_alloc_ext_plus_pages(PAGE_SIZE, M_WAITOK, mb_free_mext_pgs); nd->nd_mreq = nd->nd_mb = mreq; nd->nd_bpos = (char *)(void *) - PHYS_TO_DMAP(mreq->m_ext.ext_pgs->pa[0]); + PHYS_TO_DMAP(mreq->m_epg_pa[0]); nd->nd_bextpg = 0; nd->nd_bextpgsiz = PAGE_SIZE; } else { /* * If this is a big reply, use a cluster. */ if ((nd->nd_flag & ND_GSSINITREPLY) == 0 && nfs_bigreply[nd->nd_procnum]) { NFSMCLGET(mreq, M_WAITOK); nd->nd_mreq = mreq; nd->nd_mb = mreq; } else { NFSMGET(mreq); nd->nd_mreq = mreq; nd->nd_mb = mreq; } nd->nd_bpos = mtod(mreq, char *); mreq->m_len = 0; } if ((nd->nd_flag & ND_GSSINITREPLY) == 0) NFSM_BUILD(nd->nd_errp, int *, NFSX_UNSIGNED); } /* * Lock a socket against others. * Currently used to serialize connect/disconnect attempts. */ int newnfs_sndlock(int *flagp) { struct timespec ts; NFSLOCKSOCK(); while (*flagp & NFSR_SNDLOCK) { *flagp |= NFSR_WANTSND; ts.tv_sec = 0; ts.tv_nsec = 0; (void) nfsmsleep((caddr_t)flagp, NFSSOCKMUTEXPTR, PZERO - 1, "nfsndlck", &ts); } *flagp |= NFSR_SNDLOCK; NFSUNLOCKSOCK(); return (0); } /* * Unlock the stream socket for others. */ void newnfs_sndunlock(int *flagp) { NFSLOCKSOCK(); if ((*flagp & NFSR_SNDLOCK) == 0) panic("nfs sndunlock"); *flagp &= ~NFSR_SNDLOCK; if (*flagp & NFSR_WANTSND) { *flagp &= ~NFSR_WANTSND; wakeup((caddr_t)flagp); } NFSUNLOCKSOCK(); } APPLESTATIC int nfsv4_getipaddr(struct nfsrv_descript *nd, struct sockaddr_in *sin, struct sockaddr_in6 *sin6, sa_family_t *saf, int *isudp) { struct in_addr saddr; uint32_t portnum, *tl; int i, j, k; sa_family_t af = AF_UNSPEC; char addr[64], protocol[5], *cp; int cantparse = 0, error = 0; uint16_t portv; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i >= 3 && i <= 4) { error = nfsrv_mtostr(nd, protocol, i); if (error) goto nfsmout; if (strcmp(protocol, "tcp") == 0) { af = AF_INET; *isudp = 0; } else if (strcmp(protocol, "udp") == 0) { af = AF_INET; *isudp = 1; } else if (strcmp(protocol, "tcp6") == 0) { af = AF_INET6; *isudp = 0; } else if (strcmp(protocol, "udp6") == 0) { af = AF_INET6; *isudp = 1; } else cantparse = 1; } else { cantparse = 1; if (i > 0) { error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; } } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i < 0) { error = NFSERR_BADXDR; goto nfsmout; } else if (cantparse == 0 && i >= 11 && i < 64) { /* * The shortest address is 11chars and the longest is < 64. */ error = nfsrv_mtostr(nd, addr, i); if (error) goto nfsmout; /* Find the port# at the end and extract that. */ i = strlen(addr); k = 0; cp = &addr[i - 1]; /* Count back two '.'s from end to get port# field. */ for (j = 0; j < i; j++) { if (*cp == '.') { k++; if (k == 2) break; } cp--; } if (k == 2) { /* * The NFSv4 port# is appended as .N.N, where N is * a decimal # in the range 0-255, just like an inet4 * address. Cheat and use inet_aton(), which will * return a Class A address and then shift the high * order 8bits over to convert it to the port#. */ *cp++ = '\0'; if (inet_aton(cp, &saddr) == 1) { portnum = ntohl(saddr.s_addr); portv = (uint16_t)((portnum >> 16) | (portnum & 0xff)); } else cantparse = 1; } else cantparse = 1; if (cantparse == 0) { if (af == AF_INET) { if (inet_pton(af, addr, &sin->sin_addr) == 1) { sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; sin->sin_port = htons(portv); *saf = af; return (0); } } else { if (inet_pton(af, addr, &sin6->sin6_addr) == 1) { sin6->sin6_len = sizeof(*sin6); sin6->sin6_family = AF_INET6; sin6->sin6_port = htons(portv); *saf = af; return (0); } } } } else { if (i > 0) { error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; } } error = EPERM; nfsmout: return (error); } /* * Handle an NFSv4.1 Sequence request for the session. * If reply != NULL, use it to return the cached reply, as required. * The client gets a cached reply via this call for callbacks, however the * server gets a cached reply via the nfsv4_seqsess_cachereply() call. */ int nfsv4_seqsession(uint32_t seqid, uint32_t slotid, uint32_t highslot, struct nfsslot *slots, struct mbuf **reply, uint16_t maxslot) { int error; error = 0; if (reply != NULL) *reply = NULL; if (slotid > maxslot) return (NFSERR_BADSLOT); if (seqid == slots[slotid].nfssl_seq) { /* A retry. */ if (slots[slotid].nfssl_inprog != 0) error = NFSERR_DELAY; else if (slots[slotid].nfssl_reply != NULL) { if (reply != NULL) { *reply = slots[slotid].nfssl_reply; slots[slotid].nfssl_reply = NULL; } slots[slotid].nfssl_inprog = 1; error = NFSERR_REPLYFROMCACHE; } else /* No reply cached, so just do it. */ slots[slotid].nfssl_inprog = 1; } else if ((slots[slotid].nfssl_seq + 1) == seqid) { if (slots[slotid].nfssl_reply != NULL) m_freem(slots[slotid].nfssl_reply); slots[slotid].nfssl_reply = NULL; slots[slotid].nfssl_inprog = 1; slots[slotid].nfssl_seq++; } else error = NFSERR_SEQMISORDERED; return (error); } /* * Cache this reply for the slot. * Use the "rep" argument to return the cached reply if repstat is set to * NFSERR_REPLYFROMCACHE. The client never sets repstat to this value. */ void nfsv4_seqsess_cacherep(uint32_t slotid, struct nfsslot *slots, int repstat, struct mbuf **rep) { if (repstat == NFSERR_REPLYFROMCACHE) { *rep = slots[slotid].nfssl_reply; slots[slotid].nfssl_reply = NULL; } else { if (slots[slotid].nfssl_reply != NULL) m_freem(slots[slotid].nfssl_reply); slots[slotid].nfssl_reply = *rep; } slots[slotid].nfssl_inprog = 0; } /* * Generate the xdr for an NFSv4.1 Sequence Operation. */ APPLESTATIC void nfsv4_setsequence(struct nfsmount *nmp, struct nfsrv_descript *nd, struct nfsclsession *sep, int dont_replycache) { uint32_t *tl, slotseq = 0; int error, maxslot, slotpos; uint8_t sessionid[NFSX_V4SESSIONID]; error = nfsv4_sequencelookup(nmp, sep, &slotpos, &maxslot, &slotseq, sessionid); nd->nd_maxreq = sep->nfsess_maxreq; nd->nd_maxresp = sep->nfsess_maxresp; /* Build the Sequence arguments. */ NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 4 * NFSX_UNSIGNED); nd->nd_sequence = tl; bcopy(sessionid, tl, NFSX_V4SESSIONID); tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; nd->nd_slotseq = tl; if (error == 0) { nd->nd_flag |= ND_HASSLOTID; nd->nd_slotid = slotpos; *tl++ = txdr_unsigned(slotseq); *tl++ = txdr_unsigned(slotpos); *tl++ = txdr_unsigned(maxslot); if (dont_replycache == 0) *tl = newnfs_true; else *tl = newnfs_false; } else { /* * There are two errors and the rest of the session can * just be zeros. * NFSERR_BADSESSION: This bad session should just generate * the same error again when the RPC is retried. * ESTALE: A forced dismount is in progress and will cause the * RPC to fail later. */ *tl++ = 0; *tl++ = 0; *tl++ = 0; *tl = 0; } nd->nd_flag |= ND_HASSEQUENCE; } int nfsv4_sequencelookup(struct nfsmount *nmp, struct nfsclsession *sep, int *slotposp, int *maxslotp, uint32_t *slotseqp, uint8_t *sessionid) { int i, maxslot, slotpos; uint64_t bitval; /* Find an unused slot. */ slotpos = -1; maxslot = -1; mtx_lock(&sep->nfsess_mtx); do { if (nmp != NULL && sep->nfsess_defunct != 0) { /* Just return the bad session. */ bcopy(sep->nfsess_sessionid, sessionid, NFSX_V4SESSIONID); mtx_unlock(&sep->nfsess_mtx); return (NFSERR_BADSESSION); } bitval = 1; for (i = 0; i < sep->nfsess_foreslots; i++) { if ((bitval & sep->nfsess_slots) == 0) { slotpos = i; sep->nfsess_slots |= bitval; sep->nfsess_slotseq[i]++; *slotseqp = sep->nfsess_slotseq[i]; break; } bitval <<= 1; } if (slotpos == -1) { /* * If a forced dismount is in progress, just return. * This RPC attempt will fail when it calls * newnfs_request(). */ if (nmp != NULL && NFSCL_FORCEDISM(nmp->nm_mountp)) { mtx_unlock(&sep->nfsess_mtx); return (ESTALE); } /* Wake up once/sec, to check for a forced dismount. */ (void)mtx_sleep(&sep->nfsess_slots, &sep->nfsess_mtx, PZERO, "nfsclseq", hz); } } while (slotpos == -1); /* Now, find the highest slot in use. (nfsc_slots is 64bits) */ bitval = 1; for (i = 0; i < 64; i++) { if ((bitval & sep->nfsess_slots) != 0) maxslot = i; bitval <<= 1; } bcopy(sep->nfsess_sessionid, sessionid, NFSX_V4SESSIONID); mtx_unlock(&sep->nfsess_mtx); *slotposp = slotpos; *maxslotp = maxslot; return (0); } /* * Free a session slot. */ APPLESTATIC void nfsv4_freeslot(struct nfsclsession *sep, int slot) { uint64_t bitval; bitval = 1; if (slot > 0) bitval <<= slot; mtx_lock(&sep->nfsess_mtx); if ((bitval & sep->nfsess_slots) == 0) printf("freeing free slot!!\n"); sep->nfsess_slots &= ~bitval; wakeup(&sep->nfsess_slots); mtx_unlock(&sep->nfsess_mtx); } /* * Search for a matching pnfsd DS, based on the nmp arg. * Return one if found, NULL otherwise. */ struct nfsdevice * nfsv4_findmirror(struct nfsmount *nmp) { struct nfsdevice *ds; mtx_assert(NFSDDSMUTEXPTR, MA_OWNED); /* * Search the DS server list for a match with nmp. */ if (nfsrv_devidcnt == 0) return (NULL); TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { if (ds->nfsdev_nmp == nmp) { NFSCL_DEBUG(4, "nfsv4_findmirror: fnd main ds\n"); break; } } return (ds); } /* * Fill in the fields of "struct nfsrv_descript" for a new ext_pgs mbuf. * The build argument is true for build and false for dissect. */ int nfsm_set(struct nfsrv_descript *nd, u_int offs, bool build) { struct mbuf *m; struct mbuf_ext_pgs *pgs; int rlen; if (build) m = nd->nd_mb; else m = nd->nd_md; if ((m->m_flags & M_NOMAP) != 0) { if (build) { - pgs = m->m_ext.ext_pgs; + pgs = &m->m_ext_pgs; nd->nd_bextpg = 0; while (offs > 0) { if (nd->nd_bextpg == 0) rlen = mbuf_ext_pg_len(pgs, 0, pgs->first_pg_off); else rlen = mbuf_ext_pg_len(pgs, nd->nd_bextpg, 0); if (offs <= rlen) break; offs -= rlen; nd->nd_bextpg++; if (nd->nd_bextpg == pgs->npgs) { printf("nfsm_set: build offs " "out of range\n"); nd->nd_bextpg--; break; } } nd->nd_bpos = (char *)(void *) - PHYS_TO_DMAP(pgs->pa[nd->nd_bextpg]); + PHYS_TO_DMAP(m->m_epg_pa[nd->nd_bextpg]); if (nd->nd_bextpg == 0) nd->nd_bpos += pgs->first_pg_off; if (offs > 0) { nd->nd_bpos += offs; rlen = nd->nd_bextpgsiz = rlen - offs; } else if (nd->nd_bextpg == 0) rlen = nd->nd_bextpgsiz = PAGE_SIZE - pgs->first_pg_off; else rlen = nd->nd_bextpgsiz = PAGE_SIZE; } else { - pgs = m->m_ext.ext_pgs; + pgs = &m->m_ext_pgs; nd->nd_dextpg = 0; do { nd->nd_dpos = (char *)(void *) - PHYS_TO_DMAP(pgs->pa[nd->nd_dextpg]); + PHYS_TO_DMAP(m->m_epg_pa[nd->nd_dextpg]); if (nd->nd_dextpg == 0) { nd->nd_dpos += pgs->first_pg_off; rlen = nd->nd_dextpgsiz = mbuf_ext_pg_len(pgs, 0, pgs->first_pg_off); } else rlen = nd->nd_dextpgsiz = mbuf_ext_pg_len(pgs, nd->nd_dextpg, 0); if (offs > rlen) { nd->nd_dextpg++; offs -= rlen; } else if (offs > 0) { nd->nd_dpos += offs; nd->nd_dextpgsiz -= offs; offs = 0; } } while (offs > 0); } } else if (build) { nd->nd_bpos = mtod(m, char *) + offs; rlen = m->m_len - offs; } else { nd->nd_dpos = mtod(m, char *); rlen = m->m_len; } return (rlen); } /* * Copy up to "len" bytes from the mbuf into "cp" and adjust the * mbuf accordingly. * If cp == NULL, do not do the actual copy, but adjust the mbuf. * Return the number of bytes actually copied. * Adjust m_data and m_len so that a future calculation of what * is left using mtod() will work correctly. */ static int nfsm_copyfrommbuf(struct nfsrv_descript *nd, char *cp, enum uio_seg segflg, int len) { struct mbuf *m; int xfer; m = nd->nd_md; xfer = mtod(m, char *) + m->m_len - nd->nd_dpos; xfer = min(xfer, len); if (xfer > 0) { if (cp != NULL) { if (segflg == UIO_SYSSPACE) memcpy(cp, nd->nd_dpos, xfer); else copyout(nd->nd_dpos, cp, xfer); } nd->nd_dpos += xfer; m->m_data += xfer; m->m_len -= xfer; } return (xfer); } /* * Copy up to "len" bytes from the mbuf into "cp" and adjust the * mbuf accordingly. * If cp == NULL, do not do the actual copy, but adjust the mbuf. * Return the number of bytes actually copied. * Same as above, but for an ext_pgs mbuf. */ static int nfsm_copyfrommbuf_extpgs(struct nfsrv_descript *nd, char *cp, enum uio_seg segflg, int len) { struct mbuf_ext_pgs *pgs; int tlen, xfer; - pgs = nd->nd_md->m_ext.ext_pgs; + pgs = &nd->nd_md->m_ext_pgs; tlen = 0; /* Copy from the page(s) into cp. */ do { xfer = nd->nd_dextpgsiz; xfer = min(xfer, len); if (cp != NULL && xfer > 0) { if (segflg == UIO_SYSSPACE) memcpy(cp, nd->nd_dpos, xfer); else copyout(nd->nd_dpos, cp, xfer); cp += xfer; } tlen += xfer; len -= xfer; nd->nd_dextpgsiz -= xfer; nd->nd_dpos += xfer; if (nd->nd_dextpgsiz == 0 && len > 0 && nd->nd_dextpg < pgs->npgs - 1) { nd->nd_dextpg++; nd->nd_dpos = (char *)(void *) - PHYS_TO_DMAP(pgs->pa[nd->nd_dextpg]); + PHYS_TO_DMAP(nd->nd_md->m_epg_pa[nd->nd_dextpg]); nd->nd_dextpgsiz = mbuf_ext_pg_len(pgs, nd->nd_dextpg, 0); } } while (len > 0 && nd->nd_dextpgsiz > 0); return (tlen); } /* * Split an ext_pgs mbuf into two ext_pgs mbufs on a page boundary. */ static struct mbuf * nfsm_splitatpgno(struct mbuf *mp, int pgno, int how) { struct mbuf *m; struct mbuf_ext_pgs *pgs, *pgs0; int i, j, tlen; KASSERT((mp->m_flags & (M_EXT | M_NOMAP)) == (M_EXT | M_NOMAP), ("nfsm_splitatpgno: mp not ext_pgs")); - pgs = mp->m_ext.ext_pgs; + pgs = &mp->m_ext_pgs; KASSERT(pgno < pgs->npgs - 1, ("nfsm_splitatpgno:" " at the last page")); - m = mb_alloc_ext_pgs(how, false, mb_free_mext_pgs); + m = mb_alloc_ext_pgs(how, mb_free_mext_pgs); if (m == NULL) return (m); - pgs0 = m->m_ext.ext_pgs; + pgs0 = &m->m_ext_pgs; pgs0->flags |= MBUF_PEXT_FLAG_ANON; /* Move the pages beyond pgno to the new mbuf. */ for (i = pgno + 1, j = 0; i < pgs->npgs; i++, j++) - pgs0->pa[j] = pgs->pa[i]; + m->m_epg_pa[j] = mp->m_epg_pa[i]; pgs0->npgs = j; pgs0->last_pg_len = pgs->last_pg_len; pgs->npgs = pgno + 1; pgs->last_pg_len = PAGE_SIZE; if (pgno == 0) pgs->last_pg_len -= pgs->first_pg_off; /* Now set m_len for both mbufs. */ tlen = mbuf_ext_pg_len(pgs, 0, pgs->first_pg_off); for (i = 1; i < pgs->npgs; i++) tlen += mbuf_ext_pg_len(pgs, i, 0); mp->m_len = tlen; /* The new mbuf has first_pg_off == 0. */ tlen = 0; for (i = 0; i < pgs0->npgs; i++) tlen += mbuf_ext_pg_len(pgs0, i, 0); m->m_len = tlen; /* Link the new mbuf after mp. */ m->m_next = mp->m_next; mp->m_next = m; return (mp); } /* * Shift to the next mbuf in the list list and update the nd fields. * Return true if successful, false otherwise. */ bool nfsm_shiftnext(struct nfsrv_descript *nd, int *leftp) { nd->nd_md = nd->nd_md->m_next; if (nd->nd_md == NULL) return (false); *leftp = nfsm_set(nd, 0, false); return (true); } /* * Grow a ext_pgs mbuf list. Either allocate another page or add * an mbuf to the list. */ struct mbuf * nfsm_add_ext_pgs(struct mbuf *m, int maxextsiz, int *bextpg) { struct mbuf_ext_pgs *pgs; struct mbuf *mp; vm_page_t pg; - pgs = m->m_ext.ext_pgs; + pgs = &m->m_ext_pgs; if ((pgs->npgs + 1) * PAGE_SIZE > maxextsiz) { mp = mb_alloc_ext_plus_pages(PAGE_SIZE, M_WAITOK, - false, mb_free_mext_pgs); + mb_free_mext_pgs); *bextpg = 0; m->m_next = mp; } else { do { pg = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_NODUMP | VM_ALLOC_WIRED); if (pg == NULL) vm_wait(NULL); } while (pg == NULL); - pgs->pa[pgs->npgs] = VM_PAGE_TO_PHYS(pg); + m->m_epg_pa[pgs->npgs] = VM_PAGE_TO_PHYS(pg); *bextpg = pgs->npgs; pgs->npgs++; pgs->last_pg_len = 0; mp = m; } return (mp); } /* * Calculate the data offset of m for dextpg and dextpgsiz. */ int nfsm_extpgs_calc_offs(struct mbuf *m, int dextpg, int dextpgsiz) { struct mbuf_ext_pgs *pgs; int cnt, offs; offs = 0; - pgs = m->m_ext.ext_pgs; + pgs = &m->m_ext_pgs; for (cnt = 0; cnt < dextpg; cnt++) { if (cnt == 0) offs += mbuf_ext_pg_len(pgs, 0, pgs->first_pg_off); else offs += mbuf_ext_pg_len(pgs, cnt, 0); } if (dextpg == 0) cnt = mbuf_ext_pg_len(pgs, 0, pgs->first_pg_off); else cnt = mbuf_ext_pg_len(pgs, dextpg, 0); offs += cnt - dextpgsiz; return (offs); } Index: projects/nfs-over-tls/sys/fs/nfs/nfsm_subs.h =================================================================== --- projects/nfs-over-tls/sys/fs/nfs/nfsm_subs.h (revision 360215) +++ projects/nfs-over-tls/sys/fs/nfs/nfsm_subs.h (revision 360216) @@ -1,185 +1,185 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NFS_NFSM_SUBS_H_ #define _NFS_NFSM_SUBS_H_ /* * These macros do strange and peculiar things to mbuf chains for * the assistance of the nfs code. To attempt to use them for any * other purpose will be dangerous. (they make weird assumptions) */ #ifndef APPLE /* * First define what the actual subs. return */ #define NFSM_DATAP(m, s) (m)->m_data += (s) /* * Now for the macros that do the simple stuff and call the functions * for the hard stuff. * They use fields in struct nfsrv_descript to handle the mbuf queues. * Replace most of the macro with an inline function, to minimize * the machine code. The inline functions in lower case can be called * directly, bypassing the macro. * For ND_NOMAP, if there is not enough contiguous space left in * the mbuf page, allocate a regular mbuf. The data in these regular * mbufs will need to be copied into pages later, since the data must * be filled pages. This should only happen after a write request or * read reply has been filled into the mbuf list. */ static __inline void * nfsm_build(struct nfsrv_descript *nd, int siz) { void *retp; struct mbuf *mb2; if ((nd->nd_flag & ND_NOMAP) == 0 && siz > M_TRAILINGSPACE(nd->nd_mb)) { NFSMCLGET(mb2, M_NOWAIT); if (siz > MLEN) panic("build > MLEN"); mb2->m_len = 0; nd->nd_bpos = mtod(mb2, char *); nd->nd_mb->m_next = mb2; nd->nd_mb = mb2; } else if ((nd->nd_flag & ND_NOMAP) != 0) { if (siz > nd->nd_bextpgsiz) { mb2 = mb_alloc_ext_plus_pages(PAGE_SIZE, M_WAITOK, - false, mb_free_mext_pgs); + mb_free_mext_pgs); nd->nd_bpos = (char *)(void *) - PHYS_TO_DMAP(mb2->m_ext.ext_pgs->pa[0]); + PHYS_TO_DMAP(mb2->m_epg_pa[0]); nd->nd_bextpg = 0; nd->nd_bextpgsiz = PAGE_SIZE - siz; nd->nd_mb->m_next = mb2; nd->nd_mb = mb2; } else nd->nd_bextpgsiz -= siz; - nd->nd_mb->m_ext.ext_pgs->last_pg_len += siz; + nd->nd_mb->m_ext_pgs.last_pg_len += siz; } retp = (void *)(nd->nd_bpos); nd->nd_mb->m_len += siz; nd->nd_bpos += siz; return (retp); } #define NFSM_BUILD(a, c, s) ((a) = (c)nfsm_build(nd, (s))) static __inline void * nfsm_dissect(struct nfsrv_descript *nd, int siz) { int tt1; void *retp; if ((nd->nd_md->m_flags & (M_EXT | M_NOMAP)) == (M_EXT | M_NOMAP)) { if (nd->nd_dextpgsiz >= siz) { retp = (void *)nd->nd_dpos; nd->nd_dpos += siz; nd->nd_dextpgsiz -= siz; } else retp = nfsm_dissct(nd, siz, M_WAITOK); } else { tt1 = mtod(nd->nd_md, char *) + nd->nd_md->m_len - nd->nd_dpos; if (tt1 >= siz) { retp = (void *)nd->nd_dpos; nd->nd_dpos += siz; } else retp = nfsm_dissct(nd, siz, M_WAITOK); } return (retp); } static __inline void * nfsm_dissect_nonblock(struct nfsrv_descript *nd, int siz) { int tt1; void *retp; if ((nd->nd_md->m_flags & (M_EXT | M_NOMAP)) == (M_EXT | M_NOMAP)) { if (nd->nd_dextpgsiz >= siz) { retp = (void *)nd->nd_dpos; nd->nd_dpos += siz; nd->nd_dextpgsiz -= siz; } else retp = nfsm_dissct(nd, siz, M_NOWAIT); } else { tt1 = mtod(nd->nd_md, char *) + nd->nd_md->m_len - nd->nd_dpos; if (tt1 >= siz) { retp = (void *)nd->nd_dpos; nd->nd_dpos += siz; } else retp = nfsm_dissct(nd, siz, M_NOWAIT); } return (retp); } #define NFSM_DISSECT(a, c, s) \ do { \ (a) = (c)nfsm_dissect(nd, (s)); \ if ((a) == NULL) { \ error = EBADRPC; \ goto nfsmout; \ } \ } while (0) #define NFSM_DISSECT_NONBLOCK(a, c, s) \ do { \ (a) = (c)nfsm_dissect_nonblock(nd, (s)); \ if ((a) == NULL) { \ error = EBADRPC; \ goto nfsmout; \ } \ } while (0) #endif /* !APPLE */ #define NFSM_STRSIZ(s, m) \ do { \ tl = (u_int32_t *)nfsm_dissect(nd, NFSX_UNSIGNED); \ if (!tl || ((s) = fxdr_unsigned(int32_t, *tl)) > (m)) { \ error = EBADRPC; \ goto nfsmout; \ } \ } while (0) #define NFSM_RNDUP(a) (((a)+3)&(~0x3)) #endif /* _NFS_NFSM_SUBS_H_ */ Index: projects/nfs-over-tls/sys/fs/nfsclient/nfs_clcomsubs.c =================================================================== --- projects/nfs-over-tls/sys/fs/nfsclient/nfs_clcomsubs.c (revision 360215) +++ projects/nfs-over-tls/sys/fs/nfsclient/nfs_clcomsubs.c (revision 360216) @@ -1,499 +1,499 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * These functions support the macros and help fiddle mbuf chains for * the nfs op functions. They do things like create the rpc header and * copy data between mbuf chains and uio lists. */ #ifndef APPLEKEXT #include extern struct nfsstatsv1 nfsstatsv1; extern int ncl_mbuf_mlen; extern enum vtype newnv2tov_type[8]; extern enum vtype nv34tov_type[8]; NFSCLSTATEMUTEX; #endif /* !APPLEKEXT */ static nfsuint64 nfs_nullcookie = {{ 0, 0 }}; /* * copies a uio scatter/gather list to an mbuf chain. * NOTE: can ony handle iovcnt == 1 */ APPLESTATIC void nfsm_uiombuf(struct nfsrv_descript *nd, struct uio *uiop, int siz) { char *uiocp; struct mbuf *mp, *mp2; int xfer, left, mlen; int uiosiz, clflg, rem; char *mcp, *tcp; KASSERT(uiop->uio_iovcnt == 1, ("nfsm_uiotombuf: iovcnt != 1")); if (siz > ncl_mbuf_mlen) /* or should it >= MCLBYTES ?? */ clflg = 1; else clflg = 0; rem = NFSM_RNDUP(siz) - siz; mp = mp2 = nd->nd_mb; mcp = nd->nd_bpos; while (siz > 0) { left = uiop->uio_iov->iov_len; uiocp = uiop->uio_iov->iov_base; if (left > siz) left = siz; uiosiz = left; while (left > 0) { if ((nd->nd_flag & ND_NOMAP) != 0) mlen = nd->nd_bextpgsiz; else mlen = M_TRAILINGSPACE(mp); if (mlen == 0) { if ((nd->nd_flag & ND_NOMAP) != 0) { mp = nfsm_add_ext_pgs(mp, nd->nd_maxextsiz, &nd->nd_bextpg); mcp = (char *)(void *)PHYS_TO_DMAP( - mp->m_ext.ext_pgs->pa[nd->nd_bextpg]); + mp->m_epg_pa[nd->nd_bextpg]); nd->nd_bextpgsiz = PAGE_SIZE; } else { if (clflg) NFSMCLGET(mp, M_WAITOK); else NFSMGET(mp); mp->m_len = 0; mlen = M_TRAILINGSPACE(mp); mcp = mtod(mp, char *); mp2->m_next = mp; mp2 = mp; } } xfer = (left > mlen) ? mlen : left; if (uiop->uio_segflg == UIO_SYSSPACE) NFSBCOPY(uiocp, mcp, xfer); else copyin(uiocp, mcp, xfer); mp->m_len += xfer; left -= xfer; uiocp += xfer; mcp += xfer; if ((nd->nd_flag & ND_NOMAP) != 0) { nd->nd_bextpgsiz -= xfer; - mp->m_ext.ext_pgs->last_pg_len += xfer; + mp->m_ext_pgs.last_pg_len += xfer; } uiop->uio_offset += xfer; uiop->uio_resid -= xfer; } tcp = (char *)uiop->uio_iov->iov_base; tcp += uiosiz; uiop->uio_iov->iov_base = (void *)tcp; uiop->uio_iov->iov_len -= uiosiz; siz -= uiosiz; } if (rem > 0) { if ((nd->nd_flag & ND_NOMAP) == 0 && rem > M_TRAILINGSPACE(mp)) { NFSMGET(mp); mp->m_len = 0; mp2->m_next = mp; mcp = mtod(mp, char *); } else if ((nd->nd_flag & ND_NOMAP) != 0 && rem > nd->nd_bextpgsiz) { mp = nfsm_add_ext_pgs(mp, nd->nd_maxextsiz, &nd->nd_bextpg); mcp = (char *)(void *) - PHYS_TO_DMAP(mp->m_ext.ext_pgs->pa[nd->nd_bextpg]); + PHYS_TO_DMAP(mp->m_epg_pa[nd->nd_bextpg]); nd->nd_bextpgsiz = PAGE_SIZE; } for (left = 0; left < rem; left++) *mcp++ = '\0'; mp->m_len += rem; nd->nd_bpos = mcp; if ((nd->nd_flag & ND_NOMAP) != 0) { nd->nd_bextpgsiz -= rem; - mp->m_ext.ext_pgs->last_pg_len += rem; + mp->m_ext_pgs.last_pg_len += rem; } } else nd->nd_bpos = mcp; nd->nd_mb = mp; } /* * copies a uio scatter/gather list to an mbuf chain. * This version returns the mbuf list and does not use "nd". * It allocates mbuf(s) of NFSM_RNDUP(siz) and ensures that * it is nul padded to a multiple of 4 bytes. * Since mbufs are allocated by this function, they will * always have space for an exact multiple of 4 bytes in * each mbuf. This implies that the nul padding can be * safely done without checking for available space in * the mbuf data area (or page for M_NOMAP mbufs). * NOTE: can ony handle iovcnt == 1 */ struct mbuf * nfsm_uiombuflist(bool doextpgs, int maxextsiz, struct uio *uiop, int siz, struct mbuf **mbp, char **cpp) { char *uiocp; struct mbuf *mp, *mp2, *firstmp; int i, left, mlen, rem, xfer; int uiosiz, clflg, bextpg, bextpgsiz = 0; char *mcp, *tcp; KASSERT(uiop->uio_iovcnt == 1, ("nfsm_uiotombuf: iovcnt != 1")); if (siz > ncl_mbuf_mlen) /* or should it >= MCLBYTES ?? */ clflg = 1; else clflg = 0; rem = NFSM_RNDUP(siz) - siz; if (doextpgs) { mp = mb_alloc_ext_plus_pages(PAGE_SIZE, M_WAITOK, - false, mb_free_mext_pgs); + mb_free_mext_pgs); mcp = (char *)(void *) - PHYS_TO_DMAP(mp->m_ext.ext_pgs->pa[0]); + PHYS_TO_DMAP(mp->m_epg_pa[0]); bextpgsiz = PAGE_SIZE; bextpg = 0; } else { if (clflg != 0) NFSMCLGET(mp, M_WAITOK); else NFSMGET(mp); mp->m_len = 0; mcp = mtod(mp, char *); } firstmp = mp2 = mp; while (siz > 0) { left = uiop->uio_iov->iov_len; uiocp = uiop->uio_iov->iov_base; if (left > siz) left = siz; uiosiz = left; while (left > 0) { if (doextpgs) mlen = bextpgsiz; else mlen = M_TRAILINGSPACE(mp); if (mlen == 0) { if (doextpgs) { mp = nfsm_add_ext_pgs(mp, maxextsiz, &bextpg); mcp = (char *)(void *)PHYS_TO_DMAP( - mp->m_ext.ext_pgs->pa[bextpg]); + mp->m_epg_pa[bextpg]); mlen = bextpgsiz = PAGE_SIZE; } else { if (clflg) NFSMCLGET(mp, M_WAITOK); else NFSMGET(mp); mp->m_len = 0; mcp = mtod(mp, char *); mlen = M_TRAILINGSPACE(mp); mp2->m_next = mp; mp2 = mp; } } xfer = (left > mlen) ? mlen : left; if (uiop->uio_segflg == UIO_SYSSPACE) NFSBCOPY(uiocp, mcp, xfer); else copyin(uiocp, mcp, xfer); mp->m_len += xfer; left -= xfer; uiocp += xfer; mcp += xfer; if (doextpgs) { bextpgsiz -= xfer; - mp->m_ext.ext_pgs->last_pg_len += xfer; + mp->m_ext_pgs.last_pg_len += xfer; } uiop->uio_offset += xfer; uiop->uio_resid -= xfer; } tcp = (char *)uiop->uio_iov->iov_base; tcp += uiosiz; uiop->uio_iov->iov_base = (void *)tcp; uiop->uio_iov->iov_len -= uiosiz; siz -= uiosiz; } for (i = 0; i < rem; i++) { *mcp++ = '\0'; mp->m_len++; if (doextpgs) - mp->m_ext.ext_pgs->last_pg_len++; + mp->m_ext_pgs.last_pg_len++; } if (cpp != NULL) *cpp = mcp; if (mbp != NULL) *mbp = mp; return (firstmp); } /* * Load vnode attributes from the xdr file attributes. * Returns EBADRPC if they can't be parsed, 0 otherwise. */ APPLESTATIC int nfsm_loadattr(struct nfsrv_descript *nd, struct nfsvattr *nap) { struct nfs_fattr *fp; int error = 0; if (nd->nd_flag & ND_NFSV4) { error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); } else if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(fp, struct nfs_fattr *, NFSX_V3FATTR); nap->na_type = nfsv34tov_type(fp->fa_type); nap->na_mode = fxdr_unsigned(u_short, fp->fa_mode); nap->na_rdev = NFSMAKEDEV( fxdr_unsigned(int, fp->fa3_rdev.specdata1), fxdr_unsigned(int, fp->fa3_rdev.specdata2)); nap->na_nlink = fxdr_unsigned(uint32_t, fp->fa_nlink); nap->na_uid = fxdr_unsigned(uid_t, fp->fa_uid); nap->na_gid = fxdr_unsigned(gid_t, fp->fa_gid); nap->na_size = fxdr_hyper(&fp->fa3_size); nap->na_blocksize = NFS_FABLKSIZE; nap->na_bytes = fxdr_hyper(&fp->fa3_used); nap->na_fileid = fxdr_hyper(&fp->fa3_fileid); fxdr_nfsv3time(&fp->fa3_atime, &nap->na_atime); fxdr_nfsv3time(&fp->fa3_ctime, &nap->na_ctime); fxdr_nfsv3time(&fp->fa3_mtime, &nap->na_mtime); nap->na_flags = 0; nap->na_filerev = 0; } else { NFSM_DISSECT(fp, struct nfs_fattr *, NFSX_V2FATTR); nap->na_type = nfsv2tov_type(fp->fa_type); nap->na_mode = fxdr_unsigned(u_short, fp->fa_mode); if (nap->na_type == VNON || nap->na_type == VREG) nap->na_type = IFTOVT(nap->na_mode); nap->na_rdev = fxdr_unsigned(dev_t, fp->fa2_rdev); /* * Really ugly NFSv2 kludge. */ if (nap->na_type == VCHR && nap->na_rdev == ((dev_t)-1)) nap->na_type = VFIFO; nap->na_nlink = fxdr_unsigned(u_short, fp->fa_nlink); nap->na_uid = fxdr_unsigned(uid_t, fp->fa_uid); nap->na_gid = fxdr_unsigned(gid_t, fp->fa_gid); nap->na_size = fxdr_unsigned(u_int32_t, fp->fa2_size); nap->na_blocksize = fxdr_unsigned(int32_t, fp->fa2_blocksize); nap->na_bytes = (u_quad_t)fxdr_unsigned(int32_t, fp->fa2_blocks) * NFS_FABLKSIZE; nap->na_fileid = fxdr_unsigned(uint64_t, fp->fa2_fileid); fxdr_nfsv2time(&fp->fa2_atime, &nap->na_atime); fxdr_nfsv2time(&fp->fa2_mtime, &nap->na_mtime); nap->na_flags = 0; nap->na_ctime.tv_sec = fxdr_unsigned(u_int32_t, fp->fa2_ctime.nfsv2_sec); nap->na_ctime.tv_nsec = 0; nap->na_gen = fxdr_unsigned(u_int32_t,fp->fa2_ctime.nfsv2_usec); nap->na_filerev = 0; } nfsmout: return (error); } /* * This function finds the directory cookie that corresponds to the * logical byte offset given. */ APPLESTATIC nfsuint64 * nfscl_getcookie(struct nfsnode *np, off_t off, int add) { struct nfsdmap *dp, *dp2; int pos; pos = off / NFS_DIRBLKSIZ; if (pos == 0) { KASSERT(!add, ("nfs getcookie add at 0")); return (&nfs_nullcookie); } pos--; dp = LIST_FIRST(&np->n_cookies); if (!dp) { if (add) { dp = malloc(sizeof (struct nfsdmap), M_NFSDIROFF, M_WAITOK); dp->ndm_eocookie = 0; LIST_INSERT_HEAD(&np->n_cookies, dp, ndm_list); } else return (NULL); } while (pos >= NFSNUMCOOKIES) { pos -= NFSNUMCOOKIES; if (LIST_NEXT(dp, ndm_list) != NULL) { if (!add && dp->ndm_eocookie < NFSNUMCOOKIES && pos >= dp->ndm_eocookie) return (NULL); dp = LIST_NEXT(dp, ndm_list); } else if (add) { dp2 = malloc(sizeof (struct nfsdmap), M_NFSDIROFF, M_WAITOK); dp2->ndm_eocookie = 0; LIST_INSERT_AFTER(dp, dp2, ndm_list); dp = dp2; } else return (NULL); } if (pos >= dp->ndm_eocookie) { if (add) dp->ndm_eocookie = pos + 1; else return (NULL); } return (&dp->ndm_cookies[pos]); } /* * Gets a file handle out of an nfs reply sent to the client and returns * the file handle and the file's attributes. * For V4, it assumes that Getfh and Getattr Op's results are here. */ APPLESTATIC int nfscl_mtofh(struct nfsrv_descript *nd, struct nfsfh **nfhpp, struct nfsvattr *nap, int *attrflagp) { u_int32_t *tl; int error = 0, flag = 1; *nfhpp = NULL; *attrflagp = 0; /* * First get the file handle and vnode. */ if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); flag = fxdr_unsigned(int, *tl); } else if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); /* If the GetFH failed, clear flag. */ if (*++tl != 0) { nd->nd_flag |= ND_NOMOREDATA; flag = 0; error = ENXIO; /* Return ENXIO so *nfhpp isn't used. */ } } if (flag) { error = nfsm_getfh(nd, nfhpp); if (error) return (error); } /* * Now, get the attributes. */ if (flag != 0 && (nd->nd_flag & ND_NFSV4) != 0) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*++tl != 0) { nd->nd_flag |= ND_NOMOREDATA; flag = 0; } } else if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (flag) { flag = fxdr_unsigned(int, *tl); } else if (fxdr_unsigned(int, *tl)) { error = nfsm_advance(nd, NFSX_V3FATTR, -1); if (error) return (error); } } if (flag) { error = nfsm_loadattr(nd, nap); if (!error) *attrflagp = 1; } nfsmout: return (error); } /* * Initialize the owner/delegation sleep lock. */ APPLESTATIC void nfscl_lockinit(struct nfsv4lock *lckp) { lckp->nfslock_usecnt = 0; lckp->nfslock_lock = 0; } /* * Get an exclusive lock. (Not needed for OpenBSD4, since there is only one * thread for each posix process in the kernel.) */ APPLESTATIC void nfscl_lockexcl(struct nfsv4lock *lckp, void *mutex) { int igotlock; do { igotlock = nfsv4_lock(lckp, 1, NULL, mutex, NULL); } while (!igotlock); } /* * Release an exclusive lock. */ APPLESTATIC void nfscl_lockunlock(struct nfsv4lock *lckp) { nfsv4_unlock(lckp, 0); } /* * Called to derefernce a lock on a stateid (delegation or open owner). */ APPLESTATIC void nfscl_lockderef(struct nfsv4lock *lckp) { NFSLOCKCLSTATE(); lckp->nfslock_usecnt--; if (lckp->nfslock_usecnt == 0 && (lckp->nfslock_lock & NFSV4LOCK_WANTED)) { lckp->nfslock_lock &= ~NFSV4LOCK_WANTED; wakeup((caddr_t)lckp); } NFSUNLOCKCLSTATE(); } Index: projects/nfs-over-tls/sys/fs/nfsserver/nfs_nfsdport.c =================================================================== --- projects/nfs-over-tls/sys/fs/nfsserver/nfs_nfsdport.c (revision 360215) +++ projects/nfs-over-tls/sys/fs/nfsserver/nfs_nfsdport.c (revision 360216) @@ -1,6741 +1,6741 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include /* * Functions that perform the vfs operations required by the routines in * nfsd_serv.c. It is hoped that this change will make the server more * portable. */ #include #include #include #include #include #include #include #include FEATURE(nfsd, "NFSv4 server"); extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1; extern int nfsrv_useacl; extern int newnfs_numnfsd; extern struct mount nfsv4root_mnt; extern struct nfsrv_stablefirst nfsrv_stablefirst; extern void (*nfsd_call_servertimer)(void); extern SVCPOOL *nfsrvd_pool; extern struct nfsv4lock nfsd_suspend_lock; extern struct nfsclienthashhead *nfsclienthash; extern struct nfslockhashhead *nfslockhash; extern struct nfssessionhash *nfssessionhash; extern int nfsrv_sessionhashsize; extern struct nfsstatsv1 nfsstatsv1; extern struct nfslayouthash *nfslayouthash; extern int nfsrv_layouthashsize; extern struct mtx nfsrv_dslock_mtx; extern int nfs_pnfsiothreads; extern struct nfsdontlisthead nfsrv_dontlisthead; extern volatile int nfsrv_dontlistlen; extern volatile int nfsrv_devidcnt; extern int nfsrv_maxpnfsmirror; extern bool nfs_use_ext_pgs; struct vfsoptlist nfsv4root_opt, nfsv4root_newopt; NFSDLOCKMUTEX; NFSSTATESPINLOCK; struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE]; struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE]; struct mtx nfsrc_udpmtx; struct mtx nfs_v4root_mutex; struct mtx nfsrv_dontlistlock_mtx; struct mtx nfsrv_recalllock_mtx; struct nfsrvfh nfs_rootfh, nfs_pubfh; int nfs_pubfhset = 0, nfs_rootfhset = 0; struct proc *nfsd_master_proc = NULL; int nfsd_debuglevel = 0; static pid_t nfsd_master_pid = (pid_t)-1; static char nfsd_master_comm[MAXCOMLEN + 1]; static struct timeval nfsd_master_start; static uint32_t nfsv4_sysid = 0; static fhandle_t zerofh; static int nfssvc_srvcall(struct thread *, struct nfssvc_args *, struct ucred *); int nfsrv_enable_crossmntpt = 1; static int nfs_commit_blks; static int nfs_commit_miss; extern int nfsrv_issuedelegs; extern int nfsrv_dolocallocks; extern int nfsd_enable_stringtouid; extern struct nfsdevicehead nfsrv_devidhead; static int nfsrv_createiovec(int, struct mbuf **, struct mbuf **, struct iovec **); static int nfsrv_createiovec_extpgs(int, int, struct mbuf **, struct mbuf **, struct iovec **); static int nfsrv_createiovecw(int, struct mbuf *, char *, struct iovec **, int *); static int nfsrv_createiovecw_extpgs(int, struct mbuf *, char *, int, int, struct iovec **, int *); static void nfsrv_pnfscreate(struct vnode *, struct vattr *, struct ucred *, NFSPROC_T *); static void nfsrv_pnfsremovesetup(struct vnode *, NFSPROC_T *, struct vnode **, int *, char *, fhandle_t *); static void nfsrv_pnfsremove(struct vnode **, int, char *, fhandle_t *, NFSPROC_T *); static int nfsrv_proxyds(struct vnode *, off_t, int, struct ucred *, struct thread *, int, struct mbuf **, struct nfsrv_descript *, struct mbuf **, struct nfsvattr *, struct acl *, off_t *, int, bool *); static int nfsrv_setextattr(struct vnode *, struct nfsvattr *, NFSPROC_T *); static int nfsrv_readdsrpc(fhandle_t *, off_t, int, struct ucred *, NFSPROC_T *, struct nfsmount *, struct mbuf **, struct mbuf **); static int nfsrv_writedsrpc(fhandle_t *, off_t, int, struct ucred *, NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct mbuf **, struct nfsrv_descript *, int *); static int nfsrv_allocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *, NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *); static int nfsrv_setacldsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct acl *, int *); static int nfsrv_setattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct nfsvattr *, int *); static int nfsrv_getattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, struct vnode *, struct nfsmount *, struct nfsvattr *); static int nfsrv_seekdsrpc(fhandle_t *, off_t *, int, bool *, struct ucred *, NFSPROC_T *, struct nfsmount *); static int nfsrv_putfhname(fhandle_t *, char *); static int nfsrv_pnfslookupds(struct vnode *, struct vnode *, struct pnfsdsfile *, struct vnode **, NFSPROC_T *); static void nfsrv_pnfssetfh(struct vnode *, struct pnfsdsfile *, char *, char *, struct vnode *, NFSPROC_T *); static int nfsrv_dsremove(struct vnode *, char *, struct ucred *, NFSPROC_T *); static int nfsrv_dssetacl(struct vnode *, struct acl *, struct ucred *, NFSPROC_T *); static int nfsrv_pnfsstatfs(struct statfs *, struct mount *); int nfs_pnfsio(task_fn_t *, void *); SYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "NFS server"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW, &nfsrv_enable_crossmntpt, 0, "Enable nfsd to cross mount points"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0, ""); SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0, ""); SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW, &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW, &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, debuglevel, CTLFLAG_RW, &nfsd_debuglevel, 0, "Debug level for NFS server"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_stringtouid, CTLFLAG_RW, &nfsd_enable_stringtouid, 0, "Enable nfsd to accept numeric owner_names"); static int nfsrv_pnfsgetdsattr = 1; SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsgetdsattr, CTLFLAG_RW, &nfsrv_pnfsgetdsattr, 0, "When set getattr gets DS attributes via RPC"); /* * nfsrv_dsdirsize can only be increased and only when the nfsd threads are * not running. * The dsN subdirectories for the increased values must have been created * on all DS servers before this increase is done. */ u_int nfsrv_dsdirsize = 20; static int sysctl_dsdirsize(SYSCTL_HANDLER_ARGS) { int error, newdsdirsize; newdsdirsize = nfsrv_dsdirsize; error = sysctl_handle_int(oidp, &newdsdirsize, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (newdsdirsize <= nfsrv_dsdirsize || newdsdirsize > 10000 || newnfs_numnfsd != 0) return (EINVAL); nfsrv_dsdirsize = newdsdirsize; return (0); } SYSCTL_PROC(_vfs_nfsd, OID_AUTO, dsdirsize, CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(nfsrv_dsdirsize), sysctl_dsdirsize, "IU", "Number of dsN subdirs on the DS servers"); #define MAX_REORDERED_RPC 16 #define NUM_HEURISTIC 1031 #define NHUSE_INIT 64 #define NHUSE_INC 16 #define NHUSE_MAX 2048 static struct nfsheur { struct vnode *nh_vp; /* vp to match (unreferenced pointer) */ off_t nh_nextoff; /* next offset for sequential detection */ int nh_use; /* use count for selection */ int nh_seqcount; /* heuristic */ } nfsheur[NUM_HEURISTIC]; /* * Heuristic to detect sequential operation. */ static struct nfsheur * nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp) { struct nfsheur *nh; int hi, try; /* Locate best candidate. */ try = 32; hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC; nh = &nfsheur[hi]; while (try--) { if (nfsheur[hi].nh_vp == vp) { nh = &nfsheur[hi]; break; } if (nfsheur[hi].nh_use > 0) --nfsheur[hi].nh_use; hi = (hi + 1) % NUM_HEURISTIC; if (nfsheur[hi].nh_use < nh->nh_use) nh = &nfsheur[hi]; } /* Initialize hint if this is a new file. */ if (nh->nh_vp != vp) { nh->nh_vp = vp; nh->nh_nextoff = uio->uio_offset; nh->nh_use = NHUSE_INIT; if (uio->uio_offset == 0) nh->nh_seqcount = 4; else nh->nh_seqcount = 1; } /* Calculate heuristic. */ if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) || uio->uio_offset == nh->nh_nextoff) { /* See comments in vfs_vnops.c:sequential_heuristic(). */ nh->nh_seqcount += howmany(uio->uio_resid, 16384); if (nh->nh_seqcount > IO_SEQMAX) nh->nh_seqcount = IO_SEQMAX; } else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC * imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) { /* Probably a reordered RPC, leave seqcount alone. */ } else if (nh->nh_seqcount > 1) { nh->nh_seqcount /= 2; } else { nh->nh_seqcount = 0; } nh->nh_use += NHUSE_INC; if (nh->nh_use > NHUSE_MAX) nh->nh_use = NHUSE_MAX; return (nh); } /* * Get attributes into nfsvattr structure. */ int nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, struct nfsrv_descript *nd, struct thread *p, int vpislocked, nfsattrbit_t *attrbitp) { int error, gotattr, lockedit = 0; struct nfsvattr na; if (vpislocked == 0) { /* * When vpislocked == 0, the vnode is either exclusively * locked by this thread or not locked by this thread. * As such, shared lock it, if not exclusively locked. */ if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { lockedit = 1; NFSVOPLOCK(vp, LK_SHARED | LK_RETRY); } } /* * Acquire the Change, Size, TimeAccess, TimeModify and SpaceUsed * attributes, as required. * This needs to be done for regular files if: * - non-NFSv4 RPCs or * - when attrbitp == NULL or * - an NFSv4 RPC with any of the above attributes in attrbitp. * A return of 0 for nfsrv_proxyds() indicates that it has acquired * these attributes. nfsrv_proxyds() will return an error if the * server is not a pNFS one. */ gotattr = 0; if (vp->v_type == VREG && nfsrv_devidcnt > 0 && (attrbitp == NULL || (nd->nd_flag & ND_NFSV4) == 0 || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_CHANGE) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEACCESS) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEMODIFY) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEUSED))) { error = nfsrv_proxyds(vp, 0, 0, nd->nd_cred, p, NFSPROC_GETATTR, NULL, NULL, NULL, &na, NULL, NULL, 0, NULL); if (error == 0) gotattr = 1; } error = VOP_GETATTR(vp, &nvap->na_vattr, nd->nd_cred); if (lockedit != 0) NFSVOPUNLOCK(vp); /* * If we got the Change, Size and Modify Time from the DS, * replace them. */ if (gotattr != 0) { nvap->na_atime = na.na_atime; nvap->na_mtime = na.na_mtime; nvap->na_filerev = na.na_filerev; nvap->na_size = na.na_size; nvap->na_bytes = na.na_bytes; } NFSD_DEBUG(4, "nfsvno_getattr: gotattr=%d err=%d chg=%ju\n", gotattr, error, (uintmax_t)na.na_filerev); NFSEXITCODE(error); return (error); } /* * Get a file handle for a vnode. */ int nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p) { int error; NFSBZERO((caddr_t)fhp, sizeof(fhandle_t)); fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VOP_VPTOFH(vp, &fhp->fh_fid); NFSEXITCODE(error); return (error); } /* * Perform access checking for vnodes obtained from file handles that would * refer to files already opened by a Unix client. You cannot just use * vn_writechk() and VOP_ACCESSX() for two reasons. * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write * case. * 2 - The owner is to be given access irrespective of mode bits for some * operations, so that processes that chmod after opening a file don't * break. */ int nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred, struct nfsexstuff *exp, struct thread *p, int override, int vpislocked, u_int32_t *supportedtypep) { struct vattr vattr; int error = 0, getret = 0; if (vpislocked == 0) { if (NFSVOPLOCK(vp, LK_SHARED) != 0) { error = EPERM; goto out; } } if (accmode & VWRITE) { /* Just vn_writechk() changed to check rdonly */ /* * Disallow write attempts on read-only file systems; * unless the file is a socket or a block or character * device resident on the file system. */ if (NFSVNO_EXRDONLY(exp) || (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: error = EROFS; default: break; } } /* * If there's shared text associated with * the inode, try to free it up once. If * we fail, we can't allow writing. */ if (VOP_IS_TEXT(vp) && error == 0) error = ETXTBSY; } if (error != 0) { if (vpislocked == 0) NFSVOPUNLOCK(vp); goto out; } /* * Should the override still be applied when ACLs are enabled? */ error = VOP_ACCESSX(vp, accmode, cred, p); if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) { /* * Try again with VEXPLICIT_DENY, to see if the test for * deletion is supported. */ error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p); if (error == 0) { if (vp->v_type == VDIR) { accmode &= ~(VDELETE | VDELETE_CHILD); accmode |= VWRITE; error = VOP_ACCESSX(vp, accmode, cred, p); } else if (supportedtypep != NULL) { *supportedtypep &= ~NFSACCESS_DELETE; } } } /* * Allow certain operations for the owner (reads and writes * on files that are already open). */ if (override != NFSACCCHK_NOOVERRIDE && (error == EPERM || error == EACCES)) { if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT)) error = 0; else if (override & NFSACCCHK_ALLOWOWNER) { getret = VOP_GETATTR(vp, &vattr, cred); if (getret == 0 && cred->cr_uid == vattr.va_uid) error = 0; } } if (vpislocked == 0) NFSVOPUNLOCK(vp); out: NFSEXITCODE(error); return (error); } /* * Set attribute(s) vnop. */ int nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { u_quad_t savsize = 0; int error, savedit; /* * If this is an exported file system and a pNFS service is running, * don't VOP_SETATTR() of size for the MDS file system. */ savedit = 0; error = 0; if (vp->v_type == VREG && (vp->v_mount->mnt_flag & MNT_EXPORTED) != 0 && nfsrv_devidcnt != 0 && nvap->na_vattr.va_size != VNOVAL && nvap->na_vattr.va_size > 0) { savsize = nvap->na_vattr.va_size; nvap->na_vattr.va_size = VNOVAL; if (nvap->na_vattr.va_uid != (uid_t)VNOVAL || nvap->na_vattr.va_gid != (gid_t)VNOVAL || nvap->na_vattr.va_mode != (mode_t)VNOVAL || nvap->na_vattr.va_atime.tv_sec != VNOVAL || nvap->na_vattr.va_mtime.tv_sec != VNOVAL) savedit = 1; else savedit = 2; } if (savedit != 2) error = VOP_SETATTR(vp, &nvap->na_vattr, cred); if (savedit != 0) nvap->na_vattr.va_size = savsize; if (error == 0 && (nvap->na_vattr.va_uid != (uid_t)VNOVAL || nvap->na_vattr.va_gid != (gid_t)VNOVAL || nvap->na_vattr.va_size != VNOVAL || nvap->na_vattr.va_mode != (mode_t)VNOVAL || nvap->na_vattr.va_atime.tv_sec != VNOVAL || nvap->na_vattr.va_mtime.tv_sec != VNOVAL)) { /* For a pNFS server, set the attributes on the DS file. */ error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETATTR, NULL, NULL, NULL, nvap, NULL, NULL, 0, NULL); if (error == ENOENT) error = 0; } NFSEXITCODE(error); return (error); } /* * Set up nameidata for a lookup() call and do it. */ int nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp, struct vnode *dp, int islocked, struct nfsexstuff *exp, struct thread *p, struct vnode **retdirp) { struct componentname *cnp = &ndp->ni_cnd; int i; struct iovec aiov; struct uio auio; int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen; int error = 0; char *cp; *retdirp = NULL; cnp->cn_nameptr = cnp->cn_pnbuf; ndp->ni_lcf = 0; /* * Extract and set starting directory. */ if (dp->v_type != VDIR) { if (islocked) vput(dp); else vrele(dp); nfsvno_relpathbuf(ndp); error = ENOTDIR; goto out1; } if (islocked) NFSVOPUNLOCK(dp); VREF(dp); *retdirp = dp; if (NFSVNO_EXRDONLY(exp)) cnp->cn_flags |= RDONLY; ndp->ni_segflg = UIO_SYSSPACE; if (nd->nd_flag & ND_PUBLOOKUP) { ndp->ni_loopcnt = 0; if (cnp->cn_pnbuf[0] == '/') { vrele(dp); /* * Check for degenerate pathnames here, since lookup() * panics on them. */ for (i = 1; i < ndp->ni_pathlen; i++) if (cnp->cn_pnbuf[i] != '/') break; if (i == ndp->ni_pathlen) { error = NFSERR_ACCES; goto out; } dp = rootvnode; VREF(dp); } } else if ((nfsrv_enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) || (nd->nd_flag & ND_NFSV4) == 0) { /* * Only cross mount points for NFSv4 when doing a * mount while traversing the file system above * the mount point, unless nfsrv_enable_crossmntpt is set. */ cnp->cn_flags |= NOCROSSMOUNT; } /* * Initialize for scan, set ni_startdir and bump ref on dp again * because lookup() will dereference ni_startdir. */ cnp->cn_thread = p; ndp->ni_startdir = dp; ndp->ni_rootdir = rootvnode; ndp->ni_topdir = NULL; if (!lockleaf) cnp->cn_flags |= LOCKLEAF; for (;;) { cnp->cn_nameptr = cnp->cn_pnbuf; /* * Call lookup() to do the real work. If an error occurs, * ndp->ni_vp and ni_dvp are left uninitialized or NULL and * we do not have to dereference anything before returning. * In either case ni_startdir will be dereferenced and NULLed * out. */ error = lookup(ndp); if (error) break; /* * Check for encountering a symbolic link. Trivial * termination occurs if no symlink encountered. */ if ((cnp->cn_flags & ISSYMLINK) == 0) { if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) nfsvno_relpathbuf(ndp); if (ndp->ni_vp && !lockleaf) NFSVOPUNLOCK(ndp->ni_vp); break; } /* * Validate symlink */ if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) NFSVOPUNLOCK(ndp->ni_dvp); if (!(nd->nd_flag & ND_PUBLOOKUP)) { error = EINVAL; goto badlink2; } if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { error = ELOOP; goto badlink2; } if (ndp->ni_pathlen > 1) cp = uma_zalloc(namei_zone, M_WAITOK); else cp = cnp->cn_pnbuf; aiov.iov_base = cp; aiov.iov_len = MAXPATHLEN; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = NULL; auio.uio_resid = MAXPATHLEN; error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); if (error) { badlink1: if (ndp->ni_pathlen > 1) uma_zfree(namei_zone, cp); badlink2: vrele(ndp->ni_dvp); vput(ndp->ni_vp); break; } linklen = MAXPATHLEN - auio.uio_resid; if (linklen == 0) { error = ENOENT; goto badlink1; } if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { error = ENAMETOOLONG; goto badlink1; } /* * Adjust or replace path */ if (ndp->ni_pathlen > 1) { NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen); uma_zfree(namei_zone, cnp->cn_pnbuf); cnp->cn_pnbuf = cp; } else cnp->cn_pnbuf[linklen] = '\0'; ndp->ni_pathlen += linklen; /* * Cleanup refs for next loop and check if root directory * should replace current directory. Normally ni_dvp * becomes the new base directory and is cleaned up when * we loop. Explicitly null pointers after invalidation * to clarify operation. */ vput(ndp->ni_vp); ndp->ni_vp = NULL; if (cnp->cn_pnbuf[0] == '/') { vrele(ndp->ni_dvp); ndp->ni_dvp = ndp->ni_rootdir; VREF(ndp->ni_dvp); } ndp->ni_startdir = ndp->ni_dvp; ndp->ni_dvp = NULL; } if (!lockleaf) cnp->cn_flags &= ~LOCKLEAF; out: if (error) { nfsvno_relpathbuf(ndp); ndp->ni_vp = NULL; ndp->ni_dvp = NULL; ndp->ni_startdir = NULL; } else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) { ndp->ni_dvp = NULL; } out1: NFSEXITCODE2(error, nd); return (error); } /* * Set up a pathname buffer and return a pointer to it and, optionally * set a hash pointer. */ void nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp) { struct componentname *cnp = &ndp->ni_cnd; cnp->cn_flags |= (NOMACCHECK | HASBUF); cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); if (hashpp != NULL) *hashpp = NULL; *bufpp = cnp->cn_pnbuf; } /* * Release the above path buffer, if not released by nfsvno_namei(). */ void nfsvno_relpathbuf(struct nameidata *ndp) { if ((ndp->ni_cnd.cn_flags & HASBUF) == 0) panic("nfsrelpath"); uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf); ndp->ni_cnd.cn_flags &= ~HASBUF; } /* * Readlink vnode op into an mbuf list. */ int nfsvno_readlink(struct vnode *vp, struct ucred *cred, int maxextsiz, struct thread *p, struct mbuf **mpp, struct mbuf **mpendp, int *lenp) { struct iovec *iv; struct uio io, *uiop = &io; struct mbuf *mp, *mp3; int len, tlen, error = 0; len = NFS_MAXPATHLEN; if (maxextsiz > 0) uiop->uio_iovcnt = nfsrv_createiovec_extpgs(len, maxextsiz, &mp3, &mp, &iv); else uiop->uio_iovcnt = nfsrv_createiovec(len, &mp3, &mp, &iv); uiop->uio_iov = iv; uiop->uio_offset = 0; uiop->uio_resid = len; uiop->uio_rw = UIO_READ; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = NULL; error = VOP_READLINK(vp, uiop, cred); free(iv, M_TEMP); if (error) { m_freem(mp3); *lenp = 0; goto out; } if (uiop->uio_resid > 0) { len -= uiop->uio_resid; tlen = NFSM_RNDUP(len); if (tlen == 0) { m_freem(mp3); mp3 = mp = NULL; } else if (tlen != NFS_MAXPATHLEN || tlen != len) mp = nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen, tlen - len); } *lenp = len; *mpp = mp3; *mpendp = mp; out: NFSEXITCODE(error); return (error); } /* * Create an mbuf chain and an associated iovec that can be used to Read * or Getextattr of data. * Upon success, return pointers to the first and last mbufs in the chain * plus the malloc'd iovec and its iovlen. */ static int nfsrv_createiovec(int len, struct mbuf **mpp, struct mbuf **mpendp, struct iovec **ivp) { struct mbuf *m, *m2 = NULL, *m3; struct iovec *iv; int i, left, siz; left = len; m3 = NULL; /* * Generate the mbuf list with the uio_iov ref. to it. */ i = 0; while (left > 0) { NFSMGET(m); MCLGET(m, M_WAITOK); m->m_len = 0; siz = min(M_TRAILINGSPACE(m), left); left -= siz; i++; if (m3) m2->m_next = m; else m3 = m; m2 = m; } *ivp = iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK); m = m3; left = len; i = 0; while (left > 0) { if (m == NULL) panic("nfsrv_createiovec iov"); siz = min(M_TRAILINGSPACE(m), left); if (siz > 0) { iv->iov_base = mtod(m, caddr_t) + m->m_len; iv->iov_len = siz; m->m_len += siz; left -= siz; iv++; i++; } m = m->m_next; } *mpp = m3; *mpendp = m2; return (i); } /* * Create an mbuf chain and an associated iovec that can be used to Read * or Getextattr of data. * Upon success, return pointers to the first and last mbufs in the chain * plus the malloc'd iovec and its iovlen. * Same as above, but creates ext_pgs mbuf(s). */ static int nfsrv_createiovec_extpgs(int len, int maxextsiz, struct mbuf **mpp, struct mbuf **mpendp, struct iovec **ivp) { struct mbuf *m, *m2 = NULL, *m3; struct mbuf_ext_pgs *pgs; struct iovec *iv; int i, left, pgno, siz; left = len; m3 = NULL; /* * Generate the mbuf list with the uio_iov ref. to it. */ i = 0; while (left > 0) { siz = min(left, maxextsiz); - m = mb_alloc_ext_plus_pages(siz, M_WAITOK, false, + m = mb_alloc_ext_plus_pages(siz, M_WAITOK, mb_free_mext_pgs); left -= siz; - i += m->m_ext.ext_pgs->npgs; + i += m->m_ext_pgs.npgs; if (m3 != NULL) m2->m_next = m; else m3 = m; m2 = m; } *ivp = iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK); m = m3; left = len; i = 0; pgno = 0; - pgs = m->m_ext.ext_pgs; + pgs = &m->m_ext_pgs; while (left > 0) { if (m == NULL) panic("nfsvno_createiovec_extpgs iov"); siz = min(PAGE_SIZE, left); if (siz > 0) { - iv->iov_base = (void *)PHYS_TO_DMAP(pgs->pa[pgno]); + iv->iov_base = (void *)PHYS_TO_DMAP(m->m_epg_pa[pgno]); iv->iov_len = siz; m->m_len += siz; if (pgno == pgs->npgs - 1) pgs->last_pg_len = siz; left -= siz; iv++; i++; pgno++; } if (pgno == pgs->npgs && left > 0) { m = m->m_next; if (m == NULL) panic("nfsvno_createiovec_extpgs iov"); - pgs = m->m_ext.ext_pgs; + pgs = &m->m_ext_pgs; pgno = 0; } } *mpp = m3; *mpendp = m2; return (i); } /* * Read vnode op call into mbuf list. */ int nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, int maxextsiz, struct thread *p, struct mbuf **mpp, struct mbuf **mpendp) { struct mbuf *m; struct iovec *iv; int error = 0, len, tlen, ioflag = 0; struct mbuf *m3; struct uio io, *uiop = &io; struct nfsheur *nh; /* * Attempt to read from a DS file. A return of ENOENT implies * there is no DS file to read. */ error = nfsrv_proxyds(vp, off, cnt, cred, p, NFSPROC_READDS, mpp, NULL, mpendp, NULL, NULL, NULL, 0, NULL); if (error != ENOENT) return (error); len = NFSM_RNDUP(cnt); if (maxextsiz > 0) uiop->uio_iovcnt = nfsrv_createiovec_extpgs(len, maxextsiz, &m3, &m, &iv); else uiop->uio_iovcnt = nfsrv_createiovec(len, &m3, &m, &iv); uiop->uio_iov = iv; uiop->uio_offset = off; uiop->uio_resid = len; uiop->uio_rw = UIO_READ; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = NULL; nh = nfsrv_sequential_heuristic(uiop, vp); ioflag |= nh->nh_seqcount << IO_SEQSHIFT; /* XXX KDM make this more systematic? */ nfsstatsv1.srvbytes[NFSV4OP_READ] += uiop->uio_resid; error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred); free(iv, M_TEMP); if (error) { m_freem(m3); *mpp = NULL; goto out; } nh->nh_nextoff = uiop->uio_offset; tlen = len - uiop->uio_resid; cnt = cnt < tlen ? cnt : tlen; tlen = NFSM_RNDUP(cnt); if (tlen == 0) { m_freem(m3); m3 = m = NULL; } else if (len != tlen || tlen != cnt) m = nfsrv_adj(m3, len - tlen, tlen - cnt); *mpp = m3; *mpendp = m; out: NFSEXITCODE(error); return (error); } /* * Create the iovec for the mbuf chain passed in as an argument. * The "cp" argument is where the data starts within the first mbuf in * the chain. It returns the iovec and the iovcnt. */ static int nfsrv_createiovecw(int retlen, struct mbuf *m, char *cp, struct iovec **ivpp, int *iovcntp) { struct mbuf *mp; struct iovec *ivp; int cnt, i, len; /* * Loop through the mbuf chain, counting how many mbufs are a * part of this write operation, so the iovec size is known. */ cnt = 0; len = retlen; mp = m; i = mtod(mp, caddr_t) + mp->m_len - cp; while (len > 0) { if (i > 0) { len -= i; cnt++; } mp = mp->m_next; if (!mp) { if (len > 0) return (EBADRPC); } else i = mp->m_len; } /* Now, create the iovec. */ mp = m; *ivpp = ivp = malloc(cnt * sizeof (struct iovec), M_TEMP, M_WAITOK); *iovcntp = cnt; i = mtod(mp, caddr_t) + mp->m_len - cp; len = retlen; while (len > 0) { if (mp == NULL) panic("nfsrv_createiovecw"); if (i > 0) { i = min(i, len); ivp->iov_base = cp; ivp->iov_len = i; ivp++; len -= i; } mp = mp->m_next; if (mp) { i = mp->m_len; cp = mtod(mp, caddr_t); } } return (0); } /* * Create the iovec for the mbuf chain passed in as an argument. * The "cp" argument is where the data starts within the first mbuf in * the chain. It returns the iovec and the iovcnt. * Same as above, but for ext_pgs mbufs. */ static int nfsrv_createiovecw_extpgs(int retlen, struct mbuf *m, char *cp, int dextpg, int dextpgsiz, struct iovec **ivpp, int *iovcntp) { struct mbuf *mp; struct mbuf_ext_pgs *pgs; struct iovec *ivp; int cnt, i, len, pgno; /* * Loop through the mbuf chain, counting how many pages are * part of this write oepration, so the iovec size is known. */ cnt = 0; len = retlen; mp = m; - pgs = mp->m_ext.ext_pgs; + pgs = &mp->m_ext_pgs; i = dextpgsiz; pgno = dextpg; while (len > 0) { if (i > 0) { len -= i; cnt++; } if (len > 0) { if (pgno == pgs->npgs - 1) { mp = mp->m_next; if (mp == NULL) return (EBADRPC); pgno = 0; - pgs = mp->m_ext.ext_pgs; + pgs = &mp->m_ext_pgs; } else pgno++; if (pgno == 0) i = mbuf_ext_pg_len(pgs, 0, pgs->first_pg_off); else i = mbuf_ext_pg_len(pgs, pgno, 0); } } /* Now, create the iovec. */ mp = m; *ivpp = ivp = malloc(cnt * sizeof (struct iovec), M_TEMP, M_WAITOK); *iovcntp = cnt; len = retlen; - pgs = mp->m_ext.ext_pgs; + pgs = &mp->m_ext_pgs; i = dextpgsiz; pgno = dextpg; while (len > 0) { if (i > 0) { i = min(i, len); ivp->iov_base = cp; ivp->iov_len = i; ivp++; len -= i; } if (len > 0) { if (pgno == pgs->npgs - 1) { mp = mp->m_next; if (mp == NULL) return (EBADRPC); pgno = 0; - pgs = mp->m_ext.ext_pgs; + pgs = &mp->m_ext_pgs; } else pgno++; cp = (char *)(void *) - PHYS_TO_DMAP(pgs->pa[pgno]); + PHYS_TO_DMAP(mp->m_epg_pa[pgno]); if (pgno == 0) { cp += pgs->first_pg_off; i = mbuf_ext_pg_len(pgs, 0, pgs->first_pg_off); } else i = mbuf_ext_pg_len(pgs, pgno, 0); } } return (0); } /* * Write vnode op from an mbuf list. */ int nfsvno_write(struct vnode *vp, off_t off, int retlen, int *stable, struct nfsrv_descript *nd, struct thread *p) { struct iovec *iv; int cnt, ioflags, error; struct uio io, *uiop = &io; struct nfsheur *nh; /* * Attempt to write to a DS file. A return of ENOENT implies * there is no DS file to write. */ error = nfsrv_proxyds(vp, off, retlen, nd->nd_cred, p, NFSPROC_WRITEDS, &nd->nd_md, nd, NULL, NULL, NULL, NULL, 0, NULL); if (error != ENOENT) { *stable = NFSWRITE_FILESYNC; return (error); } if (*stable == NFSWRITE_UNSTABLE) ioflags = IO_NODELOCKED; else ioflags = (IO_SYNC | IO_NODELOCKED); if ((nd->nd_md->m_flags & M_NOMAP) != 0) error = nfsrv_createiovecw_extpgs(retlen, nd->nd_md, nd->nd_dpos, nd->nd_dextpg, nd->nd_dextpgsiz, &iv, &cnt); else error = nfsrv_createiovecw(retlen, nd->nd_md, nd->nd_dpos, &iv, &cnt); if (error != 0) return (error); uiop->uio_iov = iv; uiop->uio_iovcnt = cnt; uiop->uio_resid = retlen; uiop->uio_rw = UIO_WRITE; uiop->uio_segflg = UIO_SYSSPACE; NFSUIOPROC(uiop, p); uiop->uio_offset = off; nh = nfsrv_sequential_heuristic(uiop, vp); ioflags |= nh->nh_seqcount << IO_SEQSHIFT; /* XXX KDM make this more systematic? */ nfsstatsv1.srvbytes[NFSV4OP_WRITE] += uiop->uio_resid; error = VOP_WRITE(vp, uiop, ioflags, nd->nd_cred); if (error == 0) nh->nh_nextoff = uiop->uio_offset; free(iv, M_TEMP); NFSEXITCODE(error); return (error); } /* * Common code for creating a regular file (plus special files for V2). */ int nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp, struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp, int32_t *cverf, NFSDEV_T rdev, struct nfsexstuff *exp) { u_quad_t tempsize; int error; struct thread *p = curthread; error = nd->nd_repstat; if (!error && ndp->ni_vp == NULL) { if (nvap->na_type == VREG || nvap->na_type == VSOCK) { vrele(ndp->ni_startdir); error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); /* For a pNFS server, create the data file on a DS. */ if (error == 0 && nvap->na_type == VREG) { /* * Create a data file on a DS for a pNFS server. * This function just returns if not * running a pNFS DS or the creation fails. */ nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr, nd->nd_cred, p); } vput(ndp->ni_dvp); nfsvno_relpathbuf(ndp); if (!error) { if (*exclusive_flagp) { *exclusive_flagp = 0; NFSVNO_ATTRINIT(nvap); nvap->na_atime.tv_sec = cverf[0]; nvap->na_atime.tv_nsec = cverf[1]; error = VOP_SETATTR(ndp->ni_vp, &nvap->na_vattr, nd->nd_cred); if (error != 0) { vput(ndp->ni_vp); ndp->ni_vp = NULL; error = NFSERR_NOTSUPP; } } } /* * NFS V2 Only. nfsrvd_mknod() does this for V3. * (This implies, just get out on an error.) */ } else if (nvap->na_type == VCHR || nvap->na_type == VBLK || nvap->na_type == VFIFO) { if (nvap->na_type == VCHR && rdev == 0xffffffff) nvap->na_type = VFIFO; if (nvap->na_type != VFIFO && (error = priv_check_cred(nd->nd_cred, PRIV_VFS_MKNOD_DEV))) { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vput(ndp->ni_dvp); goto out; } nvap->na_rdev = rdev; error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); vput(ndp->ni_dvp); nfsvno_relpathbuf(ndp); vrele(ndp->ni_startdir); if (error) goto out; } else { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vput(ndp->ni_dvp); error = ENXIO; goto out; } *vpp = ndp->ni_vp; } else { /* * Handle cases where error is already set and/or * the file exists. * 1 - clean up the lookup * 2 - iff !error and na_size set, truncate it */ vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); *vpp = ndp->ni_vp; if (ndp->ni_dvp == *vpp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); if (!error && nvap->na_size != VNOVAL) { error = nfsvno_accchk(*vpp, VWRITE, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); if (!error) { tempsize = nvap->na_size; NFSVNO_ATTRINIT(nvap); nvap->na_size = tempsize; error = VOP_SETATTR(*vpp, &nvap->na_vattr, nd->nd_cred); } } if (error) vput(*vpp); } out: NFSEXITCODE(error); return (error); } /* * Do a mknod vnode op. */ int nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred, struct thread *p) { int error = 0; enum vtype vtyp; vtyp = nvap->na_type; /* * Iff doesn't exist, create it. */ if (ndp->ni_vp) { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vput(ndp->ni_dvp); vrele(ndp->ni_vp); error = EEXIST; goto out; } if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vput(ndp->ni_dvp); error = NFSERR_BADTYPE; goto out; } if (vtyp == VSOCK) { vrele(ndp->ni_startdir); error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); vput(ndp->ni_dvp); nfsvno_relpathbuf(ndp); } else { if (nvap->na_type != VFIFO && (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV))) { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vput(ndp->ni_dvp); goto out; } error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); vput(ndp->ni_dvp); nfsvno_relpathbuf(ndp); vrele(ndp->ni_startdir); /* * Since VOP_MKNOD returns the ni_vp, I can't * see any reason to do the lookup. */ } out: NFSEXITCODE(error); return (error); } /* * Mkdir vnode op. */ int nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { int error = 0; if (ndp->ni_vp != NULL) { if (ndp->ni_dvp == ndp->ni_vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); vrele(ndp->ni_vp); nfsvno_relpathbuf(ndp); error = EEXIST; goto out; } error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); vput(ndp->ni_dvp); nfsvno_relpathbuf(ndp); out: NFSEXITCODE(error); return (error); } /* * symlink vnode op. */ int nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp, int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { int error = 0; if (ndp->ni_vp) { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); if (ndp->ni_dvp == ndp->ni_vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); vrele(ndp->ni_vp); error = EEXIST; goto out; } error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr, pathcp); vput(ndp->ni_dvp); vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); /* * Although FreeBSD still had the lookup code in * it for 7/current, there doesn't seem to be any * point, since VOP_SYMLINK() returns the ni_vp. * Just vput it for v2. */ if (!not_v2 && !error) vput(ndp->ni_vp); out: NFSEXITCODE(error); return (error); } /* * Parse symbolic link arguments. * This function has an ugly side effect. It will malloc() an area for * the symlink and set iov_base to point to it, only if it succeeds. * So, if it returns with uiop->uio_iov->iov_base != NULL, that must * be FREE'd later. */ int nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap, struct thread *p, char **pathcpp, int *lenp) { u_int32_t *tl; char *pathcp = NULL; int error = 0, len; struct nfsv2_sattr *sp; *pathcpp = NULL; *lenp = 0; if ((nd->nd_flag & ND_NFSV3) && (error = nfsrv_sattr(nd, NULL, nvap, NULL, NULL, p))) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len > NFS_MAXPATHLEN || len <= 0) { error = EBADRPC; goto nfsmout; } pathcp = malloc(len + 1, M_TEMP, M_WAITOK); error = nfsrv_mtostr(nd, pathcp, len); if (error) goto nfsmout; if (nd->nd_flag & ND_NFSV2) { NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode); } *pathcpp = pathcp; *lenp = len; NFSEXITCODE2(0, nd); return (0); nfsmout: if (pathcp) free(pathcp, M_TEMP); NFSEXITCODE2(error, nd); return (error); } /* * Remove a non-directory object. */ int nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { struct vnode *vp, *dsdvp[NFSDEV_MAXMIRRORS]; int error = 0, mirrorcnt; char fname[PNFS_FILENAME_LEN + 1]; fhandle_t fh; vp = ndp->ni_vp; dsdvp[0] = NULL; if (vp->v_type == VDIR) error = NFSERR_ISDIR; else if (is_v4) error = nfsrv_checkremove(vp, 1, NULL, (nfsquad_t)((u_quad_t)0), p); if (error == 0) nfsrv_pnfsremovesetup(vp, p, dsdvp, &mirrorcnt, fname, &fh); if (!error) error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd); if (error == 0 && dsdvp[0] != NULL) nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p); if (ndp->ni_dvp == vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); vput(vp); if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0) nfsvno_relpathbuf(ndp); NFSEXITCODE(error); return (error); } /* * Remove a directory. */ int nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { struct vnode *vp; int error = 0; vp = ndp->ni_vp; if (vp->v_type != VDIR) { error = ENOTDIR; goto out; } /* * No rmdir "." please. */ if (ndp->ni_dvp == vp) { error = EINVAL; goto out; } /* * The root of a mounted filesystem cannot be deleted. */ if (vp->v_vflag & VV_ROOT) error = EBUSY; out: if (!error) error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd); if (ndp->ni_dvp == vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); vput(vp); if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0) nfsvno_relpathbuf(ndp); NFSEXITCODE(error); return (error); } /* * Rename vnode op. */ int nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp, u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p) { struct vnode *fvp, *tvp, *tdvp, *dsdvp[NFSDEV_MAXMIRRORS]; int error = 0, mirrorcnt; char fname[PNFS_FILENAME_LEN + 1]; fhandle_t fh; dsdvp[0] = NULL; fvp = fromndp->ni_vp; if (ndstat) { vrele(fromndp->ni_dvp); vrele(fvp); error = ndstat; goto out1; } tdvp = tondp->ni_dvp; tvp = tondp->ni_vp; if (tvp != NULL) { if (fvp->v_type == VDIR && tvp->v_type != VDIR) { error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST; goto out; } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST; goto out; } if (tvp->v_type == VDIR && tvp->v_mountedhere) { error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; goto out; } /* * A rename to '.' or '..' results in a prematurely * unlocked vnode on FreeBSD5, so I'm just going to fail that * here. */ if ((tondp->ni_cnd.cn_namelen == 1 && tondp->ni_cnd.cn_nameptr[0] == '.') || (tondp->ni_cnd.cn_namelen == 2 && tondp->ni_cnd.cn_nameptr[0] == '.' && tondp->ni_cnd.cn_nameptr[1] == '.')) { error = EINVAL; goto out; } } if (fvp->v_type == VDIR && fvp->v_mountedhere) { error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; goto out; } if (fvp->v_mount != tdvp->v_mount) { error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; goto out; } if (fvp == tdvp) { error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL; goto out; } if (fvp == tvp) { /* * If source and destination are the same, there is nothing to * do. Set error to -1 to indicate this. */ error = -1; goto out; } if (ndflag & ND_NFSV4) { if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) { error = nfsrv_checkremove(fvp, 0, NULL, (nfsquad_t)((u_quad_t)0), p); NFSVOPUNLOCK(fvp); } else error = EPERM; if (tvp && !error) error = nfsrv_checkremove(tvp, 1, NULL, (nfsquad_t)((u_quad_t)0), p); } else { /* * For NFSv2 and NFSv3, try to get rid of the delegation, so * that the NFSv4 client won't be confused by the rename. * Since nfsd_recalldelegation() can only be called on an * unlocked vnode at this point and fvp is the file that will * still exist after the rename, just do fvp. */ nfsd_recalldelegation(fvp, p); } if (error == 0 && tvp != NULL) { nfsrv_pnfsremovesetup(tvp, p, dsdvp, &mirrorcnt, fname, &fh); NFSD_DEBUG(4, "nfsvno_rename: pnfsremovesetup" " dsdvp=%p\n", dsdvp[0]); } out: if (!error) { error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp, &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp, &tondp->ni_cnd); } else { if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); vrele(fromndp->ni_dvp); vrele(fvp); if (error == -1) error = 0; } /* * If dsdvp[0] != NULL, it was set up by nfsrv_pnfsremovesetup() and * if the rename succeeded, the DS file for the tvp needs to be * removed. */ if (error == 0 && dsdvp[0] != NULL) { nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p); NFSD_DEBUG(4, "nfsvno_rename: pnfsremove\n"); } vrele(tondp->ni_startdir); nfsvno_relpathbuf(tondp); out1: vrele(fromndp->ni_startdir); nfsvno_relpathbuf(fromndp); NFSEXITCODE(error); return (error); } /* * Link vnode op. */ int nfsvno_link(struct nameidata *ndp, struct vnode *vp, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { struct vnode *xp; int error = 0; xp = ndp->ni_vp; if (xp != NULL) { error = EEXIST; } else { xp = ndp->ni_dvp; if (vp->v_mount != xp->v_mount) error = EXDEV; } if (!error) { NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); if (!VN_IS_DOOMED(vp)) error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd); else error = EPERM; if (ndp->ni_dvp == vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); NFSVOPUNLOCK(vp); } else { if (ndp->ni_dvp == ndp->ni_vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); if (ndp->ni_vp) vrele(ndp->ni_vp); } nfsvno_relpathbuf(ndp); NFSEXITCODE(error); return (error); } /* * Do the fsync() appropriate for the commit. */ int nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred, struct thread *td) { int error = 0; /* * RFC 1813 3.3.21: if count is 0, a flush from offset to the end of * file is done. At this time VOP_FSYNC does not accept offset and * byte count parameters so call VOP_FSYNC the whole file for now. * The same is true for NFSv4: RFC 3530 Sec. 14.2.3. * File systems that do not use the buffer cache (as indicated * by MNTK_USES_BCACHE not being set) must use VOP_FSYNC(). */ if (cnt == 0 || cnt > MAX_COMMIT_COUNT || (vp->v_mount->mnt_kern_flag & MNTK_USES_BCACHE) == 0) { /* * Give up and do the whole thing */ if (vp->v_object && vm_object_mightbedirty(vp->v_object)) { VM_OBJECT_WLOCK(vp->v_object); vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC); VM_OBJECT_WUNLOCK(vp->v_object); } error = VOP_FSYNC(vp, MNT_WAIT, td); } else { /* * Locate and synchronously write any buffers that fall * into the requested range. Note: we are assuming that * f_iosize is a power of 2. */ int iosize = vp->v_mount->mnt_stat.f_iosize; int iomask = iosize - 1; struct bufobj *bo; daddr_t lblkno; /* * Align to iosize boundary, super-align to page boundary. */ if (off & iomask) { cnt += off & iomask; off &= ~(u_quad_t)iomask; } if (off & PAGE_MASK) { cnt += off & PAGE_MASK; off &= ~(u_quad_t)PAGE_MASK; } lblkno = off / iosize; if (vp->v_object && vm_object_mightbedirty(vp->v_object)) { VM_OBJECT_WLOCK(vp->v_object); vm_object_page_clean(vp->v_object, off, off + cnt, OBJPC_SYNC); VM_OBJECT_WUNLOCK(vp->v_object); } bo = &vp->v_bufobj; BO_LOCK(bo); while (cnt > 0) { struct buf *bp; /* * If we have a buffer and it is marked B_DELWRI we * have to lock and write it. Otherwise the prior * write is assumed to have already been committed. * * gbincore() can return invalid buffers now so we * have to check that bit as well (though B_DELWRI * should not be set if B_INVAL is set there could be * a race here since we haven't locked the buffer). */ if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) { if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) { BO_LOCK(bo); continue; /* retry */ } if ((bp->b_flags & (B_DELWRI|B_INVAL)) == B_DELWRI) { bremfree(bp); bp->b_flags &= ~B_ASYNC; bwrite(bp); ++nfs_commit_miss; } else BUF_UNLOCK(bp); BO_LOCK(bo); } ++nfs_commit_blks; if (cnt < iosize) break; cnt -= iosize; ++lblkno; } BO_UNLOCK(bo); } NFSEXITCODE(error); return (error); } /* * Statfs vnode op. */ int nfsvno_statfs(struct vnode *vp, struct statfs *sf) { struct statfs *tsf; int error; tsf = NULL; if (nfsrv_devidcnt > 0) { /* For a pNFS service, get the DS numbers. */ tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK | M_ZERO); error = nfsrv_pnfsstatfs(tsf, vp->v_mount); if (error != 0) { free(tsf, M_TEMP); tsf = NULL; } } error = VFS_STATFS(vp->v_mount, sf); if (error == 0) { if (tsf != NULL) { sf->f_blocks = tsf->f_blocks; sf->f_bavail = tsf->f_bavail; sf->f_bfree = tsf->f_bfree; sf->f_bsize = tsf->f_bsize; } /* * Since NFS handles these values as unsigned on the * wire, there is no way to represent negative values, * so set them to 0. Without this, they will appear * to be very large positive values for clients like * Solaris10. */ if (sf->f_bavail < 0) sf->f_bavail = 0; if (sf->f_ffree < 0) sf->f_ffree = 0; } free(tsf, M_TEMP); NFSEXITCODE(error); return (error); } /* * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but * must handle nfsrv_opencheck() calls after any other access checks. */ void nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp, nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp, int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create, NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred, struct nfsexstuff *exp, struct vnode **vpp) { struct vnode *vp = NULL; u_quad_t tempsize; struct nfsexstuff nes; struct thread *p = curthread; if (ndp->ni_vp == NULL) nd->nd_repstat = nfsrv_opencheck(clientid, stateidp, stp, NULL, nd, p, nd->nd_repstat); if (!nd->nd_repstat) { if (ndp->ni_vp == NULL) { vrele(ndp->ni_startdir); nd->nd_repstat = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); /* For a pNFS server, create the data file on a DS. */ if (nd->nd_repstat == 0) { /* * Create a data file on a DS for a pNFS server. * This function just returns if not * running a pNFS DS or the creation fails. */ nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr, cred, p); } vput(ndp->ni_dvp); nfsvno_relpathbuf(ndp); if (!nd->nd_repstat) { if (*exclusive_flagp) { *exclusive_flagp = 0; NFSVNO_ATTRINIT(nvap); nvap->na_atime.tv_sec = cverf[0]; nvap->na_atime.tv_nsec = cverf[1]; nd->nd_repstat = VOP_SETATTR(ndp->ni_vp, &nvap->na_vattr, cred); if (nd->nd_repstat != 0) { vput(ndp->ni_vp); ndp->ni_vp = NULL; nd->nd_repstat = NFSERR_NOTSUPP; } else NFSSETBIT_ATTRBIT(attrbitp, NFSATTRBIT_TIMEACCESS); } else { nfsrv_fixattr(nd, ndp->ni_vp, nvap, aclp, p, attrbitp, exp); } } vp = ndp->ni_vp; } else { if (ndp->ni_startdir) vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vp = ndp->ni_vp; if (create == NFSV4OPEN_CREATE) { if (ndp->ni_dvp == vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); } if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) { if (ndp->ni_cnd.cn_flags & RDONLY) NFSVNO_SETEXRDONLY(&nes); else NFSVNO_EXINIT(&nes); nd->nd_repstat = nfsvno_accchk(vp, VWRITE, cred, &nes, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); nd->nd_repstat = nfsrv_opencheck(clientid, stateidp, stp, vp, nd, p, nd->nd_repstat); if (!nd->nd_repstat) { tempsize = nvap->na_size; NFSVNO_ATTRINIT(nvap); nvap->na_size = tempsize; nd->nd_repstat = VOP_SETATTR(vp, &nvap->na_vattr, cred); } } else if (vp->v_type == VREG) { nd->nd_repstat = nfsrv_opencheck(clientid, stateidp, stp, vp, nd, p, nd->nd_repstat); } } } else { if (ndp->ni_cnd.cn_flags & HASBUF) nfsvno_relpathbuf(ndp); if (ndp->ni_startdir && create == NFSV4OPEN_CREATE) { vrele(ndp->ni_startdir); if (ndp->ni_dvp == ndp->ni_vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); if (ndp->ni_vp) vput(ndp->ni_vp); } } *vpp = vp; NFSEXITCODE2(0, nd); } /* * Updates the file rev and sets the mtime and ctime * to the current clock time, returning the va_filerev and va_Xtime * values. * Return ESTALE to indicate the vnode is VIRF_DOOMED. */ int nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap, struct nfsrv_descript *nd, struct thread *p) { struct vattr va; VATTR_NULL(&va); vfs_timestamp(&va.va_mtime); if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); if (VN_IS_DOOMED(vp)) return (ESTALE); } (void) VOP_SETATTR(vp, &va, nd->nd_cred); (void) nfsvno_getattr(vp, nvap, nd, p, 1, NULL); return (0); } /* * Glue routine to nfsv4_fillattr(). */ int nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp, struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp, struct ucred *cred, struct thread *p, int isdgram, int reterr, int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno) { struct statfs *sf; int error; sf = NULL; if (nfsrv_devidcnt > 0 && (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEAVAIL) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEFREE) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACETOTAL))) { sf = malloc(sizeof(*sf), M_TEMP, M_WAITOK | M_ZERO); error = nfsrv_pnfsstatfs(sf, mp); if (error != 0) { free(sf, M_TEMP); sf = NULL; } } error = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp, rderror, attrbitp, cred, p, isdgram, reterr, supports_nfsv4acls, at_root, mounted_on_fileno, sf); free(sf, M_TEMP); NFSEXITCODE2(0, nd); return (error); } /* Since the Readdir vnode ops vary, put the entire functions in here. */ /* * nfs readdir service * - mallocs what it thinks is enough to read * count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR * - calls VOP_READDIR() * - loops around building the reply * if the output generated exceeds count break out of loop * The NFSM_CLGET macro is used here so that the reply will be packed * tightly in mbuf clusters. * - it trims out records with d_fileno == 0 * this doesn't matter for Unix clients, but they might confuse clients * for other os'. * - it trims out records with d_type == DT_WHT * these cannot be seen through NFS (unless we extend the protocol) * The alternate call nfsrvd_readdirplus() does lookups as well. * PS: The NFS protocol spec. does not clarify what the "count" byte * argument is a count of.. just name strings and file id's or the * entire reply rpc or ... * I tried just file name and id sizes and it confused the Sun client, * so I am using the full rpc size now. The "paranoia.." comment refers * to including the status longwords that are not a part of the dir. * "entry" structures, but are in the rpc. */ int nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram, struct vnode *vp, struct nfsexstuff *exp) { struct dirent *dp; u_int32_t *tl; int dirlen; char *cpos, *cend, *rbuf; struct nfsvattr at; int nlen, error = 0, getret = 1; int siz, cnt, fullsiz, eofflag, ncookies; u_int64_t off, toff, verf __unused; u_long *cookies = NULL, *cookiep; struct uio io; struct iovec iv; int is_ufs; struct thread *p = curthread; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); goto out; } if (nd->nd_flag & ND_NFSV2) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); off = fxdr_unsigned(u_quad_t, *tl++); } else { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); off = fxdr_hyper(tl); tl += 2; verf = fxdr_hyper(tl); tl += 2; } toff = off; cnt = fxdr_unsigned(int, *tl); if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0) cnt = NFS_SRVMAXDATA(nd); siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); fullsiz = siz; if (nd->nd_flag & ND_NFSV3) { nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); #if 0 /* * va_filerev is not sufficient as a cookie verifier, * since it is not supposed to change when entries are * removed/added unless that offset cookies returned to * the client are no longer valid. */ if (!nd->nd_repstat && toff && verf != at.na_filerev) nd->nd_repstat = NFSERR_BAD_COOKIE; #endif } if (!nd->nd_repstat && vp->v_type != VDIR) nd->nd_repstat = NFSERR_NOTDIR; if (nd->nd_repstat == 0 && cnt == 0) { if (nd->nd_flag & ND_NFSV2) /* NFSv2 does not have NFSERR_TOOSMALL */ nd->nd_repstat = EPERM; else nd->nd_repstat = NFSERR_TOOSMALL; } if (!nd->nd_repstat) nd->nd_repstat = nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat) { vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); goto out; } is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0; rbuf = malloc(siz, M_TEMP, M_WAITOK); again: eofflag = 0; if (cookies) { free(cookies, M_TEMP); cookies = NULL; } iv.iov_base = rbuf; iv.iov_len = siz; io.uio_iov = &iv; io.uio_iovcnt = 1; io.uio_offset = (off_t)off; io.uio_resid = siz; io.uio_segflg = UIO_SYSSPACE; io.uio_rw = UIO_READ; io.uio_td = NULL; nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies, &cookies); off = (u_int64_t)io.uio_offset; if (io.uio_resid) siz -= io.uio_resid; if (!cookies && !nd->nd_repstat) nd->nd_repstat = NFSERR_PERM; if (nd->nd_flag & ND_NFSV3) { getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); if (!nd->nd_repstat) nd->nd_repstat = getret; } /* * Handles the failed cases. nd->nd_repstat == 0 past here. */ if (nd->nd_repstat) { vput(vp); free(rbuf, M_TEMP); if (cookies) free(cookies, M_TEMP); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); goto out; } /* * If nothing read, return eof * rpc reply */ if (siz == 0) { vput(vp); if (nd->nd_flag & ND_NFSV2) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); } else { nfsrv_postopattr(nd, getret, &at); NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); txdr_hyper(at.na_filerev, tl); tl += 2; } *tl++ = newnfs_false; *tl = newnfs_true; free(rbuf, M_TEMP); free(cookies, M_TEMP); goto out; } /* * Check for degenerate cases of nothing useful read. * If so go try again */ cpos = rbuf; cend = rbuf + siz; dp = (struct dirent *)cpos; cookiep = cookies; /* * For some reason FreeBSD's ufs_readdir() chooses to back the * directory offset up to a block boundary, so it is necessary to * skip over the records that precede the requested offset. This * requires the assumption that file offset cookies monotonically * increase. */ while (cpos < cend && ncookies > 0 && (dp->d_fileno == 0 || dp->d_type == DT_WHT || (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff))) { cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } if (cpos >= cend || ncookies == 0) { siz = fullsiz; toff = off; goto again; } vput(vp); /* * dirlen is the size of the reply, including all XDR and must * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate * if the XDR should be included in "count", but to be safe, we do. * (Include the two booleans at the end of the reply in dirlen now.) */ if (nd->nd_flag & ND_NFSV3) { nfsrv_postopattr(nd, getret, &at); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); txdr_hyper(at.na_filerev, tl); dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED; } else { dirlen = 2 * NFSX_UNSIGNED; } /* Loop through the records and build reply */ while (cpos < cend && ncookies > 0) { nlen = dp->d_namlen; if (dp->d_fileno != 0 && dp->d_type != DT_WHT && nlen <= NFS_MAXNAMLEN) { if (nd->nd_flag & ND_NFSV3) dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen)); else dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen)); if (dirlen > cnt) { eofflag = 0; break; } /* * Build the directory record xdr from * the dirent entry. */ if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl++ = 0; } else { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_true; } *tl = txdr_unsigned(dp->d_fileno); (void) nfsm_strtom(nd, dp->d_name, nlen); if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = 0; } else NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(*cookiep); } cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } if (cpos < cend) eofflag = 0; NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_false; if (eofflag) *tl = newnfs_true; else *tl = newnfs_false; free(rbuf, M_TEMP); free(cookies, M_TEMP); out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * Readdirplus for V3 and Readdir for V4. */ int nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram, struct vnode *vp, struct nfsexstuff *exp) { struct dirent *dp; u_int32_t *tl; int dirlen; char *cpos, *cend, *rbuf; struct vnode *nvp; fhandle_t nfh; struct nfsvattr nva, at, *nvap = &nva; struct mbuf *mb0, *mb1; struct nfsreferral *refp; int nlen, r, error = 0, getret = 1, usevget = 1; int siz, cnt, fullsiz, eofflag, ncookies, entrycnt; caddr_t bpos0, bpos1; u_int64_t off, toff, verf; u_long *cookies = NULL, *cookiep; nfsattrbit_t attrbits, rderrbits, savbits; struct uio io; struct iovec iv; struct componentname cn; int at_root, is_ufs, is_zfs, needs_unbusy, supports_nfsv4acls; struct mount *mp, *new_mp; uint64_t mounted_on_fileno; struct thread *p = curthread; int bextpg0, bextpg1, bextpgsiz0, bextpgsiz1; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); goto out; } NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED); off = fxdr_hyper(tl); toff = off; tl += 2; verf = fxdr_hyper(tl); tl += 2; siz = fxdr_unsigned(int, *tl++); cnt = fxdr_unsigned(int, *tl); /* * Use the server's maximum data transfer size as the upper bound * on reply datalen. */ if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0) cnt = NFS_SRVMAXDATA(nd); /* * siz is a "hint" of how much directory information (name, fileid, * cookie) should be in the reply. At least one client "hints" 0, * so I set it to cnt for that case. I also round it up to the * next multiple of DIRBLKSIZ. * Since the size of a Readdirplus directory entry reply will always * be greater than a directory entry returned by VOP_READDIR(), it * does not make sense to read more than NFS_SRVMAXDATA() via * VOP_READDIR(). */ if (siz <= 0) siz = cnt; else if (siz > NFS_SRVMAXDATA(nd)) siz = NFS_SRVMAXDATA(nd); siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); if (nd->nd_flag & ND_NFSV4) { error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (error) goto nfsmout; NFSSET_ATTRBIT(&savbits, &attrbits); NFSCLRNOTFILLABLE_ATTRBIT(&attrbits, nd); NFSZERO_ATTRBIT(&rderrbits); NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR); } else { NFSZERO_ATTRBIT(&attrbits); } fullsiz = siz; nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); #if 0 if (!nd->nd_repstat) { if (off && verf != at.na_filerev) { /* * va_filerev is not sufficient as a cookie verifier, * since it is not supposed to change when entries are * removed/added unless that offset cookies returned to * the client are no longer valid. */ if (nd->nd_flag & ND_NFSV4) { nd->nd_repstat = NFSERR_NOTSAME; } else { nd->nd_repstat = NFSERR_BAD_COOKIE; } } } #endif if (!nd->nd_repstat && vp->v_type != VDIR) nd->nd_repstat = NFSERR_NOTDIR; if (!nd->nd_repstat && cnt == 0) nd->nd_repstat = NFSERR_TOOSMALL; if (!nd->nd_repstat) nd->nd_repstat = nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat) { vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); goto out; } is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0; is_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs") == 0; rbuf = malloc(siz, M_TEMP, M_WAITOK); again: eofflag = 0; if (cookies) { free(cookies, M_TEMP); cookies = NULL; } iv.iov_base = rbuf; iv.iov_len = siz; io.uio_iov = &iv; io.uio_iovcnt = 1; io.uio_offset = (off_t)off; io.uio_resid = siz; io.uio_segflg = UIO_SYSSPACE; io.uio_rw = UIO_READ; io.uio_td = NULL; nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies, &cookies); off = (u_int64_t)io.uio_offset; if (io.uio_resid) siz -= io.uio_resid; getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); if (!cookies && !nd->nd_repstat) nd->nd_repstat = NFSERR_PERM; if (!nd->nd_repstat) nd->nd_repstat = getret; if (nd->nd_repstat) { vput(vp); if (cookies) free(cookies, M_TEMP); free(rbuf, M_TEMP); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); goto out; } /* * If nothing read, return eof * rpc reply */ if (siz == 0) { vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); txdr_hyper(at.na_filerev, tl); tl += 2; *tl++ = newnfs_false; *tl = newnfs_true; free(cookies, M_TEMP); free(rbuf, M_TEMP); goto out; } /* * Check for degenerate cases of nothing useful read. * If so go try again */ cpos = rbuf; cend = rbuf + siz; dp = (struct dirent *)cpos; cookiep = cookies; /* * For some reason FreeBSD's ufs_readdir() chooses to back the * directory offset up to a block boundary, so it is necessary to * skip over the records that precede the requested offset. This * requires the assumption that file offset cookies monotonically * increase. */ while (cpos < cend && ncookies > 0 && (dp->d_fileno == 0 || dp->d_type == DT_WHT || (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff) || ((nd->nd_flag & ND_NFSV4) && ((dp->d_namlen == 1 && dp->d_name[0] == '.') || (dp->d_namlen==2 && dp->d_name[0]=='.' && dp->d_name[1]=='.'))))) { cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } if (cpos >= cend || ncookies == 0) { siz = fullsiz; toff = off; goto again; } /* * Busy the file system so that the mount point won't go away * and, as such, VFS_VGET() can be used safely. */ mp = vp->v_mount; vfs_ref(mp); NFSVOPUNLOCK(vp); nd->nd_repstat = vfs_busy(mp, 0); vfs_rel(mp); if (nd->nd_repstat != 0) { vrele(vp); free(cookies, M_TEMP); free(rbuf, M_TEMP); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); goto out; } /* * Check to see if entries in this directory can be safely acquired * via VFS_VGET() or if a switch to VOP_LOOKUP() is required. * ZFS snapshot directories need VOP_LOOKUP(), so that any * automount of the snapshot directory that is required will * be done. * This needs to be done here for NFSv4, since NFSv4 never does * a VFS_VGET() for "." or "..". */ if (is_zfs == 1) { r = VFS_VGET(mp, at.na_fileid, LK_SHARED, &nvp); if (r == EOPNOTSUPP) { usevget = 0; cn.cn_nameiop = LOOKUP; cn.cn_lkflags = LK_SHARED | LK_RETRY; cn.cn_cred = nd->nd_cred; cn.cn_thread = p; } else if (r == 0) vput(nvp); } /* * Save this position, in case there is an error before one entry * is created. */ mb0 = nd->nd_mb; bpos0 = nd->nd_bpos; bextpg0 = nd->nd_bextpg; bextpgsiz0 = nd->nd_bextpgsiz; /* * Fill in the first part of the reply. * dirlen is the reply length in bytes and cannot exceed cnt. * (Include the two booleans at the end of the reply in dirlen now, * so we recognize when we have exceeded cnt.) */ if (nd->nd_flag & ND_NFSV3) { dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED; nfsrv_postopattr(nd, getret, &at); } else { dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED; } NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); txdr_hyper(at.na_filerev, tl); /* * Save this position, in case there is an empty reply needed. */ mb1 = nd->nd_mb; bpos1 = nd->nd_bpos; bextpg1 = nd->nd_bextpg; bextpgsiz1 = nd->nd_bextpgsiz; /* Loop through the records and build reply */ entrycnt = 0; while (cpos < cend && ncookies > 0 && dirlen < cnt) { nlen = dp->d_namlen; if (dp->d_fileno != 0 && dp->d_type != DT_WHT && nlen <= NFS_MAXNAMLEN && ((nd->nd_flag & ND_NFSV3) || nlen > 2 || (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.')) || (nlen == 1 && dp->d_name[0] != '.'))) { /* * Save the current position in the reply, in case * this entry exceeds cnt. */ mb1 = nd->nd_mb; bpos1 = nd->nd_bpos; bextpg1 = nd->nd_bextpg; bextpgsiz1 = nd->nd_bextpgsiz; /* * For readdir_and_lookup get the vnode using * the file number. */ nvp = NULL; refp = NULL; r = 0; at_root = 0; needs_unbusy = 0; new_mp = mp; mounted_on_fileno = (uint64_t)dp->d_fileno; if ((nd->nd_flag & ND_NFSV3) || NFSNONZERO_ATTRBIT(&savbits)) { if (nd->nd_flag & ND_NFSV4) refp = nfsv4root_getreferral(NULL, vp, dp->d_fileno); if (refp == NULL) { if (usevget) r = VFS_VGET(mp, dp->d_fileno, LK_SHARED, &nvp); else r = EOPNOTSUPP; if (r == EOPNOTSUPP) { if (usevget) { usevget = 0; cn.cn_nameiop = LOOKUP; cn.cn_lkflags = LK_SHARED | LK_RETRY; cn.cn_cred = nd->nd_cred; cn.cn_thread = p; } cn.cn_nameptr = dp->d_name; cn.cn_namelen = nlen; cn.cn_flags = ISLASTCN | NOFOLLOW | LOCKLEAF; if (nlen == 2 && dp->d_name[0] == '.' && dp->d_name[1] == '.') cn.cn_flags |= ISDOTDOT; if (NFSVOPLOCK(vp, LK_SHARED) != 0) { nd->nd_repstat = EPERM; break; } if ((vp->v_vflag & VV_ROOT) != 0 && (cn.cn_flags & ISDOTDOT) != 0) { vref(vp); nvp = vp; r = 0; } else { r = VOP_LOOKUP(vp, &nvp, &cn); if (vp != nvp) NFSVOPUNLOCK(vp); } } /* * For NFSv4, check to see if nvp is * a mount point and get the mount * point vnode, as required. */ if (r == 0 && nfsrv_enable_crossmntpt != 0 && (nd->nd_flag & ND_NFSV4) != 0 && nvp->v_type == VDIR && nvp->v_mountedhere != NULL) { new_mp = nvp->v_mountedhere; r = vfs_busy(new_mp, 0); vput(nvp); nvp = NULL; if (r == 0) { r = VFS_ROOT(new_mp, LK_SHARED, &nvp); needs_unbusy = 1; if (r == 0) at_root = 1; } } } /* * If we failed to look up the entry, then it * has become invalid, most likely removed. */ if (r != 0) { if (needs_unbusy) vfs_unbusy(new_mp); goto invalid; } KASSERT(refp != NULL || nvp != NULL, ("%s: undetected lookup error", __func__)); if (refp == NULL && ((nd->nd_flag & ND_NFSV3) || NFSNONZERO_ATTRBIT(&attrbits))) { r = nfsvno_getfh(nvp, &nfh, p); if (!r) r = nfsvno_getattr(nvp, nvap, nd, p, 1, &attrbits); if (r == 0 && is_zfs == 1 && nfsrv_enable_crossmntpt != 0 && (nd->nd_flag & ND_NFSV4) != 0 && nvp->v_type == VDIR && vp->v_mount != nvp->v_mount) { /* * For a ZFS snapshot, there is a * pseudo mount that does not set * v_mountedhere, so it needs to * be detected via a different * mount structure. */ at_root = 1; if (new_mp == mp) new_mp = nvp->v_mount; } } /* * If we failed to get attributes of the entry, * then just skip it for NFSv3 (the traditional * behavior in the old NFS server). * For NFSv4 the behavior is controlled by * RDATTRERROR: we either ignore the error or * fail the request. * Note that RDATTRERROR is never set for NFSv3. */ if (r != 0) { if (!NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_RDATTRERROR)) { vput(nvp); if (needs_unbusy != 0) vfs_unbusy(new_mp); if ((nd->nd_flag & ND_NFSV3)) goto invalid; nd->nd_repstat = r; break; } } } /* * Build the directory record xdr */ if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl++ = 0; *tl = txdr_unsigned(dp->d_fileno); dirlen += nfsm_strtom(nd, dp->d_name, nlen); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = 0; *tl = txdr_unsigned(*cookiep); nfsrv_postopattr(nd, 0, nvap); dirlen += nfsm_fhtom(nd,(u_int8_t *)&nfh,0,1); dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR); if (nvp != NULL) vput(nvp); } else { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl++ = 0; *tl = txdr_unsigned(*cookiep); dirlen += nfsm_strtom(nd, dp->d_name, nlen); if (nvp != NULL) { supports_nfsv4acls = nfs_supportsnfsv4acls(nvp); NFSVOPUNLOCK(nvp); } else supports_nfsv4acls = 0; if (refp != NULL) { dirlen += nfsrv_putreferralattr(nd, &savbits, refp, 0, &nd->nd_repstat); if (nd->nd_repstat) { if (nvp != NULL) vrele(nvp); if (needs_unbusy != 0) vfs_unbusy(new_mp); break; } } else if (r) { dirlen += nfsvno_fillattr(nd, new_mp, nvp, nvap, &nfh, r, &rderrbits, nd->nd_cred, p, isdgram, 0, supports_nfsv4acls, at_root, mounted_on_fileno); } else { dirlen += nfsvno_fillattr(nd, new_mp, nvp, nvap, &nfh, r, &attrbits, nd->nd_cred, p, isdgram, 0, supports_nfsv4acls, at_root, mounted_on_fileno); } if (nvp != NULL) vrele(nvp); dirlen += (3 * NFSX_UNSIGNED); } if (needs_unbusy != 0) vfs_unbusy(new_mp); if (dirlen <= cnt) entrycnt++; } invalid: cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } vrele(vp); vfs_unbusy(mp); /* * If dirlen > cnt, we must strip off the last entry. If that * results in an empty reply, report NFSERR_TOOSMALL. */ if (dirlen > cnt || nd->nd_repstat) { if (!nd->nd_repstat && entrycnt == 0) nd->nd_repstat = NFSERR_TOOSMALL; if (nd->nd_repstat) { nfsm_trimtrailing(nd, mb0, bpos0, bextpg0, bextpgsiz0); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); } else nfsm_trimtrailing(nd, mb1, bpos1, bextpg1, bextpgsiz1); eofflag = 0; } else if (cpos < cend) eofflag = 0; if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_false; if (eofflag) *tl = newnfs_true; else *tl = newnfs_false; } free(cookies, M_TEMP); free(rbuf, M_TEMP); out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * Get the settable attributes out of the mbuf list. * (Return 0 or EBADRPC) */ int nfsrv_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p) { u_int32_t *tl; struct nfsv2_sattr *sp; int error = 0, toclient = 0; switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) { case ND_NFSV2: NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); /* * Some old clients didn't fill in the high order 16bits. * --> check the low order 2 bytes for 0xffff */ if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff) nvap->na_mode = nfstov_mode(sp->sa_mode); if (sp->sa_uid != newnfs_xdrneg1) nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid); if (sp->sa_gid != newnfs_xdrneg1) nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid); if (sp->sa_size != newnfs_xdrneg1) nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size); if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) { #ifdef notyet fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime); #else nvap->na_atime.tv_sec = fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec); nvap->na_atime.tv_nsec = 0; #endif } if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1) fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime); break; case ND_NFSV3: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nvap->na_mode = nfstov_mode(*tl); } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nvap->na_uid = fxdr_unsigned(uid_t, *tl); } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nvap->na_gid = fxdr_unsigned(gid_t, *tl); } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); nvap->na_size = fxdr_hyper(tl); } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); switch (fxdr_unsigned(int, *tl)) { case NFSV3SATTRTIME_TOCLIENT: NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); fxdr_nfsv3time(tl, &nvap->na_atime); toclient = 1; break; case NFSV3SATTRTIME_TOSERVER: vfs_timestamp(&nvap->na_atime); nvap->na_vaflags |= VA_UTIMES_NULL; break; } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); switch (fxdr_unsigned(int, *tl)) { case NFSV3SATTRTIME_TOCLIENT: NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); fxdr_nfsv3time(tl, &nvap->na_mtime); nvap->na_vaflags &= ~VA_UTIMES_NULL; break; case NFSV3SATTRTIME_TOSERVER: vfs_timestamp(&nvap->na_mtime); if (!toclient) nvap->na_vaflags |= VA_UTIMES_NULL; break; } break; case ND_NFSV4: error = nfsv4_sattr(nd, vp, nvap, attrbitp, aclp, p); } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Handle the setable attributes for V4. * Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise. */ int nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p) { u_int32_t *tl; int attrsum = 0; int i, j; int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0; int moderet, toclient = 0; u_char *cp, namestr[NFSV4_SMALLSTR + 1]; uid_t uid; gid_t gid; u_short mode, mask; /* Same type as va_mode. */ struct vattr va; error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup); if (error) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsize = fxdr_unsigned(int, *tl); /* * Loop around getting the setable attributes. If an unsupported * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return. */ if (retnotsup) { nd->nd_repstat = NFSERR_ATTRNOTSUPP; bitpos = NFSATTRBIT_MAX; } else { bitpos = 0; } moderet = 0; for (; bitpos < NFSATTRBIT_MAX; bitpos++) { if (attrsum > attrsize) { error = NFSERR_BADXDR; goto nfsmout; } if (NFSISSET_ATTRBIT(attrbitp, bitpos)) switch (bitpos) { case NFSATTRBIT_SIZE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (vp != NULL && vp->v_type != VREG) { error = (vp->v_type == VDIR) ? NFSERR_ISDIR : NFSERR_INVAL; goto nfsmout; } nvap->na_size = fxdr_hyper(tl); attrsum += NFSX_HYPER; break; case NFSATTRBIT_ACL: error = nfsrv_dissectacl(nd, aclp, &aceerr, &aclsize, p); if (error) goto nfsmout; if (aceerr && !nd->nd_repstat) nd->nd_repstat = aceerr; attrsum += aclsize; break; case NFSATTRBIT_ARCHIVE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_HIDDEN: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_MIMETYPE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i)); break; case NFSATTRBIT_MODE: moderet = NFSERR_INVAL; /* Can't do MODESETMASKED. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nvap->na_mode = nfstov_mode(*tl); attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_OWNER: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); j = fxdr_unsigned(int, *tl); if (j < 0) { error = NFSERR_BADXDR; goto nfsmout; } if (j > NFSV4_SMALLSTR) cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); else cp = namestr; error = nfsrv_mtostr(nd, cp, j); if (error) { if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); goto nfsmout; } if (!nd->nd_repstat) { nd->nd_repstat = nfsv4_strtouid(nd, cp, j, &uid); if (!nd->nd_repstat) nvap->na_uid = uid; } if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); break; case NFSATTRBIT_OWNERGROUP: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); j = fxdr_unsigned(int, *tl); if (j < 0) { error = NFSERR_BADXDR; goto nfsmout; } if (j > NFSV4_SMALLSTR) cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); else cp = namestr; error = nfsrv_mtostr(nd, cp, j); if (error) { if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); goto nfsmout; } if (!nd->nd_repstat) { nd->nd_repstat = nfsv4_strtogid(nd, cp, j, &gid); if (!nd->nd_repstat) nvap->na_gid = gid; } if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); break; case NFSATTRBIT_SYSTEM: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_TIMEACCESSSET: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsum += NFSX_UNSIGNED; if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); fxdr_nfsv4time(tl, &nvap->na_atime); toclient = 1; attrsum += NFSX_V4TIME; } else { vfs_timestamp(&nvap->na_atime); nvap->na_vaflags |= VA_UTIMES_NULL; } break; case NFSATTRBIT_TIMEBACKUP: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMECREATE: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMODIFYSET: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsum += NFSX_UNSIGNED; if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); fxdr_nfsv4time(tl, &nvap->na_mtime); nvap->na_vaflags &= ~VA_UTIMES_NULL; attrsum += NFSX_V4TIME; } else { vfs_timestamp(&nvap->na_mtime); if (!toclient) nvap->na_vaflags |= VA_UTIMES_NULL; } break; case NFSATTRBIT_MODESETMASKED: NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); mode = fxdr_unsigned(u_short, *tl++); mask = fxdr_unsigned(u_short, *tl); /* * vp == NULL implies an Open/Create operation. * This attribute can only be used for Setattr and * only for NFSv4.1 or higher. * If moderet != 0, a mode attribute has also been * specified and this attribute cannot be done in the * same Setattr operation. */ if ((nd->nd_flag & ND_NFSV41) == 0) nd->nd_repstat = NFSERR_ATTRNOTSUPP; else if ((mode & ~07777) != 0 || (mask & ~07777) != 0 || vp == NULL) nd->nd_repstat = NFSERR_INVAL; else if (moderet == 0) moderet = VOP_GETATTR(vp, &va, nd->nd_cred); if (moderet == 0) nvap->na_mode = (mode & mask) | (va.va_mode & ~mask); else nd->nd_repstat = moderet; attrsum += 2 * NFSX_UNSIGNED; break; default: nd->nd_repstat = NFSERR_ATTRNOTSUPP; /* * set bitpos so we drop out of the loop. */ bitpos = NFSATTRBIT_MAX; break; } } /* * some clients pad the attrlist, so we need to skip over the * padding. */ if (attrsum > attrsize) { error = NFSERR_BADXDR; } else { attrsize = NFSM_RNDUP(attrsize); if (attrsum < attrsize) error = nfsm_advance(nd, attrsize - attrsum, -1); } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Check/setup export credentials. */ int nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp, struct ucred *credanon) { int error = 0; /* * Check/setup credentials. */ if (nd->nd_flag & ND_GSS) exp->nes_exflag &= ~MNT_EXPORTANON; /* * Check to see if the operation is allowed for this security flavor. * RFC2623 suggests that the NFSv3 Fsinfo RPC be allowed to * AUTH_NONE or AUTH_SYS for file systems requiring RPCSEC_GSS. * Also, allow Secinfo, so that it can acquire the correct flavor(s). */ if (nfsvno_testexp(nd, exp) && nd->nd_procnum != NFSV4OP_SECINFO && nd->nd_procnum != NFSPROC_FSINFO) { if (nd->nd_flag & ND_NFSV4) error = NFSERR_WRONGSEC; else error = (NFSERR_AUTHERR | AUTH_TOOWEAK); goto out; } /* * Check to see if the file system is exported V4 only. */ if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4)) { error = NFSERR_PROGNOTV4; goto out; } /* * Now, map the user credentials. * (Note that ND_AUTHNONE will only be set for an NFSv3 * Fsinfo RPC. If set for anything else, this code might need * to change.) */ if (NFSVNO_EXPORTED(exp)) { if (((nd->nd_flag & ND_GSS) == 0 && nd->nd_cred->cr_uid == 0) || NFSVNO_EXPORTANON(exp) || (nd->nd_flag & ND_AUTHNONE) != 0) { nd->nd_cred->cr_uid = credanon->cr_uid; nd->nd_cred->cr_gid = credanon->cr_gid; crsetgroups(nd->nd_cred, credanon->cr_ngroups, credanon->cr_groups); } else if ((nd->nd_flag & ND_GSS) == 0) { /* * If using AUTH_SYS, call nfsrv_getgrpscred() to see * if there is a replacement credential with a group * list set up by "nfsuserd -manage-gids". * If there is no replacement, nfsrv_getgrpscred() * simply returns its argument. */ nd->nd_cred = nfsrv_getgrpscred(nd->nd_cred); } } out: NFSEXITCODE2(error, nd); return (error); } /* * Check exports. */ int nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp, struct ucred **credp) { int i, error, *secflavors; error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, &exp->nes_numsecflavor, &secflavors); if (error) { if (nfs_rootfhset) { exp->nes_exflag = 0; exp->nes_numsecflavor = 0; error = 0; } } else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor > MAXSECFLAVORS) { printf("nfsvno_checkexp: numsecflavors out of range\n"); exp->nes_numsecflavor = 0; error = EACCES; } else { /* Copy the security flavors. */ for (i = 0; i < exp->nes_numsecflavor; i++) exp->nes_secflavors[i] = secflavors[i]; } NFSEXITCODE(error); return (error); } /* * Get a vnode for a file handle and export stuff. */ int nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam, int lktype, struct vnode **vpp, struct nfsexstuff *exp, struct ucred **credp) { int i, error, *secflavors; *credp = NULL; exp->nes_numsecflavor = 0; error = VFS_FHTOVP(mp, &fhp->fh_fid, lktype, vpp); if (error != 0) /* Make sure the server replies ESTALE to the client. */ error = ESTALE; if (nam && !error) { error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, &exp->nes_numsecflavor, &secflavors); if (error) { if (nfs_rootfhset) { exp->nes_exflag = 0; exp->nes_numsecflavor = 0; error = 0; } else { vput(*vpp); } } else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor > MAXSECFLAVORS) { printf("nfsvno_fhtovp: numsecflavors out of range\n"); exp->nes_numsecflavor = 0; error = EACCES; vput(*vpp); } else { /* Copy the security flavors. */ for (i = 0; i < exp->nes_numsecflavor; i++) exp->nes_secflavors[i] = secflavors[i]; } } NFSEXITCODE(error); return (error); } /* * nfsd_fhtovp() - convert a fh to a vnode ptr * - look up fsid in mount list (if not found ret error) * - get vp and export rights by calling nfsvno_fhtovp() * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon * for AUTH_SYS * - if mpp != NULL, return the mount point so that it can * be used for vn_finished_write() by the caller */ void nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype, struct vnode **vpp, struct nfsexstuff *exp, struct mount **mpp, int startwrite) { struct mount *mp; struct ucred *credanon; fhandle_t *fhp; fhp = (fhandle_t *)nfp->nfsrvfh_data; /* * Check for the special case of the nfsv4root_fh. */ mp = vfs_busyfs(&fhp->fh_fsid); if (mpp != NULL) *mpp = mp; if (mp == NULL) { *vpp = NULL; nd->nd_repstat = ESTALE; goto out; } if (startwrite) { vn_start_write(NULL, mpp, V_WAIT); if (lktype == LK_SHARED && !(MNT_SHARED_WRITES(mp))) lktype = LK_EXCLUSIVE; } nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, lktype, vpp, exp, &credanon); vfs_unbusy(mp); /* * For NFSv4 without a pseudo root fs, unexported file handles * can be returned, so that Lookup works everywhere. */ if (!nd->nd_repstat && exp->nes_exflag == 0 && !(nd->nd_flag & ND_NFSV4)) { vput(*vpp); nd->nd_repstat = NFSERR_ACCES; } /* * If TLS is required by the export, check the flags in nd_flag. */ if (nd->nd_repstat == 0 && ((NFSVNO_EXTLS(exp) && (nd->nd_flag & ND_TLS) == 0) || (NFSVNO_EXTLSCERT(exp) && (nd->nd_flag & ND_TLSCERT) == 0) || (NFSVNO_EXTLSCERTUSER(exp) && (nd->nd_flag & ND_TLSCERTUSER) == 0))) { vput(*vpp); nd->nd_repstat = NFSERR_ACCES; } /* * Personally, I've never seen any point in requiring a * reserved port#, since only in the rare case where the * clients are all boxes with secure system privileges, * does it provide any enhanced security, but... some people * believe it to be useful and keep putting this code back in. * (There is also some "security checker" out there that * complains if the nfs server doesn't enforce this.) * However, note the following: * RFC3530 (NFSv4) specifies that a reserved port# not be * required. * RFC2623 recommends that, if a reserved port# is checked for, * that there be a way to turn that off--> ifdef'd. */ #ifdef NFS_REQRSVPORT if (!nd->nd_repstat) { struct sockaddr_in *saddr; struct sockaddr_in6 *saddr6; saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *); saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *); if (!(nd->nd_flag & ND_NFSV4) && ((saddr->sin_family == AF_INET && ntohs(saddr->sin_port) >= IPPORT_RESERVED) || (saddr6->sin6_family == AF_INET6 && ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) { vput(*vpp); nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK); } } #endif /* NFS_REQRSVPORT */ /* * Check/setup credentials. */ if (!nd->nd_repstat) { nd->nd_saveduid = nd->nd_cred->cr_uid; nd->nd_repstat = nfsd_excred(nd, exp, credanon); if (nd->nd_repstat) vput(*vpp); } if (credanon != NULL) crfree(credanon); if (nd->nd_repstat) { if (startwrite) vn_finished_write(mp); *vpp = NULL; if (mpp != NULL) *mpp = NULL; } out: NFSEXITCODE2(0, nd); } /* * glue for fp. */ static int fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp) { struct filedesc *fdp; struct file *fp; int error = 0; fdp = p->td_proc->p_fd; if (fd < 0 || fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd].fde_file) == NULL) { error = EBADF; goto out; } *fpp = fp; out: NFSEXITCODE(error); return (error); } /* * Called from nfssvc() to update the exports list. Just call * vfs_export(). This has to be done, since the v4 root fake fs isn't * in the mount list. */ int nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p) { struct nfsex_args *nfsexargp = (struct nfsex_args *)argp; int error = 0; struct nameidata nd; fhandle_t fh; error = vfs_export(&nfsv4root_mnt, &nfsexargp->export); if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0) nfs_rootfhset = 0; else if (error == 0) { if (nfsexargp->fspec == NULL) { error = EPERM; goto out; } /* * If fspec != NULL, this is the v4root path. */ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, nfsexargp->fspec, p); if ((error = namei(&nd)) != 0) goto out; error = nfsvno_getfh(nd.ni_vp, &fh, p); vrele(nd.ni_vp); if (!error) { nfs_rootfh.nfsrvfh_len = NFSX_MYFH; NFSBCOPY((caddr_t)&fh, nfs_rootfh.nfsrvfh_data, sizeof (fhandle_t)); nfs_rootfhset = 1; } } out: NFSEXITCODE(error); return (error); } /* * This function needs to test to see if the system is near its limit * for memory allocation via malloc() or mget() and return True iff * either of these resources are near their limit. * XXX (For now, this is just a stub.) */ int nfsrv_testmalloclimit = 0; int nfsrv_mallocmget_limit(void) { static int printmesg = 0; static int testval = 1; if (nfsrv_testmalloclimit && (testval++ % 1000) == 0) { if ((printmesg++ % 100) == 0) printf("nfsd: malloc/mget near limit\n"); return (1); } return (0); } /* * BSD specific initialization of a mount point. */ void nfsd_mntinit(void) { static int inited = 0; if (inited) return; inited = 1; nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED); TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist); TAILQ_INIT(&nfsv4root_mnt.mnt_lazyvnodelist); nfsv4root_mnt.mnt_export = NULL; TAILQ_INIT(&nfsv4root_opt); TAILQ_INIT(&nfsv4root_newopt); nfsv4root_mnt.mnt_opt = &nfsv4root_opt; nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt; nfsv4root_mnt.mnt_nvnodelistsize = 0; nfsv4root_mnt.mnt_lazyvnodelistsize = 0; } /* * Get a vnode for a file handle, without checking exports, etc. */ struct vnode * nfsvno_getvp(fhandle_t *fhp) { struct mount *mp; struct vnode *vp; int error; mp = vfs_busyfs(&fhp->fh_fsid); if (mp == NULL) return (NULL); error = VFS_FHTOVP(mp, &fhp->fh_fid, LK_EXCLUSIVE, &vp); vfs_unbusy(mp); if (error) return (NULL); return (vp); } /* * Do a local VOP_ADVLOCK(). */ int nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first, u_int64_t end, struct thread *td) { int error = 0; struct flock fl; u_int64_t tlen; if (nfsrv_dolocallocks == 0) goto out; ASSERT_VOP_UNLOCKED(vp, "nfsvno_advlock: vp locked"); fl.l_whence = SEEK_SET; fl.l_type = ftype; fl.l_start = (off_t)first; if (end == NFS64BITSSET) { fl.l_len = 0; } else { tlen = end - first; fl.l_len = (off_t)tlen; } /* * For FreeBSD8, the l_pid and l_sysid must be set to the same * values for all calls, so that all locks will be held by the * nfsd server. (The nfsd server handles conflicts between the * various clients.) * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024 * bytes, so it can't be put in l_sysid. */ if (nfsv4_sysid == 0) nfsv4_sysid = nlm_acquire_next_sysid(); fl.l_pid = (pid_t)0; fl.l_sysid = (int)nfsv4_sysid; if (ftype == F_UNLCK) error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_UNLCK, &fl, (F_POSIX | F_REMOTE)); else error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl, (F_POSIX | F_REMOTE)); out: NFSEXITCODE(error); return (error); } /* * Check the nfsv4 root exports. */ int nfsvno_v4rootexport(struct nfsrv_descript *nd) { struct ucred *credanon; int exflags, error = 0, numsecflavor, *secflavors, i; error = vfs_stdcheckexp(&nfsv4root_mnt, nd->nd_nam, &exflags, &credanon, &numsecflavor, &secflavors); if (error) { error = NFSERR_PROGUNAVAIL; goto out; } if (credanon != NULL) crfree(credanon); for (i = 0; i < numsecflavor; i++) { if (secflavors[i] == AUTH_SYS) nd->nd_flag |= ND_EXAUTHSYS; else if (secflavors[i] == RPCSEC_GSS_KRB5) nd->nd_flag |= ND_EXGSS; else if (secflavors[i] == RPCSEC_GSS_KRB5I) nd->nd_flag |= ND_EXGSSINTEGRITY; else if (secflavors[i] == RPCSEC_GSS_KRB5P) nd->nd_flag |= ND_EXGSSPRIVACY; } /* And set ND_EXxx flags for TLS. */ if ((exflags & MNTEX_TLS) != 0) { nd->nd_flag |= ND_EXTLS; if ((exflags & MNTEX_TLSCERT) != 0) nd->nd_flag |= ND_EXTLSCERT; if ((exflags & MNTEX_TLSCERTUSER) != 0) nd->nd_flag |= ND_EXTLSCERTUSER; } out: NFSEXITCODE(error); return (error); } /* * Nfs server pseudo system call for the nfsd's */ /* * MPSAFE */ static int nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap) { struct file *fp; struct nfsd_addsock_args sockarg; struct nfsd_nfsd_args nfsdarg; struct nfsd_nfsd_oargs onfsdarg; struct nfsd_pnfsd_args pnfsdarg; struct vnode *vp, *nvp, *curdvp; struct pnfsdsfile *pf; struct nfsdevice *ds, *fds; cap_rights_t rights; int buflen, error, ret; char *buf, *cp, *cp2, *cp3; char fname[PNFS_FILENAME_LEN + 1]; if (uap->flag & NFSSVC_NFSDADDSOCK) { error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg)); if (error) goto out; /* * Since we don't know what rights might be required, * pretend that we need them all. It is better to be too * careful than too reckless. */ error = fget(td, sockarg.sock, cap_rights_init(&rights, CAP_SOCK_SERVER), &fp); if (error != 0) goto out; if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, td); error = EPERM; goto out; } error = nfsrvd_addsock(fp); fdrop(fp, td); } else if (uap->flag & NFSSVC_NFSDNFSD) { if (uap->argp == NULL) { error = EINVAL; goto out; } if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) { error = copyin(uap->argp, &onfsdarg, sizeof(onfsdarg)); if (error == 0) { nfsdarg.principal = onfsdarg.principal; nfsdarg.minthreads = onfsdarg.minthreads; nfsdarg.maxthreads = onfsdarg.maxthreads; nfsdarg.version = 1; nfsdarg.addr = NULL; nfsdarg.addrlen = 0; nfsdarg.dnshost = NULL; nfsdarg.dnshostlen = 0; nfsdarg.dspath = NULL; nfsdarg.dspathlen = 0; nfsdarg.mdspath = NULL; nfsdarg.mdspathlen = 0; nfsdarg.mirrorcnt = 1; } } else error = copyin(uap->argp, &nfsdarg, sizeof(nfsdarg)); if (error) goto out; if (nfsdarg.addrlen > 0 && nfsdarg.addrlen < 10000 && nfsdarg.dnshostlen > 0 && nfsdarg.dnshostlen < 10000 && nfsdarg.dspathlen > 0 && nfsdarg.dspathlen < 10000 && nfsdarg.mdspathlen > 0 && nfsdarg.mdspathlen < 10000 && nfsdarg.mirrorcnt >= 1 && nfsdarg.mirrorcnt <= NFSDEV_MAXMIRRORS && nfsdarg.addr != NULL && nfsdarg.dnshost != NULL && nfsdarg.dspath != NULL && nfsdarg.mdspath != NULL) { NFSD_DEBUG(1, "addrlen=%d dspathlen=%d dnslen=%d" " mdspathlen=%d mirrorcnt=%d\n", nfsdarg.addrlen, nfsdarg.dspathlen, nfsdarg.dnshostlen, nfsdarg.mdspathlen, nfsdarg.mirrorcnt); cp = malloc(nfsdarg.addrlen + 1, M_TEMP, M_WAITOK); error = copyin(nfsdarg.addr, cp, nfsdarg.addrlen); if (error != 0) { free(cp, M_TEMP); goto out; } cp[nfsdarg.addrlen] = '\0'; /* Ensure nul term. */ nfsdarg.addr = cp; cp = malloc(nfsdarg.dnshostlen + 1, M_TEMP, M_WAITOK); error = copyin(nfsdarg.dnshost, cp, nfsdarg.dnshostlen); if (error != 0) { free(nfsdarg.addr, M_TEMP); free(cp, M_TEMP); goto out; } cp[nfsdarg.dnshostlen] = '\0'; /* Ensure nul term. */ nfsdarg.dnshost = cp; cp = malloc(nfsdarg.dspathlen + 1, M_TEMP, M_WAITOK); error = copyin(nfsdarg.dspath, cp, nfsdarg.dspathlen); if (error != 0) { free(nfsdarg.addr, M_TEMP); free(nfsdarg.dnshost, M_TEMP); free(cp, M_TEMP); goto out; } cp[nfsdarg.dspathlen] = '\0'; /* Ensure nul term. */ nfsdarg.dspath = cp; cp = malloc(nfsdarg.mdspathlen + 1, M_TEMP, M_WAITOK); error = copyin(nfsdarg.mdspath, cp, nfsdarg.mdspathlen); if (error != 0) { free(nfsdarg.addr, M_TEMP); free(nfsdarg.dnshost, M_TEMP); free(nfsdarg.dspath, M_TEMP); free(cp, M_TEMP); goto out; } cp[nfsdarg.mdspathlen] = '\0'; /* Ensure nul term. */ nfsdarg.mdspath = cp; } else { nfsdarg.addr = NULL; nfsdarg.addrlen = 0; nfsdarg.dnshost = NULL; nfsdarg.dnshostlen = 0; nfsdarg.dspath = NULL; nfsdarg.dspathlen = 0; nfsdarg.mdspath = NULL; nfsdarg.mdspathlen = 0; nfsdarg.mirrorcnt = 1; } error = nfsrvd_nfsd(td, &nfsdarg); free(nfsdarg.addr, M_TEMP); free(nfsdarg.dnshost, M_TEMP); free(nfsdarg.dspath, M_TEMP); free(nfsdarg.mdspath, M_TEMP); } else if (uap->flag & NFSSVC_PNFSDS) { error = copyin(uap->argp, &pnfsdarg, sizeof(pnfsdarg)); if (error == 0 && (pnfsdarg.op == PNFSDOP_DELDSSERVER || pnfsdarg.op == PNFSDOP_FORCEDELDS)) { cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); error = copyinstr(pnfsdarg.dspath, cp, PATH_MAX + 1, NULL); if (error == 0) error = nfsrv_deldsserver(pnfsdarg.op, cp, td); free(cp, M_TEMP); } else if (error == 0 && pnfsdarg.op == PNFSDOP_COPYMR) { cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); buflen = sizeof(*pf) * NFSDEV_MAXMIRRORS; buf = malloc(buflen, M_TEMP, M_WAITOK); error = copyinstr(pnfsdarg.mdspath, cp, PATH_MAX + 1, NULL); NFSD_DEBUG(4, "pnfsdcopymr cp mdspath=%d\n", error); if (error == 0 && pnfsdarg.dspath != NULL) { cp2 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); error = copyinstr(pnfsdarg.dspath, cp2, PATH_MAX + 1, NULL); NFSD_DEBUG(4, "pnfsdcopymr cp dspath=%d\n", error); } else cp2 = NULL; if (error == 0 && pnfsdarg.curdspath != NULL) { cp3 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); error = copyinstr(pnfsdarg.curdspath, cp3, PATH_MAX + 1, NULL); NFSD_DEBUG(4, "pnfsdcopymr cp curdspath=%d\n", error); } else cp3 = NULL; curdvp = NULL; fds = NULL; if (error == 0) error = nfsrv_mdscopymr(cp, cp2, cp3, buf, &buflen, fname, td, &vp, &nvp, &pf, &ds, &fds); NFSD_DEBUG(4, "nfsrv_mdscopymr=%d\n", error); if (error == 0) { if (pf->dsf_dir >= nfsrv_dsdirsize) { printf("copymr: dsdir out of range\n"); pf->dsf_dir = 0; } NFSD_DEBUG(4, "copymr: buflen=%d\n", buflen); error = nfsrv_copymr(vp, nvp, ds->nfsdev_dsdir[pf->dsf_dir], ds, pf, (struct pnfsdsfile *)buf, buflen / sizeof(*pf), td->td_ucred, td); vput(vp); vput(nvp); if (fds != NULL && error == 0) { curdvp = fds->nfsdev_dsdir[pf->dsf_dir]; ret = vn_lock(curdvp, LK_EXCLUSIVE); if (ret == 0) { nfsrv_dsremove(curdvp, fname, td->td_ucred, td); NFSVOPUNLOCK(curdvp); } } NFSD_DEBUG(4, "nfsrv_copymr=%d\n", error); } free(cp, M_TEMP); free(cp2, M_TEMP); free(cp3, M_TEMP); free(buf, M_TEMP); } } else { error = nfssvc_srvcall(td, uap, td->td_ucred); } out: NFSEXITCODE(error); return (error); } static int nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred) { struct nfsex_args export; struct file *fp = NULL; int stablefd, len; struct nfsd_clid adminrevoke; struct nfsd_dumplist dumplist; struct nfsd_dumpclients *dumpclients; struct nfsd_dumplocklist dumplocklist; struct nfsd_dumplocks *dumplocks; struct nameidata nd; vnode_t vp; int error = EINVAL, igotlock; struct proc *procp; static int suspend_nfsd = 0; if (uap->flag & NFSSVC_PUBLICFH) { NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t)); error = copyin(uap->argp, &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t)); if (!error) nfs_pubfhset = 1; } else if (uap->flag & NFSSVC_V4ROOTEXPORT) { error = copyin(uap->argp,(caddr_t)&export, sizeof (struct nfsex_args)); if (!error) error = nfsrv_v4rootexport(&export, cred, p); } else if (uap->flag & NFSSVC_NOPUBLICFH) { nfs_pubfhset = 0; error = 0; } else if (uap->flag & NFSSVC_STABLERESTART) { error = copyin(uap->argp, (caddr_t)&stablefd, sizeof (int)); if (!error) error = fp_getfvp(p, stablefd, &fp, &vp); if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE)) error = EBADF; if (!error && newnfs_numnfsd != 0) error = EPERM; if (!error) { nfsrv_stablefirst.nsf_fp = fp; nfsrv_setupstable(p); } } else if (uap->flag & NFSSVC_ADMINREVOKE) { error = copyin(uap->argp, (caddr_t)&adminrevoke, sizeof (struct nfsd_clid)); if (!error) error = nfsrv_adminrevoke(&adminrevoke, p); } else if (uap->flag & NFSSVC_DUMPCLIENTS) { error = copyin(uap->argp, (caddr_t)&dumplist, sizeof (struct nfsd_dumplist)); if (!error && (dumplist.ndl_size < 1 || dumplist.ndl_size > NFSRV_MAXDUMPLIST)) error = EPERM; if (!error) { len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size; dumpclients = malloc(len, M_TEMP, M_WAITOK | M_ZERO); nfsrv_dumpclients(dumpclients, dumplist.ndl_size); error = copyout(dumpclients, CAST_USER_ADDR_T(dumplist.ndl_list), len); free(dumpclients, M_TEMP); } } else if (uap->flag & NFSSVC_DUMPLOCKS) { error = copyin(uap->argp, (caddr_t)&dumplocklist, sizeof (struct nfsd_dumplocklist)); if (!error && (dumplocklist.ndllck_size < 1 || dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST)) error = EPERM; if (!error) error = nfsrv_lookupfilename(&nd, dumplocklist.ndllck_fname, p); if (!error) { len = sizeof (struct nfsd_dumplocks) * dumplocklist.ndllck_size; dumplocks = malloc(len, M_TEMP, M_WAITOK | M_ZERO); nfsrv_dumplocks(nd.ni_vp, dumplocks, dumplocklist.ndllck_size, p); vput(nd.ni_vp); error = copyout(dumplocks, CAST_USER_ADDR_T(dumplocklist.ndllck_list), len); free(dumplocks, M_TEMP); } } else if (uap->flag & NFSSVC_BACKUPSTABLE) { procp = p->td_proc; PROC_LOCK(procp); nfsd_master_pid = procp->p_pid; bcopy(procp->p_comm, nfsd_master_comm, MAXCOMLEN + 1); nfsd_master_start = procp->p_stats->p_start; nfsd_master_proc = procp; PROC_UNLOCK(procp); } else if ((uap->flag & NFSSVC_SUSPENDNFSD) != 0) { NFSLOCKV4ROOTMUTEX(); if (suspend_nfsd == 0) { /* Lock out all nfsd threads */ do { igotlock = nfsv4_lock(&nfsd_suspend_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (igotlock == 0 && suspend_nfsd == 0); suspend_nfsd = 1; } NFSUNLOCKV4ROOTMUTEX(); error = 0; } else if ((uap->flag & NFSSVC_RESUMENFSD) != 0) { NFSLOCKV4ROOTMUTEX(); if (suspend_nfsd != 0) { nfsv4_unlock(&nfsd_suspend_lock, 0); suspend_nfsd = 0; } NFSUNLOCKV4ROOTMUTEX(); error = 0; } NFSEXITCODE(error); return (error); } /* * Check exports. * Returns 0 if ok, 1 otherwise. */ int nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp) { int i; /* * This seems odd, but allow the case where the security flavor * list is empty. This happens when NFSv4 is traversing non-exported * file systems. Exported file systems should always have a non-empty * security flavor list. */ if (exp->nes_numsecflavor == 0) return (0); for (i = 0; i < exp->nes_numsecflavor; i++) { /* * The tests for privacy and integrity must be first, * since ND_GSS is set for everything but AUTH_SYS. */ if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P && (nd->nd_flag & ND_GSSPRIVACY)) return (0); if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I && (nd->nd_flag & ND_GSSINTEGRITY)) return (0); if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 && (nd->nd_flag & ND_GSS)) return (0); if (exp->nes_secflavors[i] == AUTH_SYS && (nd->nd_flag & ND_GSS) == 0) return (0); } return (1); } /* * Calculate a hash value for the fid in a file handle. */ uint32_t nfsrv_hashfh(fhandle_t *fhp) { uint32_t hashval; hashval = hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0); return (hashval); } /* * Calculate a hash value for the sessionid. */ uint32_t nfsrv_hashsessionid(uint8_t *sessionid) { uint32_t hashval; hashval = hash32_buf(sessionid, NFSX_V4SESSIONID, 0); return (hashval); } /* * Signal the userland master nfsd to backup the stable restart file. */ void nfsrv_backupstable(void) { struct proc *procp; if (nfsd_master_proc != NULL) { procp = pfind(nfsd_master_pid); /* Try to make sure it is the correct process. */ if (procp == nfsd_master_proc && procp->p_stats->p_start.tv_sec == nfsd_master_start.tv_sec && procp->p_stats->p_start.tv_usec == nfsd_master_start.tv_usec && strcmp(procp->p_comm, nfsd_master_comm) == 0) kern_psignal(procp, SIGUSR2); else nfsd_master_proc = NULL; if (procp != NULL) PROC_UNLOCK(procp); } } /* * Create a DS data file for nfsrv_pnfscreate(). Called for each mirror. * The arguments are in a structure, so that they can be passed through * taskqueue for a kernel process to execute this function. */ struct nfsrvdscreate { int done; int inprog; struct task tsk; struct ucred *tcred; struct vnode *dvp; NFSPROC_T *p; struct pnfsdsfile *pf; int err; fhandle_t fh; struct vattr va; struct vattr createva; }; int nfsrv_dscreate(struct vnode *dvp, struct vattr *vap, struct vattr *nvap, fhandle_t *fhp, struct pnfsdsfile *pf, struct pnfsdsattr *dsa, char *fnamep, struct ucred *tcred, NFSPROC_T *p, struct vnode **nvpp) { struct vnode *nvp; struct nameidata named; struct vattr va; char *bufp; u_long *hashp; struct nfsnode *np; struct nfsmount *nmp; int error; NFSNAMEICNDSET(&named.ni_cnd, tcred, CREATE, LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE); nfsvno_setpathbuf(&named, &bufp, &hashp); named.ni_cnd.cn_lkflags = LK_EXCLUSIVE; named.ni_cnd.cn_thread = p; named.ni_cnd.cn_nameptr = bufp; if (fnamep != NULL) { strlcpy(bufp, fnamep, PNFS_FILENAME_LEN + 1); named.ni_cnd.cn_namelen = strlen(bufp); } else named.ni_cnd.cn_namelen = nfsrv_putfhname(fhp, bufp); NFSD_DEBUG(4, "nfsrv_dscreate: dvp=%p fname=%s\n", dvp, bufp); /* Create the date file in the DS mount. */ error = NFSVOPLOCK(dvp, LK_EXCLUSIVE); if (error == 0) { error = VOP_CREATE(dvp, &nvp, &named.ni_cnd, vap); NFSVOPUNLOCK(dvp); if (error == 0) { /* Set the ownership of the file. */ error = VOP_SETATTR(nvp, nvap, tcred); NFSD_DEBUG(4, "nfsrv_dscreate:" " setattr-uid=%d\n", error); if (error != 0) vput(nvp); } if (error != 0) printf("pNFS: pnfscreate failed=%d\n", error); } else printf("pNFS: pnfscreate vnlock=%d\n", error); if (error == 0) { np = VTONFS(nvp); nmp = VFSTONFS(nvp->v_mount); if (strcmp(nvp->v_mount->mnt_vfc->vfc_name, "nfs") != 0 || nmp->nm_nam->sa_len > sizeof( struct sockaddr_in6) || np->n_fhp->nfh_len != NFSX_MYFH) { printf("Bad DS file: fstype=%s salen=%d" " fhlen=%d\n", nvp->v_mount->mnt_vfc->vfc_name, nmp->nm_nam->sa_len, np->n_fhp->nfh_len); error = ENOENT; } /* Set extattrs for the DS on the MDS file. */ if (error == 0) { if (dsa != NULL) { error = VOP_GETATTR(nvp, &va, tcred); if (error == 0) { dsa->dsa_filerev = va.va_filerev; dsa->dsa_size = va.va_size; dsa->dsa_atime = va.va_atime; dsa->dsa_mtime = va.va_mtime; dsa->dsa_bytes = va.va_bytes; } } if (error == 0) { NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh, NFSX_MYFH); NFSBCOPY(nmp->nm_nam, &pf->dsf_sin, nmp->nm_nam->sa_len); NFSBCOPY(named.ni_cnd.cn_nameptr, pf->dsf_filename, sizeof(pf->dsf_filename)); } } else printf("pNFS: pnfscreate can't get DS" " attr=%d\n", error); if (nvpp != NULL && error == 0) *nvpp = nvp; else vput(nvp); } nfsvno_relpathbuf(&named); return (error); } /* * Start up the thread that will execute nfsrv_dscreate(). */ static void start_dscreate(void *arg, int pending) { struct nfsrvdscreate *dsc; dsc = (struct nfsrvdscreate *)arg; dsc->err = nfsrv_dscreate(dsc->dvp, &dsc->createva, &dsc->va, &dsc->fh, dsc->pf, NULL, NULL, dsc->tcred, dsc->p, NULL); dsc->done = 1; NFSD_DEBUG(4, "start_dscreate: err=%d\n", dsc->err); } /* * Create a pNFS data file on the Data Server(s). */ static void nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap, struct ucred *cred, NFSPROC_T *p) { struct nfsrvdscreate *dsc, *tdsc = NULL; struct nfsdevice *ds, *tds, *fds; struct mount *mp; struct pnfsdsfile *pf, *tpf; struct pnfsdsattr dsattr; struct vattr va; struct vnode *dvp[NFSDEV_MAXMIRRORS]; struct nfsmount *nmp; fhandle_t fh; uid_t vauid; gid_t vagid; u_short vamode; struct ucred *tcred; int dsdir[NFSDEV_MAXMIRRORS], error, i, mirrorcnt, ret; int failpos, timo; /* Get a DS server directory in a round-robin order. */ mirrorcnt = 1; mp = vp->v_mount; ds = fds = NULL; NFSDDSLOCK(); /* * Search for the first entry that handles this MDS fs, but use the * first entry for all MDS fs's otherwise. */ TAILQ_FOREACH(tds, &nfsrv_devidhead, nfsdev_list) { if (tds->nfsdev_nmp != NULL) { if (tds->nfsdev_mdsisset == 0 && ds == NULL) ds = tds; else if (tds->nfsdev_mdsisset != 0 && mp->mnt_stat.f_fsid.val[0] == tds->nfsdev_mdsfsid.val[0] && mp->mnt_stat.f_fsid.val[1] == tds->nfsdev_mdsfsid.val[1]) { ds = fds = tds; break; } } } if (ds == NULL) { NFSDDSUNLOCK(); NFSD_DEBUG(4, "nfsrv_pnfscreate: no srv\n"); return; } i = dsdir[0] = ds->nfsdev_nextdir; ds->nfsdev_nextdir = (ds->nfsdev_nextdir + 1) % nfsrv_dsdirsize; dvp[0] = ds->nfsdev_dsdir[i]; tds = TAILQ_NEXT(ds, nfsdev_list); if (nfsrv_maxpnfsmirror > 1 && tds != NULL) { TAILQ_FOREACH_FROM(tds, &nfsrv_devidhead, nfsdev_list) { if (tds->nfsdev_nmp != NULL && ((tds->nfsdev_mdsisset == 0 && fds == NULL) || (tds->nfsdev_mdsisset != 0 && fds != NULL && mp->mnt_stat.f_fsid.val[0] == tds->nfsdev_mdsfsid.val[0] && mp->mnt_stat.f_fsid.val[1] == tds->nfsdev_mdsfsid.val[1]))) { dsdir[mirrorcnt] = i; dvp[mirrorcnt] = tds->nfsdev_dsdir[i]; mirrorcnt++; if (mirrorcnt >= nfsrv_maxpnfsmirror) break; } } } /* Put at end of list to implement round-robin usage. */ TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list); TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list); NFSDDSUNLOCK(); dsc = NULL; if (mirrorcnt > 1) tdsc = dsc = malloc(sizeof(*dsc) * (mirrorcnt - 1), M_TEMP, M_WAITOK | M_ZERO); tpf = pf = malloc(sizeof(*pf) * nfsrv_maxpnfsmirror, M_TEMP, M_WAITOK | M_ZERO); error = nfsvno_getfh(vp, &fh, p); if (error == 0) error = VOP_GETATTR(vp, &va, cred); if (error == 0) { /* Set the attributes for "vp" to Setattr the DS vp. */ vauid = va.va_uid; vagid = va.va_gid; vamode = va.va_mode; VATTR_NULL(&va); va.va_uid = vauid; va.va_gid = vagid; va.va_mode = vamode; va.va_size = 0; } else printf("pNFS: pnfscreate getfh+attr=%d\n", error); NFSD_DEBUG(4, "nfsrv_pnfscreate: cruid=%d crgid=%d\n", cred->cr_uid, cred->cr_gid); /* Make data file name based on FH. */ tcred = newnfs_getcred(); /* * Create the file on each DS mirror, using kernel process(es) for the * additional mirrors. */ failpos = -1; for (i = 0; i < mirrorcnt - 1 && error == 0; i++, tpf++, tdsc++) { tpf->dsf_dir = dsdir[i]; tdsc->tcred = tcred; tdsc->p = p; tdsc->pf = tpf; tdsc->createva = *vap; NFSBCOPY(&fh, &tdsc->fh, sizeof(fh)); tdsc->va = va; tdsc->dvp = dvp[i]; tdsc->done = 0; tdsc->inprog = 0; tdsc->err = 0; ret = EIO; if (nfs_pnfsiothreads != 0) { ret = nfs_pnfsio(start_dscreate, tdsc); NFSD_DEBUG(4, "nfsrv_pnfscreate: nfs_pnfsio=%d\n", ret); } if (ret != 0) { ret = nfsrv_dscreate(dvp[i], vap, &va, &fh, tpf, NULL, NULL, tcred, p, NULL); if (ret != 0) { KASSERT(error == 0, ("nfsrv_dscreate err=%d", error)); if (failpos == -1 && nfsds_failerr(ret)) failpos = i; else error = ret; } } } if (error == 0) { tpf->dsf_dir = dsdir[mirrorcnt - 1]; error = nfsrv_dscreate(dvp[mirrorcnt - 1], vap, &va, &fh, tpf, &dsattr, NULL, tcred, p, NULL); if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(error)) { failpos = mirrorcnt - 1; error = 0; } } timo = hz / 50; /* Wait for 20msec. */ if (timo < 1) timo = 1; /* Wait for kernel task(s) to complete. */ for (tdsc = dsc, i = 0; i < mirrorcnt - 1; i++, tdsc++) { while (tdsc->inprog != 0 && tdsc->done == 0) tsleep(&tdsc->tsk, PVFS, "srvdcr", timo); if (tdsc->err != 0) { if (failpos == -1 && nfsds_failerr(tdsc->err)) failpos = i; else if (error == 0) error = tdsc->err; } } /* * If failpos has been set, that mirror has failed, so it needs * to be disabled. */ if (failpos >= 0) { nmp = VFSTONFS(dvp[failpos]->v_mount); NFSLOCKMNT(nmp); if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | NFSMNTP_CANCELRPCS)) == 0) { nmp->nm_privflag |= NFSMNTP_CANCELRPCS; NFSUNLOCKMNT(nmp); ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p); NFSD_DEBUG(4, "dscreatfail fail=%d ds=%p\n", failpos, ds); if (ds != NULL) nfsrv_killrpcs(nmp); NFSLOCKMNT(nmp); nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; wakeup(nmp); } NFSUNLOCKMNT(nmp); } NFSFREECRED(tcred); if (error == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_pnfscreate vp"); NFSD_DEBUG(4, "nfsrv_pnfscreate: mirrorcnt=%d maxmirror=%d\n", mirrorcnt, nfsrv_maxpnfsmirror); /* * For all mirrors that couldn't be created, fill in the * *pf structure, but with an IP address == 0.0.0.0. */ tpf = pf + mirrorcnt; for (i = mirrorcnt; i < nfsrv_maxpnfsmirror; i++, tpf++) { *tpf = *pf; tpf->dsf_sin.sin_family = AF_INET; tpf->dsf_sin.sin_len = sizeof(struct sockaddr_in); tpf->dsf_sin.sin_addr.s_addr = 0; tpf->dsf_sin.sin_port = 0; } error = vn_extattr_set(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", sizeof(*pf) * nfsrv_maxpnfsmirror, (char *)pf, p); if (error == 0) error = vn_extattr_set(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", sizeof(dsattr), (char *)&dsattr, p); if (error != 0) printf("pNFS: pnfscreate setextattr=%d\n", error); } else printf("pNFS: pnfscreate=%d\n", error); free(pf, M_TEMP); free(dsc, M_TEMP); } /* * Get the information needed to remove the pNFS Data Server file from the * Metadata file. Upon success, ddvp is set non-NULL to the locked * DS directory vnode. The caller must unlock *ddvp when done with it. */ static void nfsrv_pnfsremovesetup(struct vnode *vp, NFSPROC_T *p, struct vnode **dvpp, int *mirrorcntp, char *fname, fhandle_t *fhp) { struct vattr va; struct ucred *tcred; char *buf; int buflen, error; dvpp[0] = NULL; /* If not an exported regular file or not a pNFS server, just return. */ if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 || nfsrv_devidcnt == 0) return; /* Check to see if this is the last hard link. */ tcred = newnfs_getcred(); error = VOP_GETATTR(vp, &va, tcred); NFSFREECRED(tcred); if (error != 0) { printf("pNFS: nfsrv_pnfsremovesetup getattr=%d\n", error); return; } if (va.va_nlink > 1) return; error = nfsvno_getfh(vp, fhp, p); if (error != 0) { printf("pNFS: nfsrv_pnfsremovesetup getfh=%d\n", error); return; } buflen = 1024; buf = malloc(buflen, M_TEMP, M_WAITOK); /* Get the directory vnode for the DS mount and the file handle. */ error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, dvpp, NULL, NULL, fname, NULL, NULL, NULL, NULL, NULL); free(buf, M_TEMP); if (error != 0) printf("pNFS: nfsrv_pnfsremovesetup getsockmnt=%d\n", error); } /* * Remove a DS data file for nfsrv_pnfsremove(). Called for each mirror. * The arguments are in a structure, so that they can be passed through * taskqueue for a kernel process to execute this function. */ struct nfsrvdsremove { int done; int inprog; struct task tsk; struct ucred *tcred; struct vnode *dvp; NFSPROC_T *p; int err; char fname[PNFS_FILENAME_LEN + 1]; }; static int nfsrv_dsremove(struct vnode *dvp, char *fname, struct ucred *tcred, NFSPROC_T *p) { struct nameidata named; struct vnode *nvp; char *bufp; u_long *hashp; int error; error = NFSVOPLOCK(dvp, LK_EXCLUSIVE); if (error != 0) return (error); named.ni_cnd.cn_nameiop = DELETE; named.ni_cnd.cn_lkflags = LK_EXCLUSIVE | LK_RETRY; named.ni_cnd.cn_cred = tcred; named.ni_cnd.cn_thread = p; named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF | SAVENAME; nfsvno_setpathbuf(&named, &bufp, &hashp); named.ni_cnd.cn_nameptr = bufp; named.ni_cnd.cn_namelen = strlen(fname); strlcpy(bufp, fname, NAME_MAX); NFSD_DEBUG(4, "nfsrv_pnfsremove: filename=%s\n", bufp); error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd); NFSD_DEBUG(4, "nfsrv_pnfsremove: aft LOOKUP=%d\n", error); if (error == 0) { error = VOP_REMOVE(dvp, nvp, &named.ni_cnd); vput(nvp); } NFSVOPUNLOCK(dvp); nfsvno_relpathbuf(&named); if (error != 0) printf("pNFS: nfsrv_pnfsremove failed=%d\n", error); return (error); } /* * Start up the thread that will execute nfsrv_dsremove(). */ static void start_dsremove(void *arg, int pending) { struct nfsrvdsremove *dsrm; dsrm = (struct nfsrvdsremove *)arg; dsrm->err = nfsrv_dsremove(dsrm->dvp, dsrm->fname, dsrm->tcred, dsrm->p); dsrm->done = 1; NFSD_DEBUG(4, "start_dsremove: err=%d\n", dsrm->err); } /* * Remove a pNFS data file from a Data Server. * nfsrv_pnfsremovesetup() must have been called before the MDS file was * removed to set up the dvp and fill in the FH. */ static void nfsrv_pnfsremove(struct vnode **dvp, int mirrorcnt, char *fname, fhandle_t *fhp, NFSPROC_T *p) { struct ucred *tcred; struct nfsrvdsremove *dsrm, *tdsrm; struct nfsdevice *ds; struct nfsmount *nmp; int failpos, i, ret, timo; tcred = newnfs_getcred(); dsrm = NULL; if (mirrorcnt > 1) dsrm = malloc(sizeof(*dsrm) * mirrorcnt - 1, M_TEMP, M_WAITOK); /* * Remove the file on each DS mirror, using kernel process(es) for the * additional mirrors. */ failpos = -1; for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) { tdsrm->tcred = tcred; tdsrm->p = p; tdsrm->dvp = dvp[i]; strlcpy(tdsrm->fname, fname, PNFS_FILENAME_LEN + 1); tdsrm->inprog = 0; tdsrm->done = 0; tdsrm->err = 0; ret = EIO; if (nfs_pnfsiothreads != 0) { ret = nfs_pnfsio(start_dsremove, tdsrm); NFSD_DEBUG(4, "nfsrv_pnfsremove: nfs_pnfsio=%d\n", ret); } if (ret != 0) { ret = nfsrv_dsremove(dvp[i], fname, tcred, p); if (failpos == -1 && nfsds_failerr(ret)) failpos = i; } } ret = nfsrv_dsremove(dvp[mirrorcnt - 1], fname, tcred, p); if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(ret)) failpos = mirrorcnt - 1; timo = hz / 50; /* Wait for 20msec. */ if (timo < 1) timo = 1; /* Wait for kernel task(s) to complete. */ for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) { while (tdsrm->inprog != 0 && tdsrm->done == 0) tsleep(&tdsrm->tsk, PVFS, "srvdsrm", timo); if (failpos == -1 && nfsds_failerr(tdsrm->err)) failpos = i; } /* * If failpos has been set, that mirror has failed, so it needs * to be disabled. */ if (failpos >= 0) { nmp = VFSTONFS(dvp[failpos]->v_mount); NFSLOCKMNT(nmp); if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | NFSMNTP_CANCELRPCS)) == 0) { nmp->nm_privflag |= NFSMNTP_CANCELRPCS; NFSUNLOCKMNT(nmp); ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p); NFSD_DEBUG(4, "dsremovefail fail=%d ds=%p\n", failpos, ds); if (ds != NULL) nfsrv_killrpcs(nmp); NFSLOCKMNT(nmp); nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; wakeup(nmp); } NFSUNLOCKMNT(nmp); } /* Get rid all layouts for the file. */ nfsrv_freefilelayouts(fhp); NFSFREECRED(tcred); free(dsrm, M_TEMP); } /* * Generate a file name based on the file handle and put it in *bufp. * Return the number of bytes generated. */ static int nfsrv_putfhname(fhandle_t *fhp, char *bufp) { int i; uint8_t *cp; const uint8_t *hexdigits = "0123456789abcdef"; cp = (uint8_t *)fhp; for (i = 0; i < sizeof(*fhp); i++) { bufp[2 * i] = hexdigits[(*cp >> 4) & 0xf]; bufp[2 * i + 1] = hexdigits[*cp++ & 0xf]; } bufp[2 * i] = '\0'; return (2 * i); } /* * Update the Metadata file's attributes from the DS file when a Read/Write * layout is returned. * Basically just call nfsrv_proxyds() with procedure == NFSPROC_LAYOUTRETURN * so that it does a nfsrv_getattrdsrpc() and nfsrv_setextattr() on the DS file. */ int nfsrv_updatemdsattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p) { struct ucred *tcred; int error; /* Do this as root so that it won't fail with EACCES. */ tcred = newnfs_getcred(); error = nfsrv_proxyds(vp, 0, 0, tcred, p, NFSPROC_LAYOUTRETURN, NULL, NULL, NULL, nap, NULL, NULL, 0, NULL); NFSFREECRED(tcred); return (error); } /* * Set the NFSv4 ACL on the DS file to the same ACL as the MDS file. */ static int nfsrv_dssetacl(struct vnode *vp, struct acl *aclp, struct ucred *cred, NFSPROC_T *p) { int error; error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETACL, NULL, NULL, NULL, NULL, aclp, NULL, 0, NULL); return (error); } static int nfsrv_proxyds(struct vnode *vp, off_t off, int cnt, struct ucred *cred, struct thread *p, int ioproc, struct mbuf **mpp, struct nfsrv_descript *nd, struct mbuf **mpp2, struct nfsvattr *nap, struct acl *aclp, off_t *offp, int content, bool *eofp) { struct nfsmount *nmp[NFSDEV_MAXMIRRORS], *failnmp; fhandle_t fh[NFSDEV_MAXMIRRORS]; struct vnode *dvp[NFSDEV_MAXMIRRORS]; struct nfsdevice *ds; struct pnfsdsattr dsattr; struct opnfsdsattr odsattr; char *buf; int buflen, error, failpos, i, mirrorcnt, origmircnt, trycnt; NFSD_DEBUG(4, "in nfsrv_proxyds\n"); /* * If not a regular file, not exported or not a pNFS server, * just return ENOENT. */ if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 || nfsrv_devidcnt == 0) return (ENOENT); buflen = 1024; buf = malloc(buflen, M_TEMP, M_WAITOK); error = 0; /* * For Getattr, get the Change attribute (va_filerev) and size (va_size) * from the MetaData file's extended attribute. */ if (ioproc == NFSPROC_GETATTR) { error = vn_extattr_get(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", &buflen, buf, p); if (error == 0) { if (buflen == sizeof(odsattr)) { NFSBCOPY(buf, &odsattr, buflen); nap->na_filerev = odsattr.dsa_filerev; nap->na_size = odsattr.dsa_size; nap->na_atime = odsattr.dsa_atime; nap->na_mtime = odsattr.dsa_mtime; /* * Fake na_bytes by rounding up na_size. * Since we don't know the block size, just * use BLKDEV_IOSIZE. */ nap->na_bytes = (odsattr.dsa_size + BLKDEV_IOSIZE - 1) & ~(BLKDEV_IOSIZE - 1); } else if (buflen == sizeof(dsattr)) { NFSBCOPY(buf, &dsattr, buflen); nap->na_filerev = dsattr.dsa_filerev; nap->na_size = dsattr.dsa_size; nap->na_atime = dsattr.dsa_atime; nap->na_mtime = dsattr.dsa_mtime; nap->na_bytes = dsattr.dsa_bytes; } else error = ENXIO; } if (error == 0) { /* * If nfsrv_pnfsgetdsattr is 0 or nfsrv_checkdsattr() * returns 0, just return now. nfsrv_checkdsattr() * returns 0 if there is no Read/Write layout * plus either an Open/Write_access or Write * delegation issued to a client for the file. */ if (nfsrv_pnfsgetdsattr == 0 || nfsrv_checkdsattr(vp, p) == 0) { free(buf, M_TEMP); return (error); } } /* * Clear ENOATTR so the code below will attempt to do a * nfsrv_getattrdsrpc() to get the attributes and (re)create * the extended attribute. */ if (error == ENOATTR) error = 0; } origmircnt = -1; trycnt = 0; tryagain: if (error == 0) { buflen = 1024; if (ioproc == NFSPROC_READDS && NFSVOPISLOCKED(vp) == LK_EXCLUSIVE) printf("nfsrv_proxyds: Readds vp exclusively locked\n"); error = nfsrv_dsgetsockmnt(vp, LK_SHARED, buf, &buflen, &mirrorcnt, p, dvp, fh, NULL, NULL, NULL, NULL, NULL, NULL, NULL); if (error == 0) { for (i = 0; i < mirrorcnt; i++) nmp[i] = VFSTONFS(dvp[i]->v_mount); } else printf("pNFS: proxy getextattr sockaddr=%d\n", error); } else printf("pNFS: nfsrv_dsgetsockmnt=%d\n", error); if (error == 0) { failpos = -1; if (origmircnt == -1) origmircnt = mirrorcnt; /* * If failpos is set to a mirror#, then that mirror has * failed and will be disabled. For Read, Getattr and Seek, the * function only tries one mirror, so if that mirror has * failed, it will need to be retried. As such, increment * tryitagain for these cases. * For Write, Setattr and Setacl, the function tries all * mirrors and will not return an error for the case where * one mirror has failed. For these cases, the functioning * mirror(s) will have been modified, so a retry isn't * necessary. These functions will set failpos for the * failed mirror#. */ if (ioproc == NFSPROC_READDS) { error = nfsrv_readdsrpc(fh, off, cnt, cred, p, nmp[0], mpp, mpp2); if (nfsds_failerr(error) && mirrorcnt > 1) { /* * Setting failpos will cause the mirror * to be disabled and then a retry of this * read is required. */ failpos = 0; error = 0; trycnt++; } } else if (ioproc == NFSPROC_WRITEDS) error = nfsrv_writedsrpc(fh, off, cnt, cred, p, vp, &nmp[0], mirrorcnt, mpp, nd, &failpos); else if (ioproc == NFSPROC_SETATTR) error = nfsrv_setattrdsrpc(fh, cred, p, vp, &nmp[0], mirrorcnt, nap, &failpos); else if (ioproc == NFSPROC_SETACL) error = nfsrv_setacldsrpc(fh, cred, p, vp, &nmp[0], mirrorcnt, aclp, &failpos); else if (ioproc == NFSPROC_SEEKDS) { error = nfsrv_seekdsrpc(fh, offp, content, eofp, cred, p, nmp[0]); if (nfsds_failerr(error) && mirrorcnt > 1) { /* * Setting failpos will cause the mirror * to be disabled and then a retry of this * read is required. */ failpos = 0; error = 0; trycnt++; } } else if (ioproc == NFSPROC_ALLOCATE) error = nfsrv_allocatedsrpc(fh, off, *offp, cred, p, vp, &nmp[0], mirrorcnt, &failpos); else { error = nfsrv_getattrdsrpc(&fh[mirrorcnt - 1], cred, p, vp, nmp[mirrorcnt - 1], nap); if (nfsds_failerr(error) && mirrorcnt > 1) { /* * Setting failpos will cause the mirror * to be disabled and then a retry of this * getattr is required. */ failpos = mirrorcnt - 1; error = 0; trycnt++; } } ds = NULL; if (failpos >= 0) { failnmp = nmp[failpos]; NFSLOCKMNT(failnmp); if ((failnmp->nm_privflag & (NFSMNTP_FORCEDISM | NFSMNTP_CANCELRPCS)) == 0) { failnmp->nm_privflag |= NFSMNTP_CANCELRPCS; NFSUNLOCKMNT(failnmp); ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, failnmp, p); NFSD_DEBUG(4, "dsldsnmp fail=%d ds=%p\n", failpos, ds); if (ds != NULL) nfsrv_killrpcs(failnmp); NFSLOCKMNT(failnmp); failnmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; wakeup(failnmp); } NFSUNLOCKMNT(failnmp); } for (i = 0; i < mirrorcnt; i++) NFSVOPUNLOCK(dvp[i]); NFSD_DEBUG(4, "nfsrv_proxyds: aft RPC=%d trya=%d\n", error, trycnt); /* Try the Read/Getattr again if a mirror was deleted. */ if (ds != NULL && trycnt > 0 && trycnt < origmircnt) goto tryagain; } else { /* Return ENOENT for any Extended Attribute error. */ error = ENOENT; } free(buf, M_TEMP); NFSD_DEBUG(4, "nfsrv_proxyds: error=%d\n", error); return (error); } /* * Get the DS mount point, fh and directory from the "pnfsd.dsfile" extended * attribute. * newnmpp - If it points to a non-NULL nmp, that is the destination and needs * to be checked. If it points to a NULL nmp, then it returns * a suitable destination. * curnmp - If non-NULL, it is the source mount for the copy. */ int nfsrv_dsgetsockmnt(struct vnode *vp, int lktype, char *buf, int *buflenp, int *mirrorcntp, NFSPROC_T *p, struct vnode **dvpp, fhandle_t *fhp, char *devid, char *fnamep, struct vnode **nvpp, struct nfsmount **newnmpp, struct nfsmount *curnmp, int *ippos, int *dsdirp) { struct vnode *dvp, *nvp = NULL, **tdvpp; struct mount *mp; struct nfsmount *nmp, *newnmp; struct sockaddr *sad; struct sockaddr_in *sin; struct nfsdevice *ds, *tds, *fndds; struct pnfsdsfile *pf; uint32_t dsdir; int error, fhiszero, fnd, gotone, i, mirrorcnt; ASSERT_VOP_LOCKED(vp, "nfsrv_dsgetsockmnt vp"); *mirrorcntp = 1; tdvpp = dvpp; if (nvpp != NULL) *nvpp = NULL; if (dvpp != NULL) *dvpp = NULL; if (ippos != NULL) *ippos = -1; if (newnmpp != NULL) newnmp = *newnmpp; else newnmp = NULL; mp = vp->v_mount; error = vn_extattr_get(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", buflenp, buf, p); mirrorcnt = *buflenp / sizeof(*pf); if (error == 0 && (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS || *buflenp != sizeof(*pf) * mirrorcnt)) error = ENOATTR; pf = (struct pnfsdsfile *)buf; /* If curnmp != NULL, check for a match in the mirror list. */ if (curnmp != NULL && error == 0) { fnd = 0; for (i = 0; i < mirrorcnt; i++, pf++) { sad = (struct sockaddr *)&pf->dsf_sin; if (nfsaddr2_match(sad, curnmp->nm_nam)) { if (ippos != NULL) *ippos = i; fnd = 1; break; } } if (fnd == 0) error = ENXIO; } gotone = 0; pf = (struct pnfsdsfile *)buf; NFSD_DEBUG(4, "nfsrv_dsgetsockmnt: mirrorcnt=%d err=%d\n", mirrorcnt, error); for (i = 0; i < mirrorcnt && error == 0; i++, pf++) { fhiszero = 0; sad = (struct sockaddr *)&pf->dsf_sin; sin = &pf->dsf_sin; dsdir = pf->dsf_dir; if (dsdir >= nfsrv_dsdirsize) { printf("nfsrv_dsgetsockmnt: dsdir=%d\n", dsdir); error = ENOATTR; } else if (nvpp != NULL && newnmp != NULL && nfsaddr2_match(sad, newnmp->nm_nam)) error = EEXIST; if (error == 0) { if (ippos != NULL && curnmp == NULL && sad->sa_family == AF_INET && sin->sin_addr.s_addr == 0) *ippos = i; if (NFSBCMP(&zerofh, &pf->dsf_fh, sizeof(zerofh)) == 0) fhiszero = 1; /* Use the socket address to find the mount point. */ fndds = NULL; NFSDDSLOCK(); /* Find a match for the IP address. */ TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { if (ds->nfsdev_nmp != NULL) { dvp = ds->nfsdev_dvp; nmp = VFSTONFS(dvp->v_mount); if (nmp != ds->nfsdev_nmp) printf("different2 nmp %p %p\n", nmp, ds->nfsdev_nmp); if (nfsaddr2_match(sad, nmp->nm_nam)) { fndds = ds; break; } } } if (fndds != NULL && newnmpp != NULL && newnmp == NULL) { /* Search for a place to make a mirror copy. */ TAILQ_FOREACH(tds, &nfsrv_devidhead, nfsdev_list) { if (tds->nfsdev_nmp != NULL && fndds != tds && ((tds->nfsdev_mdsisset == 0 && fndds->nfsdev_mdsisset == 0) || (tds->nfsdev_mdsisset != 0 && fndds->nfsdev_mdsisset != 0 && tds->nfsdev_mdsfsid.val[0] == mp->mnt_stat.f_fsid.val[0] && tds->nfsdev_mdsfsid.val[1] == mp->mnt_stat.f_fsid.val[1]))) { *newnmpp = tds->nfsdev_nmp; break; } } if (tds != NULL) { /* * Move this entry to the end of the * list, so it won't be selected as * easily the next time. */ TAILQ_REMOVE(&nfsrv_devidhead, tds, nfsdev_list); TAILQ_INSERT_TAIL(&nfsrv_devidhead, tds, nfsdev_list); } } NFSDDSUNLOCK(); if (fndds != NULL) { dvp = fndds->nfsdev_dsdir[dsdir]; if (lktype != 0 || fhiszero != 0 || (nvpp != NULL && *nvpp == NULL)) { if (fhiszero != 0) error = vn_lock(dvp, LK_EXCLUSIVE); else if (lktype != 0) error = vn_lock(dvp, lktype); else error = vn_lock(dvp, LK_SHARED); /* * If the file handle is all 0's, try to * do a Lookup against the DS to acquire * it. * If dvpp == NULL or the Lookup fails, * unlock dvp after the call. */ if (error == 0 && (fhiszero != 0 || (nvpp != NULL && *nvpp == NULL))) { error = nfsrv_pnfslookupds(vp, dvp, pf, &nvp, p); if (error == 0) { if (fhiszero != 0) nfsrv_pnfssetfh( vp, pf, devid, fnamep, nvp, p); if (nvpp != NULL && *nvpp == NULL) { *nvpp = nvp; *dsdirp = dsdir; } else vput(nvp); } if (error != 0 || lktype == 0) NFSVOPUNLOCK(dvp); } } if (error == 0) { gotone++; NFSD_DEBUG(4, "gotone=%d\n", gotone); if (devid != NULL) { NFSBCOPY(fndds->nfsdev_deviceid, devid, NFSX_V4DEVICEID); devid += NFSX_V4DEVICEID; } if (dvpp != NULL) *tdvpp++ = dvp; if (fhp != NULL) NFSBCOPY(&pf->dsf_fh, fhp++, NFSX_MYFH); if (fnamep != NULL && gotone == 1) strlcpy(fnamep, pf->dsf_filename, sizeof(pf->dsf_filename)); } else NFSD_DEBUG(4, "nfsrv_dsgetsockmnt " "err=%d\n", error); } } } if (error == 0 && gotone == 0) error = ENOENT; NFSD_DEBUG(4, "eo nfsrv_dsgetsockmnt: gotone=%d err=%d\n", gotone, error); if (error == 0) *mirrorcntp = gotone; else { if (gotone > 0 && dvpp != NULL) { /* * If the error didn't occur on the first one and * dvpp != NULL, the one(s) prior to the failure will * have locked dvp's that need to be unlocked. */ for (i = 0; i < gotone; i++) { NFSVOPUNLOCK(*dvpp); *dvpp++ = NULL; } } /* * If it found the vnode to be copied from before a failure, * it needs to be vput()'d. */ if (nvpp != NULL && *nvpp != NULL) { vput(*nvpp); *nvpp = NULL; } } return (error); } /* * Set the extended attribute for the Change attribute. */ static int nfsrv_setextattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p) { struct pnfsdsattr dsattr; int error; ASSERT_VOP_ELOCKED(vp, "nfsrv_setextattr vp"); dsattr.dsa_filerev = nap->na_filerev; dsattr.dsa_size = nap->na_size; dsattr.dsa_atime = nap->na_atime; dsattr.dsa_mtime = nap->na_mtime; dsattr.dsa_bytes = nap->na_bytes; error = vn_extattr_set(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", sizeof(dsattr), (char *)&dsattr, p); if (error != 0) printf("pNFS: setextattr=%d\n", error); return (error); } static int nfsrv_readdsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred, NFSPROC_T *p, struct nfsmount *nmp, struct mbuf **mpp, struct mbuf **mpendp) { uint32_t *tl; struct nfsrv_descript *nd; nfsv4stateid_t st; struct mbuf *m, *m2; int error = 0, retlen, tlen, trimlen; NFSD_DEBUG(4, "in nfsrv_readdsrpc\n"); nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); *mpp = NULL; /* * Use a stateid where other is an alternating 01010 pattern and * seqid is 0xffffffff. This value is not defined as special by * the RFC and is used by the FreeBSD NFS server to indicate an * MDS->DS proxy operation. */ st.other[0] = 0x55555555; st.other[1] = 0x55555555; st.other[2] = 0x55555555; st.seqid = 0xffffffff; nfscl_reqstart(nd, NFSPROC_READDS, nmp, (u_int8_t *)fhp, sizeof(*fhp), NULL, NULL, 0, 0, false); nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3); txdr_hyper(off, tl); *(tl + 2) = txdr_unsigned(len); error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) { free(nd, M_TEMP); return (error); } if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); NFSM_STRSIZ(retlen, len); if (retlen > 0) { /* Trim off the pre-data XDR from the mbuf chain. */ m = nd->nd_mrep; while (m != NULL && m != nd->nd_md) { if (m->m_next == nd->nd_md) { m->m_next = NULL; m_freem(nd->nd_mrep); nd->nd_mrep = m = nd->nd_md; } else m = m->m_next; } if (m == NULL) { printf("nfsrv_readdsrpc: busted mbuf list\n"); error = ENOENT; goto nfsmout; } /* * Now, get rid of mbuf data that preceeds the * current position. For a regular mbuf, adjust * m_data, m_len and then find the end of the read * data and trim off any mbuf(s) after that. * For an ext_pgs mbuf, split it and free the first * and third mbuf chains. */ tlen = NFSM_RNDUP(retlen); if ((m->m_flags & M_NOMAP) != 0) { trimlen = nfsm_extpgs_calc_offs(m, nd->nd_dextpg, nd->nd_dextpgsiz); nd->nd_mrep = mb_splitatpos_ext(m, trimlen, M_WAITOK); m_freem(m); m = mb_splitatpos_ext(nd->nd_mrep, tlen, M_WAITOK); m_freem(m); m = m_last(nd->nd_mrep); } else { trimlen = nd->nd_dpos - mtod(m, char *); if (trimlen > 0) { m->m_len -= trimlen; m->m_data += trimlen; } /* * Truncate the mbuf chain at retlen bytes of * data, plus XDR padding that brings the * length up to a multiple of 4. */ do { if (m->m_len >= tlen) { m->m_len = tlen; tlen = 0; m2 = m->m_next; m->m_next = NULL; m_freem(m2); break; } tlen -= m->m_len; m = m->m_next; } while (m != NULL); if (tlen > 0) { printf("nfsrv_readdsrpc: busted mbuf " "list\n"); error = ENOENT; goto nfsmout; } } *mpp = nd->nd_mrep; *mpendp = m; nd->nd_mrep = NULL; } } else error = nd->nd_repstat; nfsmout: /* If nd->nd_mrep is already NULL, this is a no-op. */ m_freem(nd->nd_mrep); free(nd, M_TEMP); NFSD_DEBUG(4, "nfsrv_readdsrpc error=%d\n", error); return (error); } /* * Do a write RPC on a DS data file, using this structure for the arguments, * so that this function can be executed by a separate kernel process. */ struct nfsrvwritedsdorpc { int done; int inprog; struct task tsk; fhandle_t fh; off_t off; int len; struct nfsmount *nmp; struct ucred *cred; NFSPROC_T *p; struct mbuf *m; int err; }; static int nfsrv_writedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, int len, struct nfsvattr *nap, struct mbuf *m, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsrv_descript *nd; nfsattrbit_t attrbits; nfsv4stateid_t st; int commit, error, retlen; nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); nfscl_reqstart(nd, NFSPROC_WRITE, nmp, (u_int8_t *)fhp, sizeof(fhandle_t), NULL, NULL, 0, 0, false); /* * Use a stateid where other is an alternating 01010 pattern and * seqid is 0xffffffff. This value is not defined as special by * the RFC and is used by the FreeBSD NFS server to indicate an * MDS->DS proxy operation. */ st.other[0] = 0x55555555; st.other[1] = 0x55555555; st.other[2] = 0x55555555; st.seqid = 0xffffffff; nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED); txdr_hyper(off, tl); tl += 2; /* * Do all writes FileSync, since the server doesn't hold onto dirty * buffers. Since clients should be accessing the DS servers directly * using the pNFS layouts, this just needs to work correctly as a * fallback. */ *tl++ = txdr_unsigned(NFSWRITE_FILESYNC); *tl = txdr_unsigned(len); NFSD_DEBUG(4, "nfsrv_writedsdorpc: len=%d\n", len); /* Put data in mbuf chain. */ nd->nd_mb->m_next = m; if ((m->m_flags & M_NOMAP) != 0) nd->nd_flag |= ND_NOMAP; /* Set nd_mb and nd_bpos to end of data. */ while (m->m_next != NULL) m = m->m_next; nd->nd_mb = m; nfsm_set(nd, m->m_len, true); NFSD_DEBUG(4, "nfsrv_writedsdorpc: lastmb len=%d\n", m->m_len); /* Do a Getattr for the attributes that change upon writing. */ NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) { free(nd, M_TEMP); return (error); } NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft writerpc=%d\n", nd->nd_repstat); /* Get rid of weak cache consistency data for now. */ if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == (ND_NFSV4 | ND_V4WCCATTR)) { error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); NFSD_DEBUG(4, "nfsrv_writedsdorpc: wcc attr=%d\n", error); if (error != 0) goto nfsmout; /* * Get rid of Op# and status for next op. */ NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); if (*++tl != 0) nd->nd_flag |= ND_NOMOREDATA; } if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF); retlen = fxdr_unsigned(int, *tl++); commit = fxdr_unsigned(int, *tl); if (commit != NFSWRITE_FILESYNC) error = NFSERR_IO; NFSD_DEBUG(4, "nfsrv_writedsdorpc:retlen=%d commit=%d err=%d\n", retlen, commit, error); } else error = nd->nd_repstat; /* We have no use for the Write Verifier since we use FileSync. */ /* * Get the Change, Size, Access Time and Modify Time attributes and set * on the Metadata file, so its attributes will be what the file's * would be if it had been written. */ if (error == 0) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); } NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft loadattr=%d\n", error); nfsmout: m_freem(nd->nd_mrep); free(nd, M_TEMP); NFSD_DEBUG(4, "nfsrv_writedsdorpc error=%d\n", error); return (error); } /* * Start up the thread that will execute nfsrv_writedsdorpc(). */ static void start_writedsdorpc(void *arg, int pending) { struct nfsrvwritedsdorpc *drpc; drpc = (struct nfsrvwritedsdorpc *)arg; drpc->err = nfsrv_writedsdorpc(drpc->nmp, &drpc->fh, drpc->off, drpc->len, NULL, drpc->m, drpc->cred, drpc->p); drpc->done = 1; NFSD_DEBUG(4, "start_writedsdorpc: err=%d\n", drpc->err); } static int nfsrv_writedsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred, NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, struct mbuf **mpp, struct nfsrv_descript *nd, int *failposp) { struct nfsrvwritedsdorpc *drpc, *tdrpc = NULL; struct nfsvattr na; struct mbuf *m, *m1, *m2; int error, i, offs, ret, timo; bool gotnomap; NFSD_DEBUG(4, "in nfsrv_writedsrpc\n"); KASSERT(*mpp != NULL, ("nfsrv_writedsrpc: NULL mbuf chain")); drpc = NULL; if (mirrorcnt > 1) tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, M_WAITOK); NFSD_DEBUG(4, "nfsrv_writedsrpc: mcopy len=%d\n", len); /* * For M_NOMAP mbufs, the mbuf chain needs to be split into 3 chains * so that m_copym() can be done with offs == 0 and M_COPYALL. * *mpp - Everything that preceeds the data to be written. * m1 - The data to be written. * m2 - Everything that follows the data to be written. */ m1 = *mpp; gotnomap = false; if ((m1->m_flags & M_NOMAP) != 0) { gotnomap = true; offs = nfsm_extpgs_calc_offs(nd->nd_md, nd->nd_dextpg, nd->nd_dextpgsiz); m1 = mb_splitatpos_ext(m1, offs, M_WAITOK); m2 = mb_splitatpos_ext(m1, NFSM_RNDUP(len), M_WAITOK); } else offs = nd->nd_dpos - mtod(m1, char *); /* * Do the write RPC for every DS, using a separate kernel process * for every DS except the last one. */ error = 0; for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { tdrpc->done = 0; NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); tdrpc->off = off; tdrpc->len = len; tdrpc->nmp = *nmpp; tdrpc->cred = cred; tdrpc->p = p; tdrpc->inprog = 0; tdrpc->err = 0; if (gotnomap) tdrpc->m = m_copym(m1, 0, M_COPYALL, M_WAITOK); else tdrpc->m = m_copym(m1, offs, NFSM_RNDUP(len), M_WAITOK); ret = EIO; if (nfs_pnfsiothreads != 0) { ret = nfs_pnfsio(start_writedsdorpc, tdrpc); NFSD_DEBUG(4, "nfsrv_writedsrpc: nfs_pnfsio=%d\n", ret); } if (ret != 0) { ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, NULL, tdrpc->m, cred, p); if (nfsds_failerr(ret) && *failposp == -1) *failposp = i; else if (error == 0 && ret != 0) error = ret; } nmpp++; fhp++; } if (gotnomap) m = m_copym(m1, 0, M_COPYALL, M_WAITOK); else m = m_copym(m1, offs, NFSM_RNDUP(len), M_WAITOK); ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, &na, m, cred, p); if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) *failposp = mirrorcnt - 1; else if (error == 0 && ret != 0) error = ret; if (error == 0) error = nfsrv_setextattr(vp, &na, p); NFSD_DEBUG(4, "nfsrv_writedsrpc: aft setextat=%d\n", error); tdrpc = drpc; timo = hz / 50; /* Wait for 20msec. */ if (timo < 1) timo = 1; for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { /* Wait for RPCs on separate threads to complete. */ while (tdrpc->inprog != 0 && tdrpc->done == 0) tsleep(&tdrpc->tsk, PVFS, "srvwrds", timo); if (nfsds_failerr(tdrpc->err) && *failposp == -1) *failposp = i; else if (error == 0 && tdrpc->err != 0) error = tdrpc->err; } /* For gotnomap, chain the lists back to-gether. */ if (gotnomap) { m_last(*mpp)->m_next = m1; m_last(m1)->m_next = m2; nd->nd_md = m1; nfsm_set(nd, 0, false); } free(drpc, M_TEMP); return (error); } /* * Do a allocate RPC on a DS data file, using this structure for the arguments, * so that this function can be executed by a separate kernel process. */ struct nfsrvallocatedsdorpc { int done; int inprog; struct task tsk; fhandle_t fh; off_t off; off_t len; struct nfsmount *nmp; struct ucred *cred; NFSPROC_T *p; int err; }; static int nfsrv_allocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, off_t len, struct nfsvattr *nap, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsrv_descript *nd; nfsattrbit_t attrbits; nfsv4stateid_t st; int error; nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); nfscl_reqstart(nd, NFSPROC_ALLOCATE, nmp, (u_int8_t *)fhp, sizeof(fhandle_t), NULL, NULL, 0, 0, false); /* * Use a stateid where other is an alternating 01010 pattern and * seqid is 0xffffffff. This value is not defined as special by * the RFC and is used by the FreeBSD NFS server to indicate an * MDS->DS proxy operation. */ st.other[0] = 0x55555555; st.other[1] = 0x55555555; st.other[2] = 0x55555555; st.seqid = 0xffffffff; nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED); txdr_hyper(off, tl); tl += 2; txdr_hyper(len, tl); tl += 2; NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: len=%jd\n", (intmax_t)len); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSGETATTR_ATTRBIT(&attrbits); nfsrv_putattrbit(nd, &attrbits); error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) { free(nd, M_TEMP); return (error); } NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft allocaterpc=%d\n", nd->nd_repstat); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); } else error = nd->nd_repstat; NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft loadattr=%d\n", error); nfsmout: m_freem(nd->nd_mrep); free(nd, M_TEMP); NFSD_DEBUG(4, "nfsrv_allocatedsdorpc error=%d\n", error); return (error); } /* * Start up the thread that will execute nfsrv_allocatedsdorpc(). */ static void start_allocatedsdorpc(void *arg, int pending) { struct nfsrvallocatedsdorpc *drpc; drpc = (struct nfsrvallocatedsdorpc *)arg; drpc->err = nfsrv_allocatedsdorpc(drpc->nmp, &drpc->fh, drpc->off, drpc->len, NULL, drpc->cred, drpc->p); drpc->done = 1; NFSD_DEBUG(4, "start_allocatedsdorpc: err=%d\n", drpc->err); } static int nfsrv_allocatedsrpc(fhandle_t *fhp, off_t off, off_t len, struct ucred *cred, NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, int *failposp) { struct nfsrvallocatedsdorpc *drpc, *tdrpc = NULL; struct nfsvattr na; int error, i, ret, timo; NFSD_DEBUG(4, "in nfsrv_allocatedsrpc\n"); drpc = NULL; if (mirrorcnt > 1) tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, M_WAITOK); /* * Do the allocate RPC for every DS, using a separate kernel process * for every DS except the last one. */ error = 0; for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { tdrpc->done = 0; NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); tdrpc->off = off; tdrpc->len = len; tdrpc->nmp = *nmpp; tdrpc->cred = cred; tdrpc->p = p; tdrpc->inprog = 0; tdrpc->err = 0; ret = EIO; if (nfs_pnfsiothreads != 0) { ret = nfs_pnfsio(start_allocatedsdorpc, tdrpc); NFSD_DEBUG(4, "nfsrv_allocatedsrpc: nfs_pnfsio=%d\n", ret); } if (ret != 0) { ret = nfsrv_allocatedsdorpc(*nmpp, fhp, off, len, NULL, cred, p); if (nfsds_failerr(ret) && *failposp == -1) *failposp = i; else if (error == 0 && ret != 0) error = ret; } nmpp++; fhp++; } ret = nfsrv_allocatedsdorpc(*nmpp, fhp, off, len, &na, cred, p); if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) *failposp = mirrorcnt - 1; else if (error == 0 && ret != 0) error = ret; if (error == 0) error = nfsrv_setextattr(vp, &na, p); NFSD_DEBUG(4, "nfsrv_allocatedsrpc: aft setextat=%d\n", error); tdrpc = drpc; timo = hz / 50; /* Wait for 20msec. */ if (timo < 1) timo = 1; for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { /* Wait for RPCs on separate threads to complete. */ while (tdrpc->inprog != 0 && tdrpc->done == 0) tsleep(&tdrpc->tsk, PVFS, "srvalds", timo); if (nfsds_failerr(tdrpc->err) && *failposp == -1) *failposp = i; else if (error == 0 && tdrpc->err != 0) error = tdrpc->err; } free(drpc, M_TEMP); return (error); } static int nfsrv_setattrdsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap, struct nfsvattr *dsnap) { uint32_t *tl; struct nfsrv_descript *nd; nfsv4stateid_t st; nfsattrbit_t attrbits; int error; NFSD_DEBUG(4, "in nfsrv_setattrdsdorpc\n"); nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); /* * Use a stateid where other is an alternating 01010 pattern and * seqid is 0xffffffff. This value is not defined as special by * the RFC and is used by the FreeBSD NFS server to indicate an * MDS->DS proxy operation. */ st.other[0] = 0x55555555; st.other[1] = 0x55555555; st.other[2] = 0x55555555; st.seqid = 0xffffffff; nfscl_reqstart(nd, NFSPROC_SETATTR, nmp, (u_int8_t *)fhp, sizeof(*fhp), NULL, NULL, 0, 0, false); nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); nfscl_fillsattr(nd, &nap->na_vattr, vp, NFSSATTR_FULL, 0); /* Do a Getattr for the attributes that change due to writing. */ NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) { free(nd, M_TEMP); return (error); } NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattrrpc=%d\n", nd->nd_repstat); /* Get rid of weak cache consistency data for now. */ if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == (ND_NFSV4 | ND_V4WCCATTR)) { error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: wcc attr=%d\n", error); if (error != 0) goto nfsmout; /* * Get rid of Op# and status for next op. */ NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); if (*++tl != 0) nd->nd_flag |= ND_NOMOREDATA; } error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (error != 0) goto nfsmout; if (nd->nd_repstat != 0) error = nd->nd_repstat; /* * Get the Change, Size, Access Time and Modify Time attributes and set * on the Metadata file, so its attributes will be what the file's * would be if it had been written. */ if (error == 0) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); } NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattr loadattr=%d\n", error); nfsmout: m_freem(nd->nd_mrep); free(nd, M_TEMP); NFSD_DEBUG(4, "nfsrv_setattrdsdorpc error=%d\n", error); return (error); } struct nfsrvsetattrdsdorpc { int done; int inprog; struct task tsk; fhandle_t fh; struct nfsmount *nmp; struct vnode *vp; struct ucred *cred; NFSPROC_T *p; struct nfsvattr na; struct nfsvattr dsna; int err; }; /* * Start up the thread that will execute nfsrv_setattrdsdorpc(). */ static void start_setattrdsdorpc(void *arg, int pending) { struct nfsrvsetattrdsdorpc *drpc; drpc = (struct nfsrvsetattrdsdorpc *)arg; drpc->err = nfsrv_setattrdsdorpc(&drpc->fh, drpc->cred, drpc->p, drpc->vp, drpc->nmp, &drpc->na, &drpc->dsna); drpc->done = 1; } static int nfsrv_setattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, struct nfsvattr *nap, int *failposp) { struct nfsrvsetattrdsdorpc *drpc, *tdrpc = NULL; struct nfsvattr na; int error, i, ret, timo; NFSD_DEBUG(4, "in nfsrv_setattrdsrpc\n"); drpc = NULL; if (mirrorcnt > 1) tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, M_WAITOK); /* * Do the setattr RPC for every DS, using a separate kernel process * for every DS except the last one. */ error = 0; for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { tdrpc->done = 0; tdrpc->inprog = 0; NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); tdrpc->nmp = *nmpp; tdrpc->vp = vp; tdrpc->cred = cred; tdrpc->p = p; tdrpc->na = *nap; tdrpc->err = 0; ret = EIO; if (nfs_pnfsiothreads != 0) { ret = nfs_pnfsio(start_setattrdsdorpc, tdrpc); NFSD_DEBUG(4, "nfsrv_setattrdsrpc: nfs_pnfsio=%d\n", ret); } if (ret != 0) { ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap, &na); if (nfsds_failerr(ret) && *failposp == -1) *failposp = i; else if (error == 0 && ret != 0) error = ret; } nmpp++; fhp++; } ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap, &na); if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) *failposp = mirrorcnt - 1; else if (error == 0 && ret != 0) error = ret; if (error == 0) error = nfsrv_setextattr(vp, &na, p); NFSD_DEBUG(4, "nfsrv_setattrdsrpc: aft setextat=%d\n", error); tdrpc = drpc; timo = hz / 50; /* Wait for 20msec. */ if (timo < 1) timo = 1; for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { /* Wait for RPCs on separate threads to complete. */ while (tdrpc->inprog != 0 && tdrpc->done == 0) tsleep(&tdrpc->tsk, PVFS, "srvsads", timo); if (nfsds_failerr(tdrpc->err) && *failposp == -1) *failposp = i; else if (error == 0 && tdrpc->err != 0) error = tdrpc->err; } free(drpc, M_TEMP); return (error); } /* * Do a Setattr of an NFSv4 ACL on the DS file. */ static int nfsrv_setacldsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, struct vnode *vp, struct nfsmount *nmp, struct acl *aclp) { struct nfsrv_descript *nd; nfsv4stateid_t st; nfsattrbit_t attrbits; int error; NFSD_DEBUG(4, "in nfsrv_setacldsdorpc\n"); nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); /* * Use a stateid where other is an alternating 01010 pattern and * seqid is 0xffffffff. This value is not defined as special by * the RFC and is used by the FreeBSD NFS server to indicate an * MDS->DS proxy operation. */ st.other[0] = 0x55555555; st.other[1] = 0x55555555; st.other[2] = 0x55555555; st.seqid = 0xffffffff; nfscl_reqstart(nd, NFSPROC_SETACL, nmp, (u_int8_t *)fhp, sizeof(*fhp), NULL, NULL, 0, 0, false); nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL); /* * The "vp" argument to nfsv4_fillattr() is only used for vnode_type(), * so passing in the metadata "vp" will be ok, since it is of * the same type (VREG). */ nfsv4_fillattr(nd, NULL, vp, aclp, NULL, NULL, 0, &attrbits, NULL, NULL, 0, 0, 0, 0, 0, NULL); error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) { free(nd, M_TEMP); return (error); } NFSD_DEBUG(4, "nfsrv_setacldsdorpc: aft setaclrpc=%d\n", nd->nd_repstat); error = nd->nd_repstat; m_freem(nd->nd_mrep); free(nd, M_TEMP); return (error); } struct nfsrvsetacldsdorpc { int done; int inprog; struct task tsk; fhandle_t fh; struct nfsmount *nmp; struct vnode *vp; struct ucred *cred; NFSPROC_T *p; struct acl *aclp; int err; }; /* * Start up the thread that will execute nfsrv_setacldsdorpc(). */ static void start_setacldsdorpc(void *arg, int pending) { struct nfsrvsetacldsdorpc *drpc; drpc = (struct nfsrvsetacldsdorpc *)arg; drpc->err = nfsrv_setacldsdorpc(&drpc->fh, drpc->cred, drpc->p, drpc->vp, drpc->nmp, drpc->aclp); drpc->done = 1; } static int nfsrv_setacldsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, struct acl *aclp, int *failposp) { struct nfsrvsetacldsdorpc *drpc, *tdrpc = NULL; int error, i, ret, timo; NFSD_DEBUG(4, "in nfsrv_setacldsrpc\n"); drpc = NULL; if (mirrorcnt > 1) tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, M_WAITOK); /* * Do the setattr RPC for every DS, using a separate kernel process * for every DS except the last one. */ error = 0; for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { tdrpc->done = 0; tdrpc->inprog = 0; NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); tdrpc->nmp = *nmpp; tdrpc->vp = vp; tdrpc->cred = cred; tdrpc->p = p; tdrpc->aclp = aclp; tdrpc->err = 0; ret = EIO; if (nfs_pnfsiothreads != 0) { ret = nfs_pnfsio(start_setacldsdorpc, tdrpc); NFSD_DEBUG(4, "nfsrv_setacldsrpc: nfs_pnfsio=%d\n", ret); } if (ret != 0) { ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, aclp); if (nfsds_failerr(ret) && *failposp == -1) *failposp = i; else if (error == 0 && ret != 0) error = ret; } nmpp++; fhp++; } ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, aclp); if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) *failposp = mirrorcnt - 1; else if (error == 0 && ret != 0) error = ret; NFSD_DEBUG(4, "nfsrv_setacldsrpc: aft setextat=%d\n", error); tdrpc = drpc; timo = hz / 50; /* Wait for 20msec. */ if (timo < 1) timo = 1; for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { /* Wait for RPCs on separate threads to complete. */ while (tdrpc->inprog != 0 && tdrpc->done == 0) tsleep(&tdrpc->tsk, PVFS, "srvacds", timo); if (nfsds_failerr(tdrpc->err) && *failposp == -1) *failposp = i; else if (error == 0 && tdrpc->err != 0) error = tdrpc->err; } free(drpc, M_TEMP); return (error); } /* * Getattr call to the DS for the attributes that change due to writing. */ static int nfsrv_getattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap) { struct nfsrv_descript *nd; int error; nfsattrbit_t attrbits; NFSD_DEBUG(4, "in nfsrv_getattrdsrpc\n"); nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, (u_int8_t *)fhp, sizeof(fhandle_t), NULL, NULL, 0, 0, false); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED); (void) nfsrv_putattrbit(nd, &attrbits); error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) { free(nd, M_TEMP); return (error); } NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft getattrrpc=%d\n", nd->nd_repstat); if (nd->nd_repstat == 0) { error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); /* * We can only save the updated values in the extended * attribute if the vp is exclusively locked. * This should happen when any of the following operations * occur on the vnode: * Close, Delegreturn, LayoutCommit, LayoutReturn * As such, the updated extended attribute should get saved * before nfsrv_checkdsattr() returns 0 and allows the cached * attributes to be returned without calling this function. */ if (error == 0 && VOP_ISLOCKED(vp) == LK_EXCLUSIVE) { error = nfsrv_setextattr(vp, nap, p); NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft setextat=%d\n", error); } } else error = nd->nd_repstat; m_freem(nd->nd_mrep); free(nd, M_TEMP); NFSD_DEBUG(4, "nfsrv_getattrdsrpc error=%d\n", error); return (error); } /* * Seek call to a DS. */ static int nfsrv_seekdsrpc(fhandle_t *fhp, off_t *offp, int content, bool *eofp, struct ucred *cred, NFSPROC_T *p, struct nfsmount *nmp) { uint32_t *tl; struct nfsrv_descript *nd; nfsv4stateid_t st; int error; NFSD_DEBUG(4, "in nfsrv_seekdsrpc\n"); /* * Use a stateid where other is an alternating 01010 pattern and * seqid is 0xffffffff. This value is not defined as special by * the RFC and is used by the FreeBSD NFS server to indicate an * MDS->DS proxy operation. */ st.other[0] = 0x55555555; st.other[1] = 0x55555555; st.other[2] = 0x55555555; st.seqid = 0xffffffff; nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); nfscl_reqstart(nd, NFSPROC_SEEKDS, nmp, (u_int8_t *)fhp, sizeof(fhandle_t), NULL, NULL, 0, 0, false); nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED); txdr_hyper(*offp, tl); tl += 2; *tl = txdr_unsigned(content); error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) { free(nd, M_TEMP); return (error); } NFSD_DEBUG(4, "nfsrv_seekdsrpc: aft seekrpc=%d\n", nd->nd_repstat); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED + NFSX_HYPER); if (*tl++ == newnfs_true) *eofp = true; else *eofp = false; *offp = fxdr_hyper(tl); } else error = nd->nd_repstat; nfsmout: m_freem(nd->nd_mrep); free(nd, M_TEMP); NFSD_DEBUG(4, "nfsrv_seekdsrpc error=%d\n", error); return (error); } /* * Get the device id and file handle for a DS file. */ int nfsrv_dsgetdevandfh(struct vnode *vp, NFSPROC_T *p, int *mirrorcntp, fhandle_t *fhp, char *devid) { int buflen, error; char *buf; buflen = 1024; buf = malloc(buflen, M_TEMP, M_WAITOK); error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, NULL, fhp, devid, NULL, NULL, NULL, NULL, NULL, NULL); free(buf, M_TEMP); return (error); } /* * Do a Lookup against the DS for the filename. */ static int nfsrv_pnfslookupds(struct vnode *vp, struct vnode *dvp, struct pnfsdsfile *pf, struct vnode **nvpp, NFSPROC_T *p) { struct nameidata named; struct ucred *tcred; char *bufp; u_long *hashp; struct vnode *nvp; int error; tcred = newnfs_getcred(); named.ni_cnd.cn_nameiop = LOOKUP; named.ni_cnd.cn_lkflags = LK_SHARED | LK_RETRY; named.ni_cnd.cn_cred = tcred; named.ni_cnd.cn_thread = p; named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF | SAVENAME; nfsvno_setpathbuf(&named, &bufp, &hashp); named.ni_cnd.cn_nameptr = bufp; named.ni_cnd.cn_namelen = strlen(pf->dsf_filename); strlcpy(bufp, pf->dsf_filename, NAME_MAX); NFSD_DEBUG(4, "nfsrv_pnfslookupds: filename=%s\n", bufp); error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd); NFSD_DEBUG(4, "nfsrv_pnfslookupds: aft LOOKUP=%d\n", error); NFSFREECRED(tcred); nfsvno_relpathbuf(&named); if (error == 0) *nvpp = nvp; NFSD_DEBUG(4, "eo nfsrv_pnfslookupds=%d\n", error); return (error); } /* * Set the file handle to the correct one. */ static void nfsrv_pnfssetfh(struct vnode *vp, struct pnfsdsfile *pf, char *devid, char *fnamep, struct vnode *nvp, NFSPROC_T *p) { struct nfsnode *np; int ret = 0; np = VTONFS(nvp); NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh, NFSX_MYFH); /* * We can only do a vn_set_extattr() if the vnode is exclusively * locked and vn_start_write() has been done. If devid != NULL or * fnamep != NULL or the vnode is shared locked, vn_start_write() * may not have been done. * If not done now, it will be done on a future call. */ if (devid == NULL && fnamep == NULL && NFSVOPISLOCKED(vp) == LK_EXCLUSIVE) ret = vn_extattr_set(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", sizeof(*pf), (char *)pf, p); NFSD_DEBUG(4, "eo nfsrv_pnfssetfh=%d\n", ret); } /* * Cause RPCs waiting on "nmp" to fail. This is called for a DS mount point * when the DS has failed. */ void nfsrv_killrpcs(struct nfsmount *nmp) { /* * Call newnfs_nmcancelreqs() to cause * any RPCs in progress on the mount point to * fail. * This will cause any process waiting for an * RPC to complete while holding a vnode lock * on the mounted-on vnode (such as "df" or * a non-forced "umount") to fail. * This will unlock the mounted-on vnode so * a forced dismount can succeed. * The NFSMNTP_CANCELRPCS flag should be set when this function is * called. */ newnfs_nmcancelreqs(nmp); } /* * Sum up the statfs info for each of the DSs, so that the client will * receive the total for all DSs. */ static int nfsrv_pnfsstatfs(struct statfs *sf, struct mount *mp) { struct statfs *tsf; struct nfsdevice *ds; struct vnode **dvpp, **tdvpp, *dvp; uint64_t tot; int cnt, error = 0, i; if (nfsrv_devidcnt <= 0) return (ENXIO); dvpp = mallocarray(nfsrv_devidcnt, sizeof(*dvpp), M_TEMP, M_WAITOK); tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK); /* Get an array of the dvps for the DSs. */ tdvpp = dvpp; i = 0; NFSDDSLOCK(); /* First, search for matches for same file system. */ TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { if (ds->nfsdev_nmp != NULL && ds->nfsdev_mdsisset != 0 && ds->nfsdev_mdsfsid.val[0] == mp->mnt_stat.f_fsid.val[0] && ds->nfsdev_mdsfsid.val[1] == mp->mnt_stat.f_fsid.val[1]) { if (++i > nfsrv_devidcnt) break; *tdvpp++ = ds->nfsdev_dvp; } } /* * If no matches for same file system, total all servers not assigned * to a file system. */ if (i == 0) { TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { if (ds->nfsdev_nmp != NULL && ds->nfsdev_mdsisset == 0) { if (++i > nfsrv_devidcnt) break; *tdvpp++ = ds->nfsdev_dvp; } } } NFSDDSUNLOCK(); cnt = i; /* Do a VFS_STATFS() for each of the DSs and sum them up. */ tdvpp = dvpp; for (i = 0; i < cnt && error == 0; i++) { dvp = *tdvpp++; error = VFS_STATFS(dvp->v_mount, tsf); if (error == 0) { if (sf->f_bsize == 0) { if (tsf->f_bsize > 0) sf->f_bsize = tsf->f_bsize; else sf->f_bsize = 8192; } if (tsf->f_blocks > 0) { if (sf->f_bsize != tsf->f_bsize) { tot = tsf->f_blocks * tsf->f_bsize; sf->f_blocks += (tot / sf->f_bsize); } else sf->f_blocks += tsf->f_blocks; } if (tsf->f_bfree > 0) { if (sf->f_bsize != tsf->f_bsize) { tot = tsf->f_bfree * tsf->f_bsize; sf->f_bfree += (tot / sf->f_bsize); } else sf->f_bfree += tsf->f_bfree; } if (tsf->f_bavail > 0) { if (sf->f_bsize != tsf->f_bsize) { tot = tsf->f_bavail * tsf->f_bsize; sf->f_bavail += (tot / sf->f_bsize); } else sf->f_bavail += tsf->f_bavail; } } } free(tsf, M_TEMP); free(dvpp, M_TEMP); return (error); } /* * Set an NFSv4 acl. */ int nfsrv_setacl(struct vnode *vp, NFSACL_T *aclp, struct ucred *cred, NFSPROC_T *p) { int error; if (nfsrv_useacl == 0 || nfs_supportsnfsv4acls(vp) == 0) { error = NFSERR_ATTRNOTSUPP; goto out; } /* * With NFSv4 ACLs, chmod(2) may need to add additional entries. * Make sure it has enough room for that - splitting every entry * into two and appending "canonical six" entries at the end. * Cribbed out of kern/vfs_acl.c - Rick M. */ if (aclp->acl_cnt > (ACL_MAX_ENTRIES - 6) / 2) { error = NFSERR_ATTRNOTSUPP; goto out; } error = VOP_SETACL(vp, ACL_TYPE_NFS4, aclp, cred, p); if (error == 0) { error = nfsrv_dssetacl(vp, aclp, cred, p); if (error == ENOENT) error = 0; } out: NFSEXITCODE(error); return (error); } /* * Seek vnode op call (actually it is a VOP_IOCTL()). * This function is called with the vnode locked, but unlocks and vrele()s * the vp before returning. */ int nfsvno_seek(struct nfsrv_descript *nd, struct vnode *vp, u_long cmd, off_t *offp, int content, bool *eofp, struct ucred *cred, NFSPROC_T *p) { struct nfsvattr at; int error, ret; ASSERT_VOP_LOCKED(vp, "nfsvno_seek vp"); /* * Attempt to seek on a DS file. A return of ENOENT implies * there is no DS file to seek on. */ error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SEEKDS, NULL, NULL, NULL, NULL, NULL, offp, content, eofp); if (error != ENOENT) { vput(vp); return (error); } /* * Do the VOP_IOCTL() call. For the case where *offp == file_size, * VOP_IOCTL() will return ENXIO. However, the correct reply for * NFSv4.2 is *eofp == true and error == 0 for this case. */ NFSVOPUNLOCK(vp); error = VOP_IOCTL(vp, cmd, offp, 0, cred, p); *eofp = false; if (error == ENXIO || (error == 0 && cmd == FIOSEEKHOLE)) { /* Handle the cases where we might be at EOF. */ ret = nfsvno_getattr(vp, &at, nd, p, 0, NULL); if (ret == 0 && *offp == at.na_size) { *eofp = true; error = 0; } if (ret != 0 && error == 0) error = ret; } vrele(vp); NFSEXITCODE(error); return (error); } /* * Allocate vnode op call. */ int nfsvno_allocate(struct vnode *vp, off_t off, off_t len, struct ucred *cred, NFSPROC_T *p) { int error, trycnt; ASSERT_VOP_ELOCKED(vp, "nfsvno_allocate vp"); /* * Attempt to allocate on a DS file. A return of ENOENT implies * there is no DS file to allocate on. */ error = nfsrv_proxyds(vp, off, 0, cred, p, NFSPROC_ALLOCATE, NULL, NULL, NULL, NULL, NULL, &len, 0, NULL); if (error != ENOENT) return (error); error = 0; /* * Do the actual VOP_ALLOCATE(), looping a reasonable number of * times to achieve completion. */ trycnt = 0; while (error == 0 && len > 0 && trycnt++ < 20) error = VOP_ALLOCATE(vp, &off, &len); if (error == 0 && len > 0) error = NFSERR_IO; NFSEXITCODE(error); return (error); } /* * Get Extended Atribute vnode op into an mbuf list. */ int nfsvno_getxattr(struct vnode *vp, char *name, uint32_t maxresp, struct ucred *cred, uint64_t flag, int maxextsiz, struct thread *p, struct mbuf **mpp, struct mbuf **mpendp, int *lenp) { struct iovec *iv; struct uio io, *uiop = &io; struct mbuf *m, *m2; int alen, error, len, tlen; size_t siz; /* First, find out the size of the extended attribute. */ error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL, &siz, cred, p); if (error != 0) return (NFSERR_NOXATTR); if (siz > maxresp - NFS_MAXXDR) return (NFSERR_XATTR2BIG); len = siz; tlen = NFSM_RNDUP(len); /* * If the cnt is larger than MCLBYTES, use ext_pgs if * possible. * Always use ext_pgs if ND_NOMAP is set. */ if ((flag & ND_NOMAP) != 0 || (tlen > MCLBYTES && PMAP_HAS_DMAP != 0 && ((flag & ND_TLS) != 0 || nfs_use_ext_pgs))) uiop->uio_iovcnt = nfsrv_createiovec_extpgs(tlen, maxextsiz, &m, &m2, &iv); else uiop->uio_iovcnt = nfsrv_createiovec(tlen, &m, &m2, &iv); uiop->uio_iov = iv; uiop->uio_offset = 0; uiop->uio_resid = tlen; uiop->uio_rw = UIO_READ; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = p; #ifdef MAC error = mac_vnode_check_getextattr(cred, vp, EXTATTR_NAMESPACE_USER, name); if (error != 0) goto out; #endif if (tlen > 0) error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop, NULL, cred, p); if (error != 0) goto out; if (uiop->uio_resid > 0) { alen = tlen; len = tlen - uiop->uio_resid; tlen = NFSM_RNDUP(len); if (alen != tlen) printf("nfsvno_getxattr: weird size read\n"); if (tlen == 0) { m_freem(m); m = m2 = NULL; } else if (alen != tlen || tlen != len) m2 = nfsrv_adj(m, alen - tlen, tlen - len); } *lenp = len; *mpp = m; *mpendp = m2; out: if (error != 0) { if (m != NULL) m_freem(m); *lenp = 0; } free(iv, M_TEMP); NFSEXITCODE(error); return (error); } /* * Set Extended attribute vnode op from an mbuf list. */ int nfsvno_setxattr(struct vnode *vp, char *name, int len, struct mbuf *m, char *cp, int dextpg, int dextpgsiz, struct ucred *cred, struct thread *p) { struct iovec *iv; struct uio uio, *uiop = &uio; int cnt, error; error = 0; #ifdef MAC error = mac_vnode_check_setextattr(cred, vp, EXTATTR_NAMESPACE_USER, name); #endif if (error != 0) goto out; uiop->uio_rw = UIO_WRITE; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = p; uiop->uio_offset = 0; uiop->uio_resid = len; if ((m->m_flags & M_NOMAP) != 0) error = nfsrv_createiovecw_extpgs(len, m, cp, dextpg, dextpgsiz, &iv, &cnt); else error = nfsrv_createiovecw(len, m, cp, &iv, &cnt); uiop->uio_iov = iv; uiop->uio_iovcnt = cnt; if (error == 0) { error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop, cred, p); free(iv, M_TEMP); } out: NFSEXITCODE(error); return (error); } /* * Remove Extended attribute vnode op. */ int nfsvno_rmxattr(struct nfsrv_descript *nd, struct vnode *vp, char *name, struct ucred *cred, struct thread *p) { int error; /* * Get rid of any delegations. I am not sure why this is required, * but RFC-8276 says so. */ error = nfsrv_checkremove(vp, 0, nd, nd->nd_clientid, p); if (error != 0) goto out; #ifdef MAC error = mac_vnode_check_deleteextattr(cred, vp, EXTATTR_NAMESPACE_USER, name); if (error != 0) goto out; #endif error = VOP_DELETEEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, cred, p); if (error == EOPNOTSUPP) error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL, cred, p); out: NFSEXITCODE(error); return (error); } /* * List Extended Atribute vnode op into an mbuf list. */ int nfsvno_listxattr(struct vnode *vp, uint64_t cookie, struct ucred *cred, struct thread *p, u_char **bufp, uint32_t *lenp, bool *eofp) { struct iovec iv; struct uio io; int error; size_t siz; *bufp = NULL; /* First, find out the size of the extended attribute. */ error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, NULL, &siz, cred, p); if (error != 0) return (NFSERR_NOXATTR); if (siz <= cookie) { *lenp = 0; *eofp = true; goto out; } if (siz > cookie + *lenp) { siz = cookie + *lenp; *eofp = false; } else *eofp = true; /* Just choose a sanity limit of 10Mbytes for malloc(M_TEMP). */ if (siz > 10 * 1024 * 1024) { error = NFSERR_XATTR2BIG; goto out; } *bufp = malloc(siz, M_TEMP, M_WAITOK); iv.iov_base = *bufp; iv.iov_len = siz; io.uio_iovcnt = 1; io.uio_iov = &iv; io.uio_offset = 0; io.uio_resid = siz; io.uio_rw = UIO_READ; io.uio_segflg = UIO_SYSSPACE; io.uio_td = p; #ifdef MAC error = mac_vnode_check_listextattr(cred, vp, EXTATTR_NAMESPACE_USER); if (error != 0) goto out; #endif error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, &io, NULL, cred, p); if (error != 0) goto out; if (io.uio_resid > 0) siz -= io.uio_resid; *lenp = siz; out: if (error != 0) { free(*bufp, M_TEMP); *bufp = NULL; } NFSEXITCODE(error); return (error); } extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *); /* * Called once to initialize data structures... */ static int nfsd_modevent(module_t mod, int type, void *data) { int error = 0, i; static int loaded = 0; switch (type) { case MOD_LOAD: if (loaded) goto out; newnfs_portinit(); for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { mtx_init(&nfsrchash_table[i].mtx, "nfsrtc", NULL, MTX_DEF); mtx_init(&nfsrcahash_table[i].mtx, "nfsrtca", NULL, MTX_DEF); } mtx_init(&nfsrc_udpmtx, "nfsuc", NULL, MTX_DEF); mtx_init(&nfs_v4root_mutex, "nfs4rt", NULL, MTX_DEF); mtx_init(&nfsv4root_mnt.mnt_mtx, "nfs4mnt", NULL, MTX_DEF); mtx_init(&nfsrv_dontlistlock_mtx, "nfs4dnl", NULL, MTX_DEF); mtx_init(&nfsrv_recalllock_mtx, "nfs4rec", NULL, MTX_DEF); lockinit(&nfsv4root_mnt.mnt_explock, PVFS, "explock", 0, 0); nfsrvd_initcache(); nfsd_init(); NFSD_LOCK(); nfsrvd_init(0); NFSD_UNLOCK(); nfsd_mntinit(); #ifdef VV_DISABLEDELEG vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation; vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation; #endif nfsd_call_servertimer = nfsrv_servertimer; nfsd_call_nfsd = nfssvc_nfsd; loaded = 1; break; case MOD_UNLOAD: if (newnfs_numnfsd != 0) { error = EBUSY; break; } #ifdef VV_DISABLEDELEG vn_deleg_ops.vndeleg_recall = NULL; vn_deleg_ops.vndeleg_disable = NULL; #endif nfsd_call_servertimer = NULL; nfsd_call_nfsd = NULL; /* Clean out all NFSv4 state. */ nfsrv_throwawayallstate(curthread); /* Clean the NFS server reply cache */ nfsrvd_cleancache(); /* Free up the krpc server pool. */ if (nfsrvd_pool != NULL) svcpool_destroy(nfsrvd_pool); /* and get rid of the locks */ for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { mtx_destroy(&nfsrchash_table[i].mtx); mtx_destroy(&nfsrcahash_table[i].mtx); } mtx_destroy(&nfsrc_udpmtx); mtx_destroy(&nfs_v4root_mutex); mtx_destroy(&nfsv4root_mnt.mnt_mtx); mtx_destroy(&nfsrv_dontlistlock_mtx); mtx_destroy(&nfsrv_recalllock_mtx); for (i = 0; i < nfsrv_sessionhashsize; i++) mtx_destroy(&nfssessionhash[i].mtx); if (nfslayouthash != NULL) { for (i = 0; i < nfsrv_layouthashsize; i++) mtx_destroy(&nfslayouthash[i].mtx); free(nfslayouthash, M_NFSDSESSION); } lockdestroy(&nfsv4root_mnt.mnt_explock); free(nfsclienthash, M_NFSDCLIENT); free(nfslockhash, M_NFSDLOCKFILE); free(nfssessionhash, M_NFSDSESSION); loaded = 0; break; default: error = EOPNOTSUPP; break; } out: NFSEXITCODE(error); return (error); } static moduledata_t nfsd_mod = { "nfsd", nfsd_modevent, NULL, }; DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY); /* So that loader and kldload(2) can find us, wherever we are.. */ MODULE_VERSION(nfsd, 1); MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1); MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1); MODULE_DEPEND(nfsd, krpc, 1, 1, 1); MODULE_DEPEND(nfsd, nfssvc, 1, 1, 1); Index: projects/nfs-over-tls/sys/fs/nfsserver/nfs_nfsdserv.c =================================================================== --- projects/nfs-over-tls/sys/fs/nfsserver/nfs_nfsdserv.c (revision 360215) +++ projects/nfs-over-tls/sys/fs/nfsserver/nfs_nfsdserv.c (revision 360216) @@ -1,5930 +1,5930 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" /* * nfs version 2, 3 and 4 server calls to vnode ops * - these routines generally have 3 phases * 1 - break down and validate rpc request in mbuf list * 2 - do the vnode ops for the request, usually by calling a nfsvno_XXX() * function in nfsd_port.c * 3 - build the rpc reply in an mbuf list * For nfsv4, these functions are called for each Op within the Compound RPC. */ #ifndef APPLEKEXT #include #include #include /* Global vars */ extern u_int32_t newnfs_false, newnfs_true; extern enum vtype nv34tov_type[8]; extern struct timeval nfsboottime; extern int nfs_rootfhset; extern int nfsrv_enable_crossmntpt; extern int nfsrv_statehashsize; extern int nfsrv_layouthashsize; extern time_t nfsdev_time; extern volatile int nfsrv_devidcnt; extern int nfsd_debuglevel; extern u_long sb_max_adj; extern int nfsrv_pnfsatime; extern int nfsrv_maxpnfsmirror; extern int nfs_maxcopyrange; extern bool nfs_use_ext_pgs; #endif /* !APPLEKEXT */ static int nfs_async = 0; SYSCTL_DECL(_vfs_nfsd); SYSCTL_INT(_vfs_nfsd, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0, "Tell client that writes were synced even though they were not"); extern int nfsrv_doflexfile; SYSCTL_INT(_vfs_nfsd, OID_AUTO, default_flexfile, CTLFLAG_RW, &nfsrv_doflexfile, 0, "Make Flex File Layout the default for pNFS"); static int nfsrv_linux42server = 1; SYSCTL_INT(_vfs_nfsd, OID_AUTO, linux42server, CTLFLAG_RW, &nfsrv_linux42server, 0, "Enable Linux style NFSv4.2 server (non-RFC compliant)"); static bool nfsrv_openaccess = true; SYSCTL_BOOL(_vfs_nfsd, OID_AUTO, v4openaccess, CTLFLAG_RW, &nfsrv_openaccess, 0, "Enable Linux style NFSv4 Open access check"); /* * This list defines the GSS mechanisms supported. * (Don't ask me how you get these strings from the RFC stuff like * iso(1), org(3)... but someone did it, so I don't need to know.) */ static struct nfsgss_mechlist nfsgss_mechlist[] = { { 9, "\052\206\110\206\367\022\001\002\002", 11 }, { 0, "", 0 }, }; /* local functions */ static void nfsrvd_symlinksub(struct nfsrv_descript *nd, struct nameidata *ndp, struct nfsvattr *nvap, fhandle_t *fhp, vnode_t *vpp, vnode_t dirp, struct nfsvattr *dirforp, struct nfsvattr *diraftp, int *diraft_retp, nfsattrbit_t *attrbitp, NFSACL_T *aclp, NFSPROC_T *p, struct nfsexstuff *exp, char *pathcp, int pathlen); static void nfsrvd_mkdirsub(struct nfsrv_descript *nd, struct nameidata *ndp, struct nfsvattr *nvap, fhandle_t *fhp, vnode_t *vpp, vnode_t dirp, struct nfsvattr *dirforp, struct nfsvattr *diraftp, int *diraft_retp, nfsattrbit_t *attrbitp, NFSACL_T *aclp, NFSPROC_T *p, struct nfsexstuff *exp); /* * nfs access service (not a part of NFS V2) */ APPLESTATIC int nfsrvd_access(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { u_int32_t *tl; int getret, error = 0; struct nfsvattr nva; u_int32_t testmode, nfsmode, supported = 0; accmode_t deletebit; struct thread *p = curthread; if (nd->nd_repstat) { nfsrv_postopattr(nd, 1, &nva); goto out; } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nfsmode = fxdr_unsigned(u_int32_t, *tl); if ((nd->nd_flag & ND_NFSV4) && (nfsmode & ~(NFSACCESS_READ | NFSACCESS_LOOKUP | NFSACCESS_MODIFY | NFSACCESS_EXTEND | NFSACCESS_DELETE | NFSACCESS_EXECUTE | NFSACCESS_XAREAD | NFSACCESS_XAWRITE | NFSACCESS_XALIST))) { nd->nd_repstat = NFSERR_INVAL; vput(vp); goto out; } if (nfsmode & NFSACCESS_READ) { supported |= NFSACCESS_READ; if (nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~NFSACCESS_READ; } if (nfsmode & NFSACCESS_MODIFY) { supported |= NFSACCESS_MODIFY; if (nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~NFSACCESS_MODIFY; } if (nfsmode & NFSACCESS_EXTEND) { supported |= NFSACCESS_EXTEND; if (nfsvno_accchk(vp, VWRITE | VAPPEND, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~NFSACCESS_EXTEND; } if (nfsmode & NFSACCESS_XAREAD) { supported |= NFSACCESS_XAREAD; if (nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~NFSACCESS_XAREAD; } if (nfsmode & NFSACCESS_XAWRITE) { supported |= NFSACCESS_XAWRITE; if (nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~NFSACCESS_XAWRITE; } if (nfsmode & NFSACCESS_XALIST) { supported |= NFSACCESS_XALIST; if (nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~NFSACCESS_XALIST; } if (nfsmode & NFSACCESS_DELETE) { supported |= NFSACCESS_DELETE; if (vp->v_type == VDIR) deletebit = VDELETE_CHILD; else deletebit = VDELETE; if (nfsvno_accchk(vp, deletebit, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~NFSACCESS_DELETE; } if (vnode_vtype(vp) == VDIR) testmode = NFSACCESS_LOOKUP; else testmode = NFSACCESS_EXECUTE; if (nfsmode & testmode) { supported |= (nfsmode & testmode); if (nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~testmode; } nfsmode &= supported; if (nd->nd_flag & ND_NFSV3) { getret = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); nfsrv_postopattr(nd, getret, &nva); } vput(vp); if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(supported); } else NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(nfsmode); out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfs getattr service */ APPLESTATIC int nfsrvd_getattr(struct nfsrv_descript *nd, int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { struct nfsvattr nva; fhandle_t fh; int at_root = 0, error = 0, supports_nfsv4acls; struct nfsreferral *refp; nfsattrbit_t attrbits, tmpbits; struct mount *mp; struct vnode *tvp = NULL; struct vattr va; uint64_t mounted_on_fileno = 0; accmode_t accmode; struct thread *p = curthread; if (nd->nd_repstat) goto out; if (nd->nd_flag & ND_NFSV4) { error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (error) { vput(vp); goto out; } /* * Check for a referral. */ refp = nfsv4root_getreferral(vp, NULL, 0); if (refp != NULL) { (void) nfsrv_putreferralattr(nd, &attrbits, refp, 1, &nd->nd_repstat); vput(vp); goto out; } if (nd->nd_repstat == 0) { accmode = 0; NFSSET_ATTRBIT(&tmpbits, &attrbits); /* * GETATTR with write-only attr time_access_set and time_modify_set * should return NFS4ERR_INVAL. */ if (NFSISSET_ATTRBIT(&tmpbits, NFSATTRBIT_TIMEACCESSSET) || NFSISSET_ATTRBIT(&tmpbits, NFSATTRBIT_TIMEMODIFYSET)){ error = NFSERR_INVAL; vput(vp); goto out; } if (NFSISSET_ATTRBIT(&tmpbits, NFSATTRBIT_ACL)) { NFSCLRBIT_ATTRBIT(&tmpbits, NFSATTRBIT_ACL); accmode |= VREAD_ACL; } if (NFSNONZERO_ATTRBIT(&tmpbits)) accmode |= VREAD_ATTRIBUTES; if (accmode != 0) nd->nd_repstat = nfsvno_accchk(vp, accmode, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); } } if (!nd->nd_repstat) nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, &attrbits); if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV4) { if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_FILEHANDLE)) nd->nd_repstat = nfsvno_getfh(vp, &fh, p); if (!nd->nd_repstat) nd->nd_repstat = nfsrv_checkgetattr(nd, vp, &nva, &attrbits, p); if (nd->nd_repstat == 0) { supports_nfsv4acls = nfs_supportsnfsv4acls(vp); mp = vp->v_mount; if (nfsrv_enable_crossmntpt != 0 && vp->v_type == VDIR && (vp->v_vflag & VV_ROOT) != 0 && vp != rootvnode) { tvp = mp->mnt_vnodecovered; VREF(tvp); at_root = 1; } else at_root = 0; vfs_ref(mp); NFSVOPUNLOCK(vp); if (at_root != 0) { if ((nd->nd_repstat = NFSVOPLOCK(tvp, LK_SHARED)) == 0) { nd->nd_repstat = VOP_GETATTR( tvp, &va, nd->nd_cred); vput(tvp); } else vrele(tvp); if (nd->nd_repstat == 0) mounted_on_fileno = (uint64_t) va.va_fileid; else at_root = 0; } if (nd->nd_repstat == 0) nd->nd_repstat = vfs_busy(mp, 0); vfs_rel(mp); if (nd->nd_repstat == 0) { (void)nfsvno_fillattr(nd, mp, vp, &nva, &fh, 0, &attrbits, nd->nd_cred, p, isdgram, 1, supports_nfsv4acls, at_root, mounted_on_fileno); vfs_unbusy(mp); } vrele(vp); } else vput(vp); } else { nfsrv_fillattr(nd, &nva); vput(vp); } } else { vput(vp); } out: NFSEXITCODE2(error, nd); return (error); } /* * nfs setattr service */ APPLESTATIC int nfsrvd_setattr(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { struct nfsvattr nva, nva2; u_int32_t *tl; int preat_ret = 1, postat_ret = 1, gcheck = 0, error = 0; int gotproxystateid; struct timespec guard = { 0, 0 }; nfsattrbit_t attrbits, retbits; nfsv4stateid_t stateid; NFSACL_T *aclp = NULL; struct thread *p = curthread; if (nd->nd_repstat) { nfsrv_wcc(nd, preat_ret, &nva2, postat_ret, &nva); goto out; } #ifdef NFS4_ACL_EXTATTR_NAME aclp = acl_alloc(M_WAITOK); aclp->acl_cnt = 0; #endif gotproxystateid = 0; NFSVNO_ATTRINIT(&nva); if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); stateid.other[0] = *tl++; stateid.other[1] = *tl++; stateid.other[2] = *tl; if (stateid.other[0] == 0x55555555 && stateid.other[1] == 0x55555555 && stateid.other[2] == 0x55555555 && stateid.seqid == 0xffffffff) gotproxystateid = 1; } error = nfsrv_sattr(nd, vp, &nva, &attrbits, aclp, p); if (error) goto nfsmout; /* For NFSv4, only va_uid is used from nva2. */ NFSZERO_ATTRBIT(&retbits); NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_OWNER); preat_ret = nfsvno_getattr(vp, &nva2, nd, p, 1, &retbits); if (!nd->nd_repstat) nd->nd_repstat = preat_ret; NFSZERO_ATTRBIT(&retbits); if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); gcheck = fxdr_unsigned(int, *tl); if (gcheck) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); fxdr_nfsv3time(tl, &guard); } if (!nd->nd_repstat && gcheck && (nva2.na_ctime.tv_sec != guard.tv_sec || nva2.na_ctime.tv_nsec != guard.tv_nsec)) nd->nd_repstat = NFSERR_NOT_SYNC; if (nd->nd_repstat) { vput(vp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif nfsrv_wcc(nd, preat_ret, &nva2, postat_ret, &nva); goto out; } } else if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV4)) nd->nd_repstat = nfsrv_checkuidgid(nd, &nva); /* * Now that we have all the fields, lets do it. * If the size is being changed write access is required, otherwise * just check for a read only file system. */ if (!nd->nd_repstat) { if (NFSVNO_NOTSETSIZE(&nva)) { if (NFSVNO_EXRDONLY(exp) || (vfs_flags(vnode_mount(vp)) & MNT_RDONLY)) nd->nd_repstat = EROFS; } else { if (vnode_vtype(vp) != VREG) nd->nd_repstat = EINVAL; else if (nva2.na_uid != nd->nd_cred->cr_uid || NFSVNO_EXSTRICTACCESS(exp)) nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); } } /* * Proxy operations from the MDS are allowed via the all 0s special * stateid. */ if (nd->nd_repstat == 0 && (nd->nd_flag & ND_NFSV4) != 0 && gotproxystateid == 0) nd->nd_repstat = nfsrv_checksetattr(vp, nd, &stateid, &nva, &attrbits, exp, p); if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV4)) { /* * For V4, try setting the attrbutes in sets, so that the * reply bitmap will be correct for an error case. */ if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_OWNER) || NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_OWNERGROUP)) { NFSVNO_ATTRINIT(&nva2); NFSVNO_SETATTRVAL(&nva2, uid, nva.na_uid); NFSVNO_SETATTRVAL(&nva2, gid, nva.na_gid); nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p, exp); if (!nd->nd_repstat) { if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_OWNER)) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_OWNER); if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_OWNERGROUP)) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_OWNERGROUP); } } if (!nd->nd_repstat && NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_SIZE)) { NFSVNO_ATTRINIT(&nva2); NFSVNO_SETATTRVAL(&nva2, size, nva.na_size); nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p, exp); if (!nd->nd_repstat) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_SIZE); } if (!nd->nd_repstat && (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET) || NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFYSET))) { NFSVNO_ATTRINIT(&nva2); NFSVNO_SETATTRVAL(&nva2, atime, nva.na_atime); NFSVNO_SETATTRVAL(&nva2, mtime, nva.na_mtime); if (nva.na_vaflags & VA_UTIMES_NULL) { nva2.na_vaflags |= VA_UTIMES_NULL; NFSVNO_SETACTIVE(&nva2, vaflags); } nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p, exp); if (!nd->nd_repstat) { if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET)) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_TIMEACCESSSET); if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFYSET)) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_TIMEMODIFYSET); } } if (!nd->nd_repstat && (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_MODE) || NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_MODESETMASKED))) { NFSVNO_ATTRINIT(&nva2); NFSVNO_SETATTRVAL(&nva2, mode, nva.na_mode); nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p, exp); if (!nd->nd_repstat) { if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_MODE)) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_MODE); if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_MODESETMASKED)) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_MODESETMASKED); } } #ifdef NFS4_ACL_EXTATTR_NAME if (!nd->nd_repstat && aclp->acl_cnt > 0 && NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_ACL)) { nd->nd_repstat = nfsrv_setacl(vp, aclp, nd->nd_cred, p); if (!nd->nd_repstat) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_ACL); } #endif } else if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_setattr(vp, &nva, nd->nd_cred, p, exp); } if (nd->nd_flag & (ND_NFSV2 | ND_NFSV3)) { postat_ret = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); if (!nd->nd_repstat) nd->nd_repstat = postat_ret; } vput(vp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, preat_ret, &nva2, postat_ret, &nva); else if (nd->nd_flag & ND_NFSV4) (void) nfsrv_putattrbit(nd, &retbits); else if (!nd->nd_repstat) nfsrv_fillattr(nd, &nva); out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif if (nd->nd_flag & ND_NFSV4) { /* * For all nd_repstat, the V4 reply includes a bitmap, * even NFSERR_BADXDR, which is what this will end up * returning. */ (void) nfsrv_putattrbit(nd, &retbits); } NFSEXITCODE2(error, nd); return (error); } /* * nfs lookup rpc * (Also performs lookup parent for v4) */ APPLESTATIC int nfsrvd_lookup(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, vnode_t *vpp, fhandle_t *fhp, struct nfsexstuff *exp) { struct nameidata named; vnode_t vp, dirp = NULL; int error = 0, dattr_ret = 1; struct nfsvattr nva, dattr; char *bufp; u_long *hashp; struct thread *p = curthread; if (nd->nd_repstat) { nfsrv_postopattr(nd, dattr_ret, &dattr); goto out; } /* * For some reason, if dp is a symlink, the error * returned is supposed to be NFSERR_SYMLINK and not NFSERR_NOTDIR. */ if (dp->v_type == VLNK && (nd->nd_flag & ND_NFSV4)) { nd->nd_repstat = NFSERR_SYMLINK; vrele(dp); goto out; } NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, LOOKUP, LOCKLEAF | SAVESTART); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) { vrele(dp); nfsvno_relpathbuf(&named); goto out; } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, p, &dirp); } else { vrele(dp); nfsvno_relpathbuf(&named); } if (nd->nd_repstat) { if (dirp) { if (nd->nd_flag & ND_NFSV3) dattr_ret = nfsvno_getattr(dirp, &dattr, nd, p, 0, NULL); vrele(dirp); } if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, dattr_ret, &dattr); goto out; } if (named.ni_startdir) vrele(named.ni_startdir); nfsvno_relpathbuf(&named); vp = named.ni_vp; if ((nd->nd_flag & ND_NFSV4) != 0 && !NFSVNO_EXPORTED(exp) && vp->v_type != VDIR && vp->v_type != VLNK) /* * Only allow lookup of VDIR and VLNK for traversal of * non-exported volumes during NFSv4 mounting. */ nd->nd_repstat = ENOENT; if (nd->nd_repstat == 0) nd->nd_repstat = nfsvno_getfh(vp, fhp, p); if (!(nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); if (vpp != NULL && nd->nd_repstat == 0) *vpp = vp; else vput(vp); if (dirp) { if (nd->nd_flag & ND_NFSV3) dattr_ret = nfsvno_getattr(dirp, &dattr, nd, p, 0, NULL); vrele(dirp); } if (nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, dattr_ret, &dattr); goto out; } if (nd->nd_flag & ND_NFSV2) { (void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 0); nfsrv_fillattr(nd, &nva); } else if (nd->nd_flag & ND_NFSV3) { (void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 0); nfsrv_postopattr(nd, 0, &nva); nfsrv_postopattr(nd, dattr_ret, &dattr); } out: NFSEXITCODE2(error, nd); return (error); } /* * nfs readlink service */ APPLESTATIC int nfsrvd_readlink(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { u_int32_t *tl; struct mbuf *mp = NULL, *mpend = NULL; int getret = 1, len; struct nfsvattr nva; struct thread *p = curthread; struct mbuf_ext_pgs *pgs; uint16_t off; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &nva); goto out; } if (vnode_vtype(vp) != VLNK) { if (nd->nd_flag & ND_NFSV2) nd->nd_repstat = ENXIO; else nd->nd_repstat = EINVAL; } if (nd->nd_repstat == 0) { if ((nd->nd_flag & ND_NOMAP) != 0) nd->nd_repstat = nfsvno_readlink(vp, nd->nd_cred, nd->nd_maxextsiz, p, &mp, &mpend, &len); else nd->nd_repstat = nfsvno_readlink(vp, nd->nd_cred, 0, p, &mp, &mpend, &len); } if (nd->nd_flag & ND_NFSV3) getret = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &nva); if (nd->nd_repstat) goto out; NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(len); if (mp != NULL) { nd->nd_mb->m_next = mp; nd->nd_mb = mpend; if ((mpend->m_flags & M_NOMAP) != 0) { - pgs = mpend->m_ext.ext_pgs; + pgs = &mpend->m_ext_pgs; nd->nd_bextpg = pgs->npgs - 1; nd->nd_bpos = (char *)(void *) - PHYS_TO_DMAP(pgs->pa[nd->nd_bextpg]); + PHYS_TO_DMAP(mpend->m_epg_pa[nd->nd_bextpg]); off = (nd->nd_bextpg == 0) ? pgs->first_pg_off : 0; nd->nd_bpos += off + pgs->last_pg_len; nd->nd_bextpgsiz = PAGE_SIZE - pgs->last_pg_len - off; } else nd->nd_bpos = mtod(mpend, char *) + mpend->m_len; } out: NFSEXITCODE2(0, nd); return (0); } /* * nfs read service */ APPLESTATIC int nfsrvd_read(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { u_int32_t *tl; int error = 0, cnt, getret = 1, gotproxystateid, reqlen, eof = 0; struct mbuf *m2, *m3; struct nfsvattr nva; off_t off = 0x0; struct nfsstate st, *stp = &st; struct nfslock lo, *lop = &lo; nfsv4stateid_t stateid; nfsquad_t clientid; struct thread *p = curthread; struct mbuf_ext_pgs *pgs; uint16_t poff; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &nva); goto out; } if (nd->nd_flag & ND_NFSV2) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); off = (off_t)fxdr_unsigned(u_int32_t, *tl++); reqlen = fxdr_unsigned(int, *tl); } else if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); off = fxdr_hyper(tl); tl += 2; reqlen = fxdr_unsigned(int, *tl); } else { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 3*NFSX_UNSIGNED); reqlen = fxdr_unsigned(int, *(tl + 6)); } if (reqlen > NFS_SRVMAXDATA(nd)) { reqlen = NFS_SRVMAXDATA(nd); } else if (reqlen < 0) { error = EBADRPC; goto nfsmout; } gotproxystateid = 0; if (nd->nd_flag & ND_NFSV4) { stp->ls_flags = (NFSLCK_CHECK | NFSLCK_READACCESS); lop->lo_flags = NFSLCK_READ; stp->ls_ownerlen = 0; stp->ls_op = NULL; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); clientid.lval[0] = stp->ls_stateid.other[0] = *tl++; clientid.lval[1] = stp->ls_stateid.other[1] = *tl++; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK1 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } stp->ls_stateid.other[2] = *tl++; /* * Don't allow the client to use a special stateid for a DS op. */ if ((nd->nd_flag & ND_DSSERVER) != 0 && ((stp->ls_stateid.other[0] == 0x0 && stp->ls_stateid.other[1] == 0x0 && stp->ls_stateid.other[2] == 0x0) || (stp->ls_stateid.other[0] == 0xffffffff && stp->ls_stateid.other[1] == 0xffffffff && stp->ls_stateid.other[2] == 0xffffffff) || stp->ls_stateid.seqid != 0)) nd->nd_repstat = NFSERR_BADSTATEID; /* However, allow the proxy stateid. */ if (stp->ls_stateid.seqid == 0xffffffff && stp->ls_stateid.other[0] == 0x55555555 && stp->ls_stateid.other[1] == 0x55555555 && stp->ls_stateid.other[2] == 0x55555555) gotproxystateid = 1; off = fxdr_hyper(tl); lop->lo_first = off; tl += 2; lop->lo_end = off + reqlen; /* * Paranoia, just in case it wraps around. */ if (lop->lo_end < off) lop->lo_end = NFS64BITSSET; } if (vnode_vtype(vp) != VREG) { if (nd->nd_flag & ND_NFSV3) nd->nd_repstat = EINVAL; else nd->nd_repstat = (vnode_vtype(vp) == VDIR) ? EISDIR : EINVAL; } getret = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); if (!nd->nd_repstat) nd->nd_repstat = getret; if (!nd->nd_repstat && (nva.na_uid != nd->nd_cred->cr_uid || NFSVNO_EXSTRICTACCESS(exp))) { nd->nd_repstat = nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat) nd->nd_repstat = nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); } /* * DS reads are marked by ND_DSSERVER or use the proxy special * stateid. */ if (nd->nd_repstat == 0 && (nd->nd_flag & (ND_NFSV4 | ND_DSSERVER)) == ND_NFSV4 && gotproxystateid == 0) nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid, &stateid, exp, nd, p); if (nd->nd_repstat) { vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &nva); goto out; } if (off >= nva.na_size) { cnt = 0; eof = 1; } else if (reqlen == 0) cnt = 0; else if ((off + reqlen) >= nva.na_size) { cnt = nva.na_size - off; eof = 1; } else cnt = reqlen; m3 = NULL; if (cnt > 0) { /* * If the cnt is larger than MCLBYTES, use ext_pgs if * possible. * Always use ext_pgs if ND_NOMAP is set. */ if ((nd->nd_flag & ND_NOMAP) != 0 || (PMAP_HAS_DMAP != 0 && ((nd->nd_flag & ND_TLS) != 0 || (nfs_use_ext_pgs && cnt > MCLBYTES)))) nd->nd_repstat = nfsvno_read(vp, off, cnt, nd->nd_cred, nd->nd_maxextsiz, p, &m3, &m2); else nd->nd_repstat = nfsvno_read(vp, off, cnt, nd->nd_cred, 0, p, &m3, &m2); if (!(nd->nd_flag & ND_NFSV4)) { getret = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); if (!nd->nd_repstat) nd->nd_repstat = getret; } if (nd->nd_repstat) { vput(vp); if (m3) m_freem(m3); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &nva); goto out; } } vput(vp); if (nd->nd_flag & ND_NFSV2) { nfsrv_fillattr(nd, &nva); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); } else { if (nd->nd_flag & ND_NFSV3) { nfsrv_postopattr(nd, getret, &nva); NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(cnt); } else NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (eof) *tl++ = newnfs_true; else *tl++ = newnfs_false; } *tl = txdr_unsigned(cnt); if (m3) { nd->nd_mb->m_next = m3; nd->nd_mb = m2; if ((m2->m_flags & M_NOMAP) != 0) { nd->nd_flag |= ND_NOMAP; - pgs = m2->m_ext.ext_pgs; + pgs = &m2->m_ext_pgs; nd->nd_bextpg = pgs->npgs - 1; nd->nd_bpos = (char *)(void *) - PHYS_TO_DMAP(pgs->pa[nd->nd_bextpg]); + PHYS_TO_DMAP(m2->m_epg_pa[nd->nd_bextpg]); poff = (nd->nd_bextpg == 0) ? pgs->first_pg_off : 0; nd->nd_bpos += poff + pgs->last_pg_len; nd->nd_bextpgsiz = PAGE_SIZE - pgs->last_pg_len - poff; } else nd->nd_bpos = mtod(m2, char *) + m2->m_len; } out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfs write service */ APPLESTATIC int nfsrvd_write(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { u_int32_t *tl; struct nfsvattr nva, forat; int aftat_ret = 1, retlen, len, error = 0, forat_ret = 1; int gotproxystateid, stable = NFSWRITE_FILESYNC; off_t off; struct nfsstate st, *stp = &st; struct nfslock lo, *lop = &lo; nfsv4stateid_t stateid; nfsquad_t clientid; nfsattrbit_t attrbits; struct thread *p = curthread; if (nd->nd_repstat) { nfsrv_wcc(nd, forat_ret, &forat, aftat_ret, &nva); goto out; } gotproxystateid = 0; if (nd->nd_flag & ND_NFSV2) { NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); off = (off_t)fxdr_unsigned(u_int32_t, *++tl); tl += 2; retlen = len = fxdr_unsigned(int32_t, *tl); } else if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); off = fxdr_hyper(tl); tl += 3; stable = fxdr_unsigned(int, *tl++); retlen = len = fxdr_unsigned(int32_t, *tl); } else { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 4 * NFSX_UNSIGNED); stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS); lop->lo_flags = NFSLCK_WRITE; stp->ls_ownerlen = 0; stp->ls_op = NULL; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); clientid.lval[0] = stp->ls_stateid.other[0] = *tl++; clientid.lval[1] = stp->ls_stateid.other[1] = *tl++; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK2 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } stp->ls_stateid.other[2] = *tl++; /* * Don't allow the client to use a special stateid for a DS op. */ if ((nd->nd_flag & ND_DSSERVER) != 0 && ((stp->ls_stateid.other[0] == 0x0 && stp->ls_stateid.other[1] == 0x0 && stp->ls_stateid.other[2] == 0x0) || (stp->ls_stateid.other[0] == 0xffffffff && stp->ls_stateid.other[1] == 0xffffffff && stp->ls_stateid.other[2] == 0xffffffff) || stp->ls_stateid.seqid != 0)) nd->nd_repstat = NFSERR_BADSTATEID; /* However, allow the proxy stateid. */ if (stp->ls_stateid.seqid == 0xffffffff && stp->ls_stateid.other[0] == 0x55555555 && stp->ls_stateid.other[1] == 0x55555555 && stp->ls_stateid.other[2] == 0x55555555) gotproxystateid = 1; off = fxdr_hyper(tl); lop->lo_first = off; tl += 2; stable = fxdr_unsigned(int, *tl++); retlen = len = fxdr_unsigned(int32_t, *tl); lop->lo_end = off + len; /* * Paranoia, just in case it wraps around, which shouldn't * ever happen anyhow. */ if (lop->lo_end < lop->lo_first) lop->lo_end = NFS64BITSSET; } if (retlen > NFS_SRVMAXIO || retlen < 0) nd->nd_repstat = EIO; if (vnode_vtype(vp) != VREG && !nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) nd->nd_repstat = EINVAL; else nd->nd_repstat = (vnode_vtype(vp) == VDIR) ? EISDIR : EINVAL; } NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER); forat_ret = nfsvno_getattr(vp, &forat, nd, p, 1, &attrbits); if (!nd->nd_repstat) nd->nd_repstat = forat_ret; if (!nd->nd_repstat && (forat.na_uid != nd->nd_cred->cr_uid || NFSVNO_EXSTRICTACCESS(exp))) nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); /* * DS reads are marked by ND_DSSERVER or use the proxy special * stateid. */ if (nd->nd_repstat == 0 && (nd->nd_flag & (ND_NFSV4 | ND_DSSERVER)) == ND_NFSV4 && gotproxystateid == 0) nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid, &stateid, exp, nd, p); if (nd->nd_repstat) { vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, forat_ret, &forat, aftat_ret, &nva); goto out; } /* * For NFS Version 2, it is not obvious what a write of zero length * should do, but I might as well be consistent with Version 3, * which is to return ok so long as there are no permission problems. */ if (retlen > 0) { nd->nd_repstat = nfsvno_write(vp, off, retlen, &stable, nd, p); error = nfsm_advance(nd, NFSM_RNDUP(retlen), -1); if (error) goto nfsmout; } if (nd->nd_flag & ND_NFSV4) aftat_ret = 0; else aftat_ret = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); vput(vp); if (!nd->nd_repstat) nd->nd_repstat = aftat_ret; if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, forat_ret, &forat, aftat_ret, &nva); if (nd->nd_repstat) goto out; NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(retlen); /* * If nfs_async is set, then pretend the write was FILESYNC. * Warning: Doing this violates RFC1813 and runs a risk * of data written by a client being lost when the server * crashes/reboots. */ if (stable == NFSWRITE_UNSTABLE && nfs_async == 0) *tl++ = txdr_unsigned(stable); else *tl++ = txdr_unsigned(NFSWRITE_FILESYNC); /* * Actually, there is no need to txdr these fields, * but it may make the values more human readable, * for debugging purposes. */ *tl++ = txdr_unsigned(nfsboottime.tv_sec); *tl = txdr_unsigned(nfsboottime.tv_usec); } else if (!nd->nd_repstat) nfsrv_fillattr(nd, &nva); out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfs create service (creates regular files for V2 and V3. Spec. files for V2.) * now does a truncate to 0 length via. setattr if it already exists * The core creation routine has been extracted out into nfsrv_creatsub(), * so it can also be used by nfsrv_open() for V4. */ APPLESTATIC int nfsrvd_create(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, struct nfsexstuff *exp) { struct nfsvattr nva, dirfor, diraft; struct nfsv2_sattr *sp; struct nameidata named; u_int32_t *tl; int error = 0, tsize, dirfor_ret = 1, diraft_ret = 1; int how = NFSCREATE_UNCHECKED, exclusive_flag = 0; NFSDEV_T rdev = 0; vnode_t vp = NULL, dirp = NULL; fhandle_t fh; char *bufp; u_long *hashp; enum vtype vtyp; int32_t cverf[2], tverf[2] = { 0, 0 }; struct thread *p = curthread; if (nd->nd_repstat) { nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) goto nfsmout; if (!nd->nd_repstat) { NFSVNO_ATTRINIT(&nva); if (nd->nd_flag & ND_NFSV2) { NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); vtyp = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode)); if (vtyp == VNON) vtyp = VREG; NFSVNO_SETATTRVAL(&nva, type, vtyp); NFSVNO_SETATTRVAL(&nva, mode, nfstov_mode(sp->sa_mode)); switch (nva.na_type) { case VREG: tsize = fxdr_unsigned(int32_t, sp->sa_size); if (tsize != -1) NFSVNO_SETATTRVAL(&nva, size, (u_quad_t)tsize); break; case VCHR: case VBLK: case VFIFO: rdev = fxdr_unsigned(NFSDEV_T, sp->sa_size); break; default: break; } } else { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); how = fxdr_unsigned(int, *tl); switch (how) { case NFSCREATE_GUARDED: case NFSCREATE_UNCHECKED: error = nfsrv_sattr(nd, NULL, &nva, NULL, NULL, p); if (error) goto nfsmout; break; case NFSCREATE_EXCLUSIVE: NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF); cverf[0] = *tl++; cverf[1] = *tl; exclusive_flag = 1; break; } NFSVNO_SETATTRVAL(&nva, type, VREG); } } if (nd->nd_repstat) { nfsvno_relpathbuf(&named); if (nd->nd_flag & ND_NFSV3) { dirfor_ret = nfsvno_getattr(dp, &dirfor, nd, p, 1, NULL); nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); } vput(dp); goto out; } nd->nd_repstat = nfsvno_namei(nd, &named, dp, 1, exp, p, &dirp); if (dirp) { if (nd->nd_flag & ND_NFSV2) { vrele(dirp); dirp = NULL; } else { dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, NULL); } } if (nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); if (dirp) vrele(dirp); goto out; } if (!(nd->nd_flag & ND_NFSV2)) { switch (how) { case NFSCREATE_GUARDED: if (named.ni_vp) nd->nd_repstat = EEXIST; break; case NFSCREATE_UNCHECKED: break; case NFSCREATE_EXCLUSIVE: if (named.ni_vp == NULL) NFSVNO_SETATTRVAL(&nva, mode, 0); break; } } /* * Iff doesn't exist, create it * otherwise just truncate to 0 length * should I set the mode too ? */ nd->nd_repstat = nfsvno_createsub(nd, &named, &vp, &nva, &exclusive_flag, cverf, rdev, exp); if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_getfh(vp, &fh, p); if (!nd->nd_repstat) nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); vput(vp); if (!nd->nd_repstat) { tverf[0] = nva.na_atime.tv_sec; tverf[1] = nva.na_atime.tv_nsec; } } if (nd->nd_flag & ND_NFSV2) { if (!nd->nd_repstat) { (void) nfsm_fhtom(nd, (u_int8_t *)&fh, 0, 0); nfsrv_fillattr(nd, &nva); } } else { if (exclusive_flag && !nd->nd_repstat && (cverf[0] != tverf[0] || cverf[1] != tverf[1])) nd->nd_repstat = EEXIST; diraft_ret = nfsvno_getattr(dirp, &diraft, nd, p, 0, NULL); vrele(dirp); if (!nd->nd_repstat) { (void) nfsm_fhtom(nd, (u_int8_t *)&fh, 0, 1); nfsrv_postopattr(nd, 0, &nva); } nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); } out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(dp); nfsvno_relpathbuf(&named); NFSEXITCODE2(error, nd); return (error); } /* * nfs v3 mknod service (and v4 create) */ APPLESTATIC int nfsrvd_mknod(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, vnode_t *vpp, fhandle_t *fhp, struct nfsexstuff *exp) { struct nfsvattr nva, dirfor, diraft; u_int32_t *tl; struct nameidata named; int error = 0, dirfor_ret = 1, diraft_ret = 1, pathlen; u_int32_t major, minor; enum vtype vtyp = VNON; nfstype nfs4type = NFNON; vnode_t vp, dirp = NULL; nfsattrbit_t attrbits; char *bufp = NULL, *pathcp = NULL; u_long *hashp, cnflags; NFSACL_T *aclp = NULL; struct thread *p = curthread; NFSVNO_ATTRINIT(&nva); cnflags = (LOCKPARENT | SAVESTART); if (nd->nd_repstat) { nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } #ifdef NFS4_ACL_EXTATTR_NAME aclp = acl_alloc(M_WAITOK); aclp->acl_cnt = 0; #endif /* * For V4, the creation stuff is here, Yuck! */ if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); vtyp = nfsv34tov_type(*tl); nfs4type = fxdr_unsigned(nfstype, *tl); switch (nfs4type) { case NFLNK: error = nfsvno_getsymlink(nd, &nva, p, &pathcp, &pathlen); if (error) goto nfsmout; break; case NFCHR: case NFBLK: NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); major = fxdr_unsigned(u_int32_t, *tl++); minor = fxdr_unsigned(u_int32_t, *tl); nva.na_rdev = NFSMAKEDEV(major, minor); break; case NFSOCK: case NFFIFO: break; case NFDIR: cnflags = (LOCKPARENT | SAVENAME); break; default: nd->nd_repstat = NFSERR_BADTYPE; vrele(dp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif goto out; } } NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, cnflags | NOCACHE); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) goto nfsmout; if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); vtyp = nfsv34tov_type(*tl); } error = nfsrv_sattr(nd, NULL, &nva, &attrbits, aclp, p); if (error) goto nfsmout; nva.na_type = vtyp; if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV3) && (vtyp == VCHR || vtyp == VBLK)) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); major = fxdr_unsigned(u_int32_t, *tl++); minor = fxdr_unsigned(u_int32_t, *tl); nva.na_rdev = NFSMAKEDEV(major, minor); } } dirfor_ret = nfsvno_getattr(dp, &dirfor, nd, p, 0, NULL); if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV4)) { if (!dirfor_ret && NFSVNO_ISSETGID(&nva) && dirfor.na_gid == nva.na_gid) NFSVNO_UNSET(&nva, gid); nd->nd_repstat = nfsrv_checkuidgid(nd, &nva); } if (nd->nd_repstat) { vrele(dp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif nfsvno_relpathbuf(&named); if (pathcp) free(pathcp, M_TEMP); if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } /* * Yuck! For V4, mkdir and link are here and some V4 clients don't fill * in va_mode, so we'll have to set a default here. */ if (NFSVNO_NOTSETMODE(&nva)) { if (vtyp == VLNK) nva.na_mode = 0755; else nva.na_mode = 0400; } if (vtyp == VDIR) named.ni_cnd.cn_flags |= WILLBEDIR; nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, p, &dirp); if (nd->nd_repstat) { if (dirp) { if (nd->nd_flag & ND_NFSV3) dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, NULL); vrele(dirp); } #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } if (dirp) dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, NULL); if ((nd->nd_flag & ND_NFSV4) && (vtyp == VDIR || vtyp == VLNK)) { if (vtyp == VDIR) { nfsrvd_mkdirsub(nd, &named, &nva, fhp, vpp, dirp, &dirfor, &diraft, &diraft_ret, &attrbits, aclp, p, exp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif goto out; } else if (vtyp == VLNK) { nfsrvd_symlinksub(nd, &named, &nva, fhp, vpp, dirp, &dirfor, &diraft, &diraft_ret, &attrbits, aclp, p, exp, pathcp, pathlen); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif free(pathcp, M_TEMP); goto out; } } nd->nd_repstat = nfsvno_mknod(&named, &nva, nd->nd_cred, p); if (!nd->nd_repstat) { vp = named.ni_vp; nfsrv_fixattr(nd, vp, &nva, aclp, p, &attrbits, exp); nd->nd_repstat = nfsvno_getfh(vp, fhp, p); if ((nd->nd_flag & ND_NFSV3) && !nd->nd_repstat) nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); if (vpp != NULL && nd->nd_repstat == 0) { NFSVOPUNLOCK(vp); *vpp = vp; } else vput(vp); } diraft_ret = nfsvno_getattr(dirp, &diraft, nd, p, 0, NULL); vrele(dirp); if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) { (void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 1); nfsrv_postopattr(nd, 0, &nva); } else { NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = newnfs_false; txdr_hyper(dirfor.na_filerev, tl); tl += 2; txdr_hyper(diraft.na_filerev, tl); (void) nfsrv_putattrbit(nd, &attrbits); } } if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif out: NFSEXITCODE2(0, nd); return (0); nfsmout: vrele(dp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif if (bufp) nfsvno_relpathbuf(&named); if (pathcp) free(pathcp, M_TEMP); NFSEXITCODE2(error, nd); return (error); } /* * nfs remove service */ APPLESTATIC int nfsrvd_remove(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, struct nfsexstuff *exp) { struct nameidata named; u_int32_t *tl; int error = 0, dirfor_ret = 1, diraft_ret = 1; vnode_t dirp = NULL; struct nfsvattr dirfor, diraft; char *bufp; u_long *hashp; struct thread *p = curthread; if (nd->nd_repstat) { nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, DELETE, LOCKPARENT | LOCKLEAF); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) { vput(dp); nfsvno_relpathbuf(&named); goto out; } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_namei(nd, &named, dp, 1, exp, p, &dirp); } else { vput(dp); nfsvno_relpathbuf(&named); } if (dirp) { if (!(nd->nd_flag & ND_NFSV2)) { dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, NULL); } else { vrele(dirp); dirp = NULL; } } if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV4) { if (vnode_vtype(named.ni_vp) == VDIR) nd->nd_repstat = nfsvno_rmdirsub(&named, 1, nd->nd_cred, p, exp); else nd->nd_repstat = nfsvno_removesub(&named, 1, nd->nd_cred, p, exp); } else if (nd->nd_procnum == NFSPROC_RMDIR) { nd->nd_repstat = nfsvno_rmdirsub(&named, 0, nd->nd_cred, p, exp); } else { nd->nd_repstat = nfsvno_removesub(&named, 0, nd->nd_cred, p, exp); } } if (!(nd->nd_flag & ND_NFSV2)) { if (dirp) { diraft_ret = nfsvno_getattr(dirp, &diraft, nd, p, 0, NULL); vrele(dirp); } if (nd->nd_flag & ND_NFSV3) { nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); } else if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = newnfs_false; txdr_hyper(dirfor.na_filerev, tl); tl += 2; txdr_hyper(diraft.na_filerev, tl); } } out: NFSEXITCODE2(error, nd); return (error); } /* * nfs rename service */ APPLESTATIC int nfsrvd_rename(struct nfsrv_descript *nd, int isdgram, vnode_t dp, vnode_t todp, struct nfsexstuff *exp, struct nfsexstuff *toexp) { u_int32_t *tl; int error = 0, fdirfor_ret = 1, fdiraft_ret = 1; int tdirfor_ret = 1, tdiraft_ret = 1; struct nameidata fromnd, tond; vnode_t fdirp = NULL, tdirp = NULL, tdp = NULL; struct nfsvattr fdirfor, fdiraft, tdirfor, tdiraft; struct nfsexstuff tnes; struct nfsrvfh tfh; char *bufp, *tbufp = NULL; u_long *hashp; fhandle_t fh; struct thread *p = curthread; if (nd->nd_repstat) { nfsrv_wcc(nd, fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); nfsrv_wcc(nd, tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); goto out; } if (!(nd->nd_flag & ND_NFSV2)) fdirfor_ret = nfsvno_getattr(dp, &fdirfor, nd, p, 1, NULL); tond.ni_cnd.cn_nameiop = 0; tond.ni_startdir = NULL; NFSNAMEICNDSET(&fromnd.ni_cnd, nd->nd_cred, DELETE, WANTPARENT | SAVESTART); nfsvno_setpathbuf(&fromnd, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &fromnd.ni_pathlen); if (error) { vput(dp); if (todp) vrele(todp); nfsvno_relpathbuf(&fromnd); goto out; } /* * Unlock dp in this code section, so it is unlocked before * tdp gets locked. This avoids a potential LOR if tdp is the * parent directory of dp. */ if (nd->nd_flag & ND_NFSV4) { tdp = todp; tnes = *toexp; if (dp != tdp) { NFSVOPUNLOCK(dp); /* Might lock tdp. */ tdirfor_ret = nfsvno_getattr(tdp, &tdirfor, nd, p, 0, NULL); } else { tdirfor_ret = nfsvno_getattr(tdp, &tdirfor, nd, p, 1, NULL); NFSVOPUNLOCK(dp); } } else { tfh.nfsrvfh_len = 0; error = nfsrv_mtofh(nd, &tfh); if (error == 0) error = nfsvno_getfh(dp, &fh, p); if (error) { vput(dp); /* todp is always NULL except NFSv4 */ nfsvno_relpathbuf(&fromnd); goto out; } /* If this is the same file handle, just VREF() the vnode. */ if (tfh.nfsrvfh_len == NFSX_MYFH && !NFSBCMP(tfh.nfsrvfh_data, &fh, NFSX_MYFH)) { VREF(dp); tdp = dp; tnes = *exp; tdirfor_ret = nfsvno_getattr(tdp, &tdirfor, nd, p, 1, NULL); NFSVOPUNLOCK(dp); } else { NFSVOPUNLOCK(dp); nd->nd_cred->cr_uid = nd->nd_saveduid; nfsd_fhtovp(nd, &tfh, LK_EXCLUSIVE, &tdp, &tnes, NULL, 0); /* Locks tdp. */ if (tdp) { tdirfor_ret = nfsvno_getattr(tdp, &tdirfor, nd, p, 1, NULL); NFSVOPUNLOCK(tdp); } } } NFSNAMEICNDSET(&tond.ni_cnd, nd->nd_cred, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART); nfsvno_setpathbuf(&tond, &tbufp, &hashp); if (!nd->nd_repstat) { error = nfsrv_parsename(nd, tbufp, hashp, &tond.ni_pathlen); if (error) { if (tdp) vrele(tdp); vrele(dp); nfsvno_relpathbuf(&fromnd); nfsvno_relpathbuf(&tond); goto out; } } if (nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) { nfsrv_wcc(nd, fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); nfsrv_wcc(nd, tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); } if (tdp) vrele(tdp); vrele(dp); nfsvno_relpathbuf(&fromnd); nfsvno_relpathbuf(&tond); goto out; } /* * Done parsing, now down to business. */ nd->nd_repstat = nfsvno_namei(nd, &fromnd, dp, 0, exp, p, &fdirp); if (nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) { nfsrv_wcc(nd, fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); nfsrv_wcc(nd, tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); } if (fdirp) vrele(fdirp); if (tdp) vrele(tdp); nfsvno_relpathbuf(&tond); goto out; } if (vnode_vtype(fromnd.ni_vp) == VDIR) tond.ni_cnd.cn_flags |= WILLBEDIR; nd->nd_repstat = nfsvno_namei(nd, &tond, tdp, 0, &tnes, p, &tdirp); nd->nd_repstat = nfsvno_rename(&fromnd, &tond, nd->nd_repstat, nd->nd_flag, nd->nd_cred, p); if (fdirp) fdiraft_ret = nfsvno_getattr(fdirp, &fdiraft, nd, p, 0, NULL); if (tdirp) tdiraft_ret = nfsvno_getattr(tdirp, &tdiraft, nd, p, 0, NULL); if (fdirp) vrele(fdirp); if (tdirp) vrele(tdirp); if (nd->nd_flag & ND_NFSV3) { nfsrv_wcc(nd, fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); nfsrv_wcc(nd, tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); } else if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 10 * NFSX_UNSIGNED); *tl++ = newnfs_false; txdr_hyper(fdirfor.na_filerev, tl); tl += 2; txdr_hyper(fdiraft.na_filerev, tl); tl += 2; *tl++ = newnfs_false; txdr_hyper(tdirfor.na_filerev, tl); tl += 2; txdr_hyper(tdiraft.na_filerev, tl); } out: NFSEXITCODE2(error, nd); return (error); } /* * nfs link service */ APPLESTATIC int nfsrvd_link(struct nfsrv_descript *nd, int isdgram, vnode_t vp, vnode_t tovp, struct nfsexstuff *exp, struct nfsexstuff *toexp) { struct nameidata named; u_int32_t *tl; int error = 0, dirfor_ret = 1, diraft_ret = 1, getret = 1; vnode_t dirp = NULL, dp = NULL; struct nfsvattr dirfor, diraft, at; struct nfsexstuff tnes; struct nfsrvfh dfh; char *bufp; u_long *hashp; struct thread *p = curthread; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } NFSVOPUNLOCK(vp); if (vnode_vtype(vp) == VDIR) { if (nd->nd_flag & ND_NFSV4) nd->nd_repstat = NFSERR_ISDIR; else nd->nd_repstat = NFSERR_INVAL; if (tovp) vrele(tovp); } if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV4) { dp = tovp; tnes = *toexp; } else { error = nfsrv_mtofh(nd, &dfh); if (error) { vrele(vp); /* tovp is always NULL unless NFSv4 */ goto out; } nfsd_fhtovp(nd, &dfh, LK_EXCLUSIVE, &dp, &tnes, NULL, 0); if (dp) NFSVOPUNLOCK(dp); } } NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, LOCKPARENT | SAVENAME | NOCACHE); if (!nd->nd_repstat) { nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) { vrele(vp); if (dp) vrele(dp); nfsvno_relpathbuf(&named); goto out; } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, &tnes, p, &dirp); } else { if (dp) vrele(dp); nfsvno_relpathbuf(&named); } } if (dirp) { if (nd->nd_flag & ND_NFSV2) { vrele(dirp); dirp = NULL; } else { dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, NULL); } } if (!nd->nd_repstat) nd->nd_repstat = nfsvno_link(&named, vp, nd->nd_cred, p, exp); if (nd->nd_flag & ND_NFSV3) getret = nfsvno_getattr(vp, &at, nd, p, 0, NULL); if (dirp) { diraft_ret = nfsvno_getattr(dirp, &diraft, nd, p, 0, NULL); vrele(dirp); } vrele(vp); if (nd->nd_flag & ND_NFSV3) { nfsrv_postopattr(nd, getret, &at); nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); } else if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = newnfs_false; txdr_hyper(dirfor.na_filerev, tl); tl += 2; txdr_hyper(diraft.na_filerev, tl); } out: NFSEXITCODE2(error, nd); return (error); } /* * nfs symbolic link service */ APPLESTATIC int nfsrvd_symlink(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, vnode_t *vpp, fhandle_t *fhp, struct nfsexstuff *exp) { struct nfsvattr nva, dirfor, diraft; struct nameidata named; int error = 0, dirfor_ret = 1, diraft_ret = 1, pathlen; vnode_t dirp = NULL; char *bufp, *pathcp = NULL; u_long *hashp; struct thread *p = curthread; if (nd->nd_repstat) { nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } if (vpp) *vpp = NULL; NFSVNO_ATTRINIT(&nva); NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, LOCKPARENT | SAVESTART | NOCACHE); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (!error && !nd->nd_repstat) error = nfsvno_getsymlink(nd, &nva, p, &pathcp, &pathlen); if (error) { vrele(dp); nfsvno_relpathbuf(&named); goto out; } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, p, &dirp); } else { vrele(dp); nfsvno_relpathbuf(&named); } if (dirp != NULL && !(nd->nd_flag & ND_NFSV3)) { vrele(dirp); dirp = NULL; } /* * And call nfsrvd_symlinksub() to do the common code. It will * return EBADRPC upon a parsing error, 0 otherwise. */ if (!nd->nd_repstat) { if (dirp != NULL) dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, NULL); nfsrvd_symlinksub(nd, &named, &nva, fhp, vpp, dirp, &dirfor, &diraft, &diraft_ret, NULL, NULL, p, exp, pathcp, pathlen); } else if (dirp != NULL) { dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, NULL); vrele(dirp); } if (pathcp) free(pathcp, M_TEMP); if (nd->nd_flag & ND_NFSV3) { if (!nd->nd_repstat) { (void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 1); nfsrv_postopattr(nd, 0, &nva); } nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); } out: NFSEXITCODE2(error, nd); return (error); } /* * Common code for creating a symbolic link. */ static void nfsrvd_symlinksub(struct nfsrv_descript *nd, struct nameidata *ndp, struct nfsvattr *nvap, fhandle_t *fhp, vnode_t *vpp, vnode_t dirp, struct nfsvattr *dirforp, struct nfsvattr *diraftp, int *diraft_retp, nfsattrbit_t *attrbitp, NFSACL_T *aclp, NFSPROC_T *p, struct nfsexstuff *exp, char *pathcp, int pathlen) { u_int32_t *tl; nd->nd_repstat = nfsvno_symlink(ndp, nvap, pathcp, pathlen, !(nd->nd_flag & ND_NFSV2), nd->nd_saveduid, nd->nd_cred, p, exp); if (!nd->nd_repstat && !(nd->nd_flag & ND_NFSV2)) { nfsrv_fixattr(nd, ndp->ni_vp, nvap, aclp, p, attrbitp, exp); if (nd->nd_flag & ND_NFSV3) { nd->nd_repstat = nfsvno_getfh(ndp->ni_vp, fhp, p); if (!nd->nd_repstat) nd->nd_repstat = nfsvno_getattr(ndp->ni_vp, nvap, nd, p, 1, NULL); } if (vpp != NULL && nd->nd_repstat == 0) { NFSVOPUNLOCK(ndp->ni_vp); *vpp = ndp->ni_vp; } else vput(ndp->ni_vp); } if (dirp) { *diraft_retp = nfsvno_getattr(dirp, diraftp, nd, p, 0, NULL); vrele(dirp); } if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = newnfs_false; txdr_hyper(dirforp->na_filerev, tl); tl += 2; txdr_hyper(diraftp->na_filerev, tl); (void) nfsrv_putattrbit(nd, attrbitp); } NFSEXITCODE2(0, nd); } /* * nfs mkdir service */ APPLESTATIC int nfsrvd_mkdir(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, vnode_t *vpp, fhandle_t *fhp, struct nfsexstuff *exp) { struct nfsvattr nva, dirfor, diraft; struct nameidata named; u_int32_t *tl; int error = 0, dirfor_ret = 1, diraft_ret = 1; vnode_t dirp = NULL; char *bufp; u_long *hashp; struct thread *p = curthread; if (nd->nd_repstat) { nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, LOCKPARENT | SAVENAME | NOCACHE); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) goto nfsmout; if (!nd->nd_repstat) { NFSVNO_ATTRINIT(&nva); if (nd->nd_flag & ND_NFSV3) { error = nfsrv_sattr(nd, NULL, &nva, NULL, NULL, p); if (error) goto nfsmout; } else { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nva.na_mode = nfstov_mode(*tl++); } } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, p, &dirp); } else { vrele(dp); nfsvno_relpathbuf(&named); } if (dirp != NULL && !(nd->nd_flag & ND_NFSV3)) { vrele(dirp); dirp = NULL; } if (nd->nd_repstat) { if (dirp != NULL) { dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, NULL); vrele(dirp); } if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } if (dirp != NULL) dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, NULL); /* * Call nfsrvd_mkdirsub() for the code common to V4 as well. */ nfsrvd_mkdirsub(nd, &named, &nva, fhp, vpp, dirp, &dirfor, &diraft, &diraft_ret, NULL, NULL, p, exp); if (nd->nd_flag & ND_NFSV3) { if (!nd->nd_repstat) { (void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 1); nfsrv_postopattr(nd, 0, &nva); } nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); } else if (!nd->nd_repstat) { (void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 0); nfsrv_fillattr(nd, &nva); } out: NFSEXITCODE2(0, nd); return (0); nfsmout: vrele(dp); nfsvno_relpathbuf(&named); NFSEXITCODE2(error, nd); return (error); } /* * Code common to mkdir for V2,3 and 4. */ static void nfsrvd_mkdirsub(struct nfsrv_descript *nd, struct nameidata *ndp, struct nfsvattr *nvap, fhandle_t *fhp, vnode_t *vpp, vnode_t dirp, struct nfsvattr *dirforp, struct nfsvattr *diraftp, int *diraft_retp, nfsattrbit_t *attrbitp, NFSACL_T *aclp, NFSPROC_T *p, struct nfsexstuff *exp) { vnode_t vp; u_int32_t *tl; NFSVNO_SETATTRVAL(nvap, type, VDIR); nd->nd_repstat = nfsvno_mkdir(ndp, nvap, nd->nd_saveduid, nd->nd_cred, p, exp); if (!nd->nd_repstat) { vp = ndp->ni_vp; nfsrv_fixattr(nd, vp, nvap, aclp, p, attrbitp, exp); nd->nd_repstat = nfsvno_getfh(vp, fhp, p); if (!(nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) nd->nd_repstat = nfsvno_getattr(vp, nvap, nd, p, 1, NULL); if (vpp && !nd->nd_repstat) { NFSVOPUNLOCK(vp); *vpp = vp; } else { vput(vp); } } if (dirp) { *diraft_retp = nfsvno_getattr(dirp, diraftp, nd, p, 0, NULL); vrele(dirp); } if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = newnfs_false; txdr_hyper(dirforp->na_filerev, tl); tl += 2; txdr_hyper(diraftp->na_filerev, tl); (void) nfsrv_putattrbit(nd, attrbitp); } NFSEXITCODE2(0, nd); } /* * nfs commit service */ APPLESTATIC int nfsrvd_commit(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { struct nfsvattr bfor, aft; u_int32_t *tl; int error = 0, for_ret = 1, aft_ret = 1, cnt; u_int64_t off; struct thread *p = curthread; if (nd->nd_repstat) { nfsrv_wcc(nd, for_ret, &bfor, aft_ret, &aft); goto out; } /* Return NFSERR_ISDIR in NFSv4 when commit on a directory. */ if (vp->v_type != VREG) { if (nd->nd_flag & ND_NFSV3) error = NFSERR_NOTSUPP; else error = (vp->v_type == VDIR) ? NFSERR_ISDIR : NFSERR_INVAL; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); /* * XXX At this time VOP_FSYNC() does not accept offset and byte * count parameters, so these arguments are useless (someday maybe). */ off = fxdr_hyper(tl); tl += 2; cnt = fxdr_unsigned(int, *tl); if (nd->nd_flag & ND_NFSV3) for_ret = nfsvno_getattr(vp, &bfor, nd, p, 1, NULL); nd->nd_repstat = nfsvno_fsync(vp, off, cnt, nd->nd_cred, p); if (nd->nd_flag & ND_NFSV3) { aft_ret = nfsvno_getattr(vp, &aft, nd, p, 1, NULL); nfsrv_wcc(nd, for_ret, &bfor, aft_ret, &aft); } vput(vp); if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); *tl++ = txdr_unsigned(nfsboottime.tv_sec); *tl = txdr_unsigned(nfsboottime.tv_usec); } out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfs statfs service */ APPLESTATIC int nfsrvd_statfs(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { struct statfs *sf; u_int32_t *tl; int getret = 1; struct nfsvattr at; u_quad_t tval; struct thread *p = curthread; sf = NULL; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); goto out; } sf = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); nd->nd_repstat = nfsvno_statfs(vp, sf); getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); if (nd->nd_repstat) goto out; if (nd->nd_flag & ND_NFSV2) { NFSM_BUILD(tl, u_int32_t *, NFSX_V2STATFS); *tl++ = txdr_unsigned(NFS_V2MAXDATA); *tl++ = txdr_unsigned(sf->f_bsize); *tl++ = txdr_unsigned(sf->f_blocks); *tl++ = txdr_unsigned(sf->f_bfree); *tl = txdr_unsigned(sf->f_bavail); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_V3STATFS); tval = (u_quad_t)sf->f_blocks; tval *= (u_quad_t)sf->f_bsize; txdr_hyper(tval, tl); tl += 2; tval = (u_quad_t)sf->f_bfree; tval *= (u_quad_t)sf->f_bsize; txdr_hyper(tval, tl); tl += 2; tval = (u_quad_t)sf->f_bavail; tval *= (u_quad_t)sf->f_bsize; txdr_hyper(tval, tl); tl += 2; tval = (u_quad_t)sf->f_files; txdr_hyper(tval, tl); tl += 2; tval = (u_quad_t)sf->f_ffree; txdr_hyper(tval, tl); tl += 2; tval = (u_quad_t)sf->f_ffree; txdr_hyper(tval, tl); tl += 2; *tl = 0; } out: free(sf, M_STATFS); NFSEXITCODE2(0, nd); return (0); } /* * nfs fsinfo service */ APPLESTATIC int nfsrvd_fsinfo(struct nfsrv_descript *nd, int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { u_int32_t *tl; struct nfsfsinfo fs; int getret = 1; struct nfsvattr at; struct thread *p = curthread; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); goto out; } getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); nfsvno_getfs(&fs, isdgram); vput(vp); nfsrv_postopattr(nd, getret, &at); NFSM_BUILD(tl, u_int32_t *, NFSX_V3FSINFO); *tl++ = txdr_unsigned(fs.fs_rtmax); *tl++ = txdr_unsigned(fs.fs_rtpref); *tl++ = txdr_unsigned(fs.fs_rtmult); *tl++ = txdr_unsigned(fs.fs_wtmax); *tl++ = txdr_unsigned(fs.fs_wtpref); *tl++ = txdr_unsigned(fs.fs_wtmult); *tl++ = txdr_unsigned(fs.fs_dtpref); txdr_hyper(fs.fs_maxfilesize, tl); tl += 2; txdr_nfsv3time(&fs.fs_timedelta, tl); tl += 2; *tl = txdr_unsigned(fs.fs_properties); out: NFSEXITCODE2(0, nd); return (0); } /* * nfs pathconf service */ APPLESTATIC int nfsrvd_pathconf(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { struct nfsv3_pathconf *pc; int getret = 1; long linkmax, namemax, chownres, notrunc; struct nfsvattr at; struct thread *p = curthread; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); goto out; } nd->nd_repstat = nfsvno_pathconf(vp, _PC_LINK_MAX, &linkmax, nd->nd_cred, p); if (!nd->nd_repstat) nd->nd_repstat = nfsvno_pathconf(vp, _PC_NAME_MAX, &namemax, nd->nd_cred, p); if (!nd->nd_repstat) nd->nd_repstat=nfsvno_pathconf(vp, _PC_CHOWN_RESTRICTED, &chownres, nd->nd_cred, p); if (!nd->nd_repstat) nd->nd_repstat = nfsvno_pathconf(vp, _PC_NO_TRUNC, ¬runc, nd->nd_cred, p); getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); vput(vp); nfsrv_postopattr(nd, getret, &at); if (!nd->nd_repstat) { NFSM_BUILD(pc, struct nfsv3_pathconf *, NFSX_V3PATHCONF); pc->pc_linkmax = txdr_unsigned(linkmax); pc->pc_namemax = txdr_unsigned(namemax); pc->pc_notrunc = txdr_unsigned(notrunc); pc->pc_chownrestricted = txdr_unsigned(chownres); /* * These should probably be supported by VOP_PATHCONF(), but * until msdosfs is exportable (why would you want to?), the * Unix defaults should be ok. */ pc->pc_caseinsensitive = newnfs_false; pc->pc_casepreserving = newnfs_true; } out: NFSEXITCODE2(0, nd); return (0); } /* * nfsv4 lock service */ APPLESTATIC int nfsrvd_lock(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { u_int32_t *tl; int i; struct nfsstate *stp = NULL; struct nfslock *lop; struct nfslockconflict cf; int error = 0; u_short flags = NFSLCK_LOCK, lflags; u_int64_t offset, len; nfsv4stateid_t stateid; nfsquad_t clientid; struct thread *p = curthread; NFSM_DISSECT(tl, u_int32_t *, 7 * NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl++); switch (i) { case NFSV4LOCKT_READW: flags |= NFSLCK_BLOCKING; case NFSV4LOCKT_READ: lflags = NFSLCK_READ; break; case NFSV4LOCKT_WRITEW: flags |= NFSLCK_BLOCKING; case NFSV4LOCKT_WRITE: lflags = NFSLCK_WRITE; break; default: nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } if (*tl++ == newnfs_true) flags |= NFSLCK_RECLAIM; offset = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); tl += 2; if (*tl == newnfs_true) flags |= NFSLCK_OPENTOLOCK; if (flags & NFSLCK_OPENTOLOCK) { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED + NFSX_STATEID); i = fxdr_unsigned(int, *(tl+4+(NFSX_STATEID / NFSX_UNSIGNED))); if (i <= 0 || i > NFSV4_OPAQUELIMIT) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } stp = malloc(sizeof (struct nfsstate) + i, M_NFSDSTATE, M_WAITOK); stp->ls_ownerlen = i; stp->ls_op = nd->nd_rp; stp->ls_seq = fxdr_unsigned(int, *tl++); stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); /* * For the special stateid of other all 0s and seqid == 1, set * the stateid to the current stateid, if it is set. */ if ((nd->nd_flag & ND_NFSV41) != 0 && stp->ls_stateid.seqid == 1 && stp->ls_stateid.other[0] == 0 && stp->ls_stateid.other[1] == 0 && stp->ls_stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) { stp->ls_stateid = nd->nd_curstateid; stp->ls_stateid.seqid = 0; } else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } stp->ls_opentolockseq = fxdr_unsigned(int, *tl++); clientid.lval[0] = *tl++; clientid.lval[1] = *tl++; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK3 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } error = nfsrv_mtostr(nd, stp->ls_owner, stp->ls_ownerlen); if (error) goto nfsmout; } else { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); stp = malloc(sizeof (struct nfsstate), M_NFSDSTATE, M_WAITOK); stp->ls_ownerlen = 0; stp->ls_op = nd->nd_rp; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); /* * For the special stateid of other all 0s and seqid == 1, set * the stateid to the current stateid, if it is set. */ if ((nd->nd_flag & ND_NFSV41) != 0 && stp->ls_stateid.seqid == 1 && stp->ls_stateid.other[0] == 0 && stp->ls_stateid.other[1] == 0 && stp->ls_stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) { stp->ls_stateid = nd->nd_curstateid; stp->ls_stateid.seqid = 0; } else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } stp->ls_seq = fxdr_unsigned(int, *tl); clientid.lval[0] = stp->ls_stateid.other[0]; clientid.lval[1] = stp->ls_stateid.other[1]; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK4 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } } lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK); lop->lo_first = offset; if (len == NFS64BITSSET) { lop->lo_end = NFS64BITSSET; } else { lop->lo_end = offset + len; if (lop->lo_end <= lop->lo_first) nd->nd_repstat = NFSERR_INVAL; } lop->lo_flags = lflags; stp->ls_flags = flags; stp->ls_uid = nd->nd_cred->cr_uid; /* * Do basic access checking. */ if (!nd->nd_repstat && vnode_vtype(vp) != VREG) { if (vnode_vtype(vp) == VDIR) nd->nd_repstat = NFSERR_ISDIR; else nd->nd_repstat = NFSERR_INVAL; } if (!nd->nd_repstat) { if (lflags & NFSLCK_WRITE) { nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); } else { nd->nd_repstat = nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat) nd->nd_repstat = nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); } } /* * We call nfsrv_lockctrl() even if nd_repstat set, so that the * seqid# gets updated. nfsrv_lockctrl() will return the value * of nd_repstat, if it gets that far. */ nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, &cf, clientid, &stateid, exp, nd, p); if (lop) free(lop, M_NFSDLOCK); if (stp) free(stp, M_NFSDSTATE); if (!nd->nd_repstat) { /* For NFSv4.1, set the Current StateID. */ if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_curstateid = stateid; nd->nd_flag |= ND_CURSTATEID; } NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER); } else if (nd->nd_repstat == NFSERR_DENIED) { NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED); txdr_hyper(cf.cl_first, tl); tl += 2; if (cf.cl_end == NFS64BITSSET) len = NFS64BITSSET; else len = cf.cl_end - cf.cl_first; txdr_hyper(len, tl); tl += 2; if (cf.cl_flags == NFSLCK_WRITE) *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE); else *tl++ = txdr_unsigned(NFSV4LOCKT_READ); *tl++ = stateid.other[0]; *tl = stateid.other[1]; (void) nfsm_strtom(nd, cf.cl_owner, cf.cl_ownerlen); } vput(vp); NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); if (stp) free(stp, M_NFSDSTATE); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 lock test service */ APPLESTATIC int nfsrvd_lockt(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { u_int32_t *tl; int i; struct nfsstate *stp = NULL; struct nfslock lo, *lop = &lo; struct nfslockconflict cf; int error = 0; nfsv4stateid_t stateid; nfsquad_t clientid; u_int64_t len; struct thread *p = curthread; NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED); i = fxdr_unsigned(int, *(tl + 7)); if (i <= 0 || i > NFSV4_OPAQUELIMIT) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } stp = malloc(sizeof (struct nfsstate) + i, M_NFSDSTATE, M_WAITOK); stp->ls_ownerlen = i; stp->ls_op = NULL; stp->ls_flags = NFSLCK_TEST; stp->ls_uid = nd->nd_cred->cr_uid; i = fxdr_unsigned(int, *tl++); switch (i) { case NFSV4LOCKT_READW: stp->ls_flags |= NFSLCK_BLOCKING; case NFSV4LOCKT_READ: lo.lo_flags = NFSLCK_READ; break; case NFSV4LOCKT_WRITEW: stp->ls_flags |= NFSLCK_BLOCKING; case NFSV4LOCKT_WRITE: lo.lo_flags = NFSLCK_WRITE; break; default: nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } lo.lo_first = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); if (len == NFS64BITSSET) { lo.lo_end = NFS64BITSSET; } else { lo.lo_end = lo.lo_first + len; if (lo.lo_end <= lo.lo_first) nd->nd_repstat = NFSERR_INVAL; } tl += 2; clientid.lval[0] = *tl++; clientid.lval[1] = *tl; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK5 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } error = nfsrv_mtostr(nd, stp->ls_owner, stp->ls_ownerlen); if (error) goto nfsmout; if (!nd->nd_repstat && vnode_vtype(vp) != VREG) { if (vnode_vtype(vp) == VDIR) nd->nd_repstat = NFSERR_ISDIR; else nd->nd_repstat = NFSERR_INVAL; } if (!nd->nd_repstat) nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, &cf, clientid, &stateid, exp, nd, p); if (nd->nd_repstat) { if (nd->nd_repstat == NFSERR_DENIED) { NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED); txdr_hyper(cf.cl_first, tl); tl += 2; if (cf.cl_end == NFS64BITSSET) len = NFS64BITSSET; else len = cf.cl_end - cf.cl_first; txdr_hyper(len, tl); tl += 2; if (cf.cl_flags == NFSLCK_WRITE) *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE); else *tl++ = txdr_unsigned(NFSV4LOCKT_READ); *tl++ = stp->ls_stateid.other[0]; *tl = stp->ls_stateid.other[1]; (void) nfsm_strtom(nd, cf.cl_owner, cf.cl_ownerlen); } } vput(vp); if (stp) free(stp, M_NFSDSTATE); NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); if (stp) free(stp, M_NFSDSTATE); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 unlock service */ APPLESTATIC int nfsrvd_locku(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { u_int32_t *tl; int i; struct nfsstate *stp; struct nfslock *lop; int error = 0; nfsv4stateid_t stateid; nfsquad_t clientid; u_int64_t len; struct thread *p = curthread; NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED + NFSX_STATEID); stp = malloc(sizeof (struct nfsstate), M_NFSDSTATE, M_WAITOK); lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK); stp->ls_flags = NFSLCK_UNLOCK; lop->lo_flags = NFSLCK_UNLOCK; stp->ls_op = nd->nd_rp; i = fxdr_unsigned(int, *tl++); switch (i) { case NFSV4LOCKT_READW: stp->ls_flags |= NFSLCK_BLOCKING; case NFSV4LOCKT_READ: break; case NFSV4LOCKT_WRITEW: stp->ls_flags |= NFSLCK_BLOCKING; case NFSV4LOCKT_WRITE: break; default: nd->nd_repstat = NFSERR_BADXDR; free(stp, M_NFSDSTATE); free(lop, M_NFSDLOCK); goto nfsmout; } stp->ls_ownerlen = 0; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_seq = fxdr_unsigned(int, *tl++); stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); /* * For the special stateid of other all 0s and seqid == 1, set the * stateid to the current stateid, if it is set. */ if ((nd->nd_flag & ND_NFSV41) != 0 && stp->ls_stateid.seqid == 1 && stp->ls_stateid.other[0] == 0 && stp->ls_stateid.other[1] == 0 && stp->ls_stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) { stp->ls_stateid = nd->nd_curstateid; stp->ls_stateid.seqid = 0; } else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } lop->lo_first = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); if (len == NFS64BITSSET) { lop->lo_end = NFS64BITSSET; } else { lop->lo_end = lop->lo_first + len; if (lop->lo_end <= lop->lo_first) nd->nd_repstat = NFSERR_INVAL; } clientid.lval[0] = stp->ls_stateid.other[0]; clientid.lval[1] = stp->ls_stateid.other[1]; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK6 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } if (!nd->nd_repstat && vnode_vtype(vp) != VREG) { if (vnode_vtype(vp) == VDIR) nd->nd_repstat = NFSERR_ISDIR; else nd->nd_repstat = NFSERR_INVAL; } /* * Call nfsrv_lockctrl() even if nd_repstat is set, so that the * seqid# gets incremented. nfsrv_lockctrl() will return the * value of nd_repstat, if it gets that far. */ nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid, &stateid, exp, nd, p); if (stp) free(stp, M_NFSDSTATE); if (lop) free(lop, M_NFSDLOCK); if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER); } nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 open service */ APPLESTATIC int nfsrvd_open(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, vnode_t *vpp, __unused fhandle_t *fhp, struct nfsexstuff *exp) { u_int32_t *tl; int i, retext; struct nfsstate *stp = NULL; int error = 0, create, claim, exclusive_flag = 0, override; u_int32_t rflags = NFSV4OPEN_LOCKTYPEPOSIX, acemask; int how = NFSCREATE_UNCHECKED; int32_t cverf[2], tverf[2] = { 0, 0 }; vnode_t vp = NULL, dirp = NULL; struct nfsvattr nva, dirfor, diraft; struct nameidata named; nfsv4stateid_t stateid, delegstateid; nfsattrbit_t attrbits; nfsquad_t clientid; char *bufp = NULL; u_long *hashp; NFSACL_T *aclp = NULL; struct thread *p = curthread; #ifdef NFS4_ACL_EXTATTR_NAME aclp = acl_alloc(M_WAITOK); aclp->acl_cnt = 0; #endif NFSZERO_ATTRBIT(&attrbits); named.ni_startdir = NULL; named.ni_cnd.cn_nameiop = 0; NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED); i = fxdr_unsigned(int, *(tl + 5)); if (i <= 0 || i > NFSV4_OPAQUELIMIT) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } stp = malloc(sizeof (struct nfsstate) + i, M_NFSDSTATE, M_WAITOK); stp->ls_ownerlen = i; stp->ls_op = nd->nd_rp; stp->ls_flags = NFSLCK_OPEN; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_seq = fxdr_unsigned(u_int32_t, *tl++); i = fxdr_unsigned(int, *tl++); retext = 0; if ((i & (NFSV4OPEN_WANTDELEGMASK | NFSV4OPEN_WANTSIGNALDELEG | NFSV4OPEN_WANTPUSHDELEG)) != 0 && (nd->nd_flag & ND_NFSV41) != 0) { retext = 1; /* For now, ignore these. */ i &= ~(NFSV4OPEN_WANTPUSHDELEG | NFSV4OPEN_WANTSIGNALDELEG); switch (i & NFSV4OPEN_WANTDELEGMASK) { case NFSV4OPEN_WANTANYDELEG: stp->ls_flags |= (NFSLCK_WANTRDELEG | NFSLCK_WANTWDELEG); i &= ~NFSV4OPEN_WANTDELEGMASK; break; case NFSV4OPEN_WANTREADDELEG: stp->ls_flags |= NFSLCK_WANTRDELEG; i &= ~NFSV4OPEN_WANTDELEGMASK; break; case NFSV4OPEN_WANTWRITEDELEG: stp->ls_flags |= NFSLCK_WANTWDELEG; i &= ~NFSV4OPEN_WANTDELEGMASK; break; case NFSV4OPEN_WANTNODELEG: stp->ls_flags |= NFSLCK_WANTNODELEG; i &= ~NFSV4OPEN_WANTDELEGMASK; break; case NFSV4OPEN_WANTCANCEL: printf("NFSv4: ignore Open WantCancel\n"); i &= ~NFSV4OPEN_WANTDELEGMASK; break; default: /* nd_repstat will be set to NFSERR_INVAL below. */ break; } } switch (i) { case NFSV4OPEN_ACCESSREAD: stp->ls_flags |= NFSLCK_READACCESS; break; case NFSV4OPEN_ACCESSWRITE: stp->ls_flags |= NFSLCK_WRITEACCESS; break; case NFSV4OPEN_ACCESSBOTH: stp->ls_flags |= (NFSLCK_READACCESS | NFSLCK_WRITEACCESS); break; default: nd->nd_repstat = NFSERR_INVAL; } i = fxdr_unsigned(int, *tl++); switch (i) { case NFSV4OPEN_DENYNONE: break; case NFSV4OPEN_DENYREAD: stp->ls_flags |= NFSLCK_READDENY; break; case NFSV4OPEN_DENYWRITE: stp->ls_flags |= NFSLCK_WRITEDENY; break; case NFSV4OPEN_DENYBOTH: stp->ls_flags |= (NFSLCK_READDENY | NFSLCK_WRITEDENY); break; default: nd->nd_repstat = NFSERR_INVAL; } clientid.lval[0] = *tl++; clientid.lval[1] = *tl; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK7 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } error = nfsrv_mtostr(nd, stp->ls_owner, stp->ls_ownerlen); if (error) goto nfsmout; NFSVNO_ATTRINIT(&nva); NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); create = fxdr_unsigned(int, *tl); if (!nd->nd_repstat) nd->nd_repstat = nfsvno_getattr(dp, &dirfor, nd, p, 0, NULL); if (create == NFSV4OPEN_CREATE) { nva.na_type = VREG; nva.na_mode = 0; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); how = fxdr_unsigned(int, *tl); switch (how) { case NFSCREATE_UNCHECKED: case NFSCREATE_GUARDED: error = nfsv4_sattr(nd, NULL, &nva, &attrbits, aclp, p); if (error) goto nfsmout; /* * If the na_gid being set is the same as that of * the directory it is going in, clear it, since * that is what will be set by default. This allows * a user that isn't in that group to do the create. */ if (!nd->nd_repstat && NFSVNO_ISSETGID(&nva) && nva.na_gid == dirfor.na_gid) NFSVNO_UNSET(&nva, gid); if (!nd->nd_repstat) nd->nd_repstat = nfsrv_checkuidgid(nd, &nva); break; case NFSCREATE_EXCLUSIVE: NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF); cverf[0] = *tl++; cverf[1] = *tl; break; case NFSCREATE_EXCLUSIVE41: NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF); cverf[0] = *tl++; cverf[1] = *tl; error = nfsv4_sattr(nd, NULL, &nva, &attrbits, aclp, p); if (error != 0) goto nfsmout; if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET)) nd->nd_repstat = NFSERR_INVAL; /* * If the na_gid being set is the same as that of * the directory it is going in, clear it, since * that is what will be set by default. This allows * a user that isn't in that group to do the create. */ if (nd->nd_repstat == 0 && NFSVNO_ISSETGID(&nva) && nva.na_gid == dirfor.na_gid) NFSVNO_UNSET(&nva, gid); if (nd->nd_repstat == 0) nd->nd_repstat = nfsrv_checkuidgid(nd, &nva); break; default: nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } } else if (create != NFSV4OPEN_NOCREATE) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } /* * Now, handle the claim, which usually includes looking up a * name in the directory referenced by dp. The exception is * NFSV4OPEN_CLAIMPREVIOUS. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); claim = fxdr_unsigned(int, *tl); if (claim == NFSV4OPEN_CLAIMDELEGATECUR) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl,(caddr_t)stateid.other,NFSX_STATEIDOTHER); stp->ls_flags |= NFSLCK_DELEGCUR; } else if (claim == NFSV4OPEN_CLAIMDELEGATEPREV) { stp->ls_flags |= NFSLCK_DELEGPREV; } if (claim == NFSV4OPEN_CLAIMNULL || claim == NFSV4OPEN_CLAIMDELEGATECUR || claim == NFSV4OPEN_CLAIMDELEGATEPREV) { if (!nd->nd_repstat && create == NFSV4OPEN_CREATE && claim != NFSV4OPEN_CLAIMNULL) nd->nd_repstat = NFSERR_INVAL; if (nd->nd_repstat) { nd->nd_repstat = nfsrv_opencheck(clientid, &stateid, stp, NULL, nd, p, nd->nd_repstat); goto nfsmout; } if (create == NFSV4OPEN_CREATE) NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE); else NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, LOOKUP, LOCKLEAF | SAVESTART); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) { vrele(dp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif free(stp, M_NFSDSTATE); nfsvno_relpathbuf(&named); NFSEXITCODE2(error, nd); return (error); } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, p, &dirp); } else { vrele(dp); nfsvno_relpathbuf(&named); } if (create == NFSV4OPEN_CREATE) { switch (how) { case NFSCREATE_UNCHECKED: if (named.ni_vp) { /* * Clear the setable attribute bits, except * for Size, if it is being truncated. */ NFSZERO_ATTRBIT(&attrbits); if (NFSVNO_ISSETSIZE(&nva)) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); } break; case NFSCREATE_GUARDED: if (named.ni_vp && !nd->nd_repstat) nd->nd_repstat = EEXIST; break; case NFSCREATE_EXCLUSIVE: exclusive_flag = 1; if (!named.ni_vp) nva.na_mode = 0; break; case NFSCREATE_EXCLUSIVE41: exclusive_flag = 1; break; } } nfsvno_open(nd, &named, clientid, &stateid, stp, &exclusive_flag, &nva, cverf, create, aclp, &attrbits, nd->nd_cred, exp, &vp); } else if (claim == NFSV4OPEN_CLAIMPREVIOUS || claim == NFSV4OPEN_CLAIMFH) { if (claim == NFSV4OPEN_CLAIMPREVIOUS) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); switch (i) { case NFSV4OPEN_DELEGATEREAD: stp->ls_flags |= NFSLCK_DELEGREAD; break; case NFSV4OPEN_DELEGATEWRITE: stp->ls_flags |= NFSLCK_DELEGWRITE; case NFSV4OPEN_DELEGATENONE: break; default: nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } stp->ls_flags |= NFSLCK_RECLAIM; } else { /* CLAIM_NULL_FH */ if (nd->nd_repstat == 0 && create == NFSV4OPEN_CREATE) nd->nd_repstat = NFSERR_INVAL; } vp = dp; NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); if (!VN_IS_DOOMED(vp)) nd->nd_repstat = nfsrv_opencheck(clientid, &stateid, stp, vp, nd, p, nd->nd_repstat); else nd->nd_repstat = NFSERR_PERM; } else { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } /* * Do basic access checking. */ if (!nd->nd_repstat && vnode_vtype(vp) != VREG) { /* * The IETF working group decided that this is the correct * error return for all non-regular files. */ nd->nd_repstat = (vp->v_type == VDIR) ? NFSERR_ISDIR : NFSERR_SYMLINK; } /* * If the Open is being done for a file that already exists, apply * normal permission checking including for the file owner, if * vfs.nfsd.v4openaccess is set. * Previously, the owner was always allowed to open the file to * be consistent with the NFS tradition of always allowing the * owner of the file to write to the file regardless of permissions. * It now appears that the Linux client expects the owner * permissions to be checked for opens that are not creating the * file. I believe the correct approach is to use the Access * operation's results to be consistent with NFSv3, but that is * not what the current Linux client appears to be doing. * Since both the Linux and OpenSolaris NFSv4 servers do this check, * I have enabled it by default. * If this semantic change causes a problem, it can be disabled by * setting the sysctl vfs.nfsd.v4openaccess to 0 to re-enable the * previous semantics. */ if (nfsrv_openaccess && create == NFSV4OPEN_NOCREATE) override = NFSACCCHK_NOOVERRIDE; else override = NFSACCCHK_ALLOWOWNER; if (!nd->nd_repstat && (stp->ls_flags & NFSLCK_WRITEACCESS)) nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p, override, NFSACCCHK_VPISLOCKED, NULL); if (!nd->nd_repstat && (stp->ls_flags & NFSLCK_READACCESS)) { nd->nd_repstat = nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p, override, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat) nd->nd_repstat = nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, override, NFSACCCHK_VPISLOCKED, NULL); } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); if (!nd->nd_repstat) { tverf[0] = nva.na_atime.tv_sec; tverf[1] = nva.na_atime.tv_nsec; } } if (!nd->nd_repstat && exclusive_flag && (cverf[0] != tverf[0] || cverf[1] != tverf[1])) nd->nd_repstat = EEXIST; /* * Do the open locking/delegation stuff. */ if (!nd->nd_repstat) nd->nd_repstat = nfsrv_openctrl(nd, vp, &stp, clientid, &stateid, &delegstateid, &rflags, exp, p, nva.na_filerev); /* * vp must be unlocked before the call to nfsvno_getattr(dirp,...) * below, to avoid a deadlock with the lookup in nfsvno_namei() above. * (ie: Leave the NFSVOPUNLOCK() about here.) */ if (vp) NFSVOPUNLOCK(vp); if (stp) free(stp, M_NFSDSTATE); if (!nd->nd_repstat && dirp) nd->nd_repstat = nfsvno_getattr(dirp, &diraft, nd, p, 0, NULL); if (!nd->nd_repstat) { /* For NFSv4.1, set the Current StateID. */ if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_curstateid = stateid; nd->nd_flag |= ND_CURSTATEID; } NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); if (claim == NFSV4OPEN_CLAIMPREVIOUS) { *tl++ = newnfs_true; *tl++ = 0; *tl++ = 0; *tl++ = 0; *tl++ = 0; } else { *tl++ = newnfs_false; /* Since dirp is not locked */ txdr_hyper(dirfor.na_filerev, tl); tl += 2; txdr_hyper(diraft.na_filerev, tl); tl += 2; } *tl = txdr_unsigned(rflags & NFSV4OPEN_RFLAGS); (void) nfsrv_putattrbit(nd, &attrbits); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (rflags & NFSV4OPEN_READDELEGATE) *tl = txdr_unsigned(NFSV4OPEN_DELEGATEREAD); else if (rflags & NFSV4OPEN_WRITEDELEGATE) *tl = txdr_unsigned(NFSV4OPEN_DELEGATEWRITE); else if (retext != 0) { *tl = txdr_unsigned(NFSV4OPEN_DELEGATENONEEXT); if ((rflags & NFSV4OPEN_WDNOTWANTED) != 0) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OPEN_NOTWANTED); } else if ((rflags & NFSV4OPEN_WDSUPPFTYPE) != 0) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OPEN_NOTSUPPFTYPE); } else if ((rflags & NFSV4OPEN_WDCONTENTION) != 0) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_CONTENTION); *tl = newnfs_false; } else if ((rflags & NFSV4OPEN_WDRESOURCE) != 0) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_RESOURCE); *tl = newnfs_false; } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OPEN_NOTWANTED); } } else *tl = txdr_unsigned(NFSV4OPEN_DELEGATENONE); if (rflags & (NFSV4OPEN_READDELEGATE|NFSV4OPEN_WRITEDELEGATE)) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID+NFSX_UNSIGNED); *tl++ = txdr_unsigned(delegstateid.seqid); NFSBCOPY((caddr_t)delegstateid.other, (caddr_t)tl, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); if (rflags & NFSV4OPEN_RECALL) *tl = newnfs_true; else *tl = newnfs_false; if (rflags & NFSV4OPEN_WRITEDELEGATE) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_LIMITSIZE); txdr_hyper(nva.na_size, tl); } NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4ACE_ALLOWEDTYPE); *tl++ = txdr_unsigned(0x0); acemask = NFSV4ACE_ALLFILESMASK; if (nva.na_mode & S_IRUSR) acemask |= NFSV4ACE_READMASK; if (nva.na_mode & S_IWUSR) acemask |= NFSV4ACE_WRITEMASK; if (nva.na_mode & S_IXUSR) acemask |= NFSV4ACE_EXECUTEMASK; *tl = txdr_unsigned(acemask); (void) nfsm_strtom(nd, "OWNER@", 6); } *vpp = vp; } else if (vp) { vrele(vp); } if (dirp) vrele(dirp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif NFSEXITCODE2(0, nd); return (0); nfsmout: vrele(dp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif if (stp) free(stp, M_NFSDSTATE); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 close service */ APPLESTATIC int nfsrvd_close(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { u_int32_t *tl; struct nfsstate st, *stp = &st; int error = 0, writeacc; nfsv4stateid_t stateid; nfsquad_t clientid; struct nfsvattr na; struct thread *p = curthread; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID); stp->ls_seq = fxdr_unsigned(u_int32_t, *tl++); stp->ls_ownerlen = 0; stp->ls_op = nd->nd_rp; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other, NFSX_STATEIDOTHER); /* * For the special stateid of other all 0s and seqid == 1, set the * stateid to the current stateid, if it is set. */ if ((nd->nd_flag & ND_NFSV41) != 0 && stp->ls_stateid.seqid == 1 && stp->ls_stateid.other[0] == 0 && stp->ls_stateid.other[1] == 0 && stp->ls_stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) stp->ls_stateid = nd->nd_curstateid; else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } stp->ls_flags = NFSLCK_CLOSE; clientid.lval[0] = stp->ls_stateid.other[0]; clientid.lval[1] = stp->ls_stateid.other[1]; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK8 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } nd->nd_repstat = nfsrv_openupdate(vp, stp, clientid, &stateid, nd, p, &writeacc); /* For pNFS, update the attributes. */ if (writeacc != 0 || nfsrv_pnfsatime != 0) nfsrv_updatemdsattr(vp, &na, p); vput(vp); if (!nd->nd_repstat) { /* * If the stateid that has been closed is the current stateid, * unset it. */ if ((nd->nd_flag & ND_CURSTATEID) != 0 && stateid.other[0] == nd->nd_curstateid.other[0] && stateid.other[1] == nd->nd_curstateid.other[1] && stateid.other[2] == nd->nd_curstateid.other[2]) nd->nd_flag &= ~ND_CURSTATEID; NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER); } NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 delegpurge service */ APPLESTATIC int nfsrvd_delegpurge(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { u_int32_t *tl; int error = 0; nfsquad_t clientid; struct thread *p = curthread; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); clientid.lval[0] = *tl++; clientid.lval[1] = *tl; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK9 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } nd->nd_repstat = nfsrv_delegupdate(nd, clientid, NULL, NULL, NFSV4OP_DELEGPURGE, nd->nd_cred, p, NULL); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 delegreturn service */ APPLESTATIC int nfsrvd_delegreturn(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { u_int32_t *tl; int error = 0, writeacc; nfsv4stateid_t stateid; nfsquad_t clientid; struct nfsvattr na; struct thread *p = curthread; NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stateid.other, NFSX_STATEIDOTHER); clientid.lval[0] = stateid.other[0]; clientid.lval[1] = stateid.other[1]; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK10 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } nd->nd_repstat = nfsrv_delegupdate(nd, clientid, &stateid, vp, NFSV4OP_DELEGRETURN, nd->nd_cred, p, &writeacc); /* For pNFS, update the attributes. */ if (writeacc != 0 || nfsrv_pnfsatime != 0) nfsrv_updatemdsattr(vp, &na, p); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 get file handle service */ APPLESTATIC int nfsrvd_getfh(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { fhandle_t fh; struct thread *p = curthread; nd->nd_repstat = nfsvno_getfh(vp, &fh, p); vput(vp); if (!nd->nd_repstat) (void) nfsm_fhtom(nd, (u_int8_t *)&fh, 0, 0); NFSEXITCODE2(0, nd); return (0); } /* * nfsv4 open confirm service */ APPLESTATIC int nfsrvd_openconfirm(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { u_int32_t *tl; struct nfsstate st, *stp = &st; int error = 0; nfsv4stateid_t stateid; nfsquad_t clientid; struct thread *p = curthread; if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); stp->ls_ownerlen = 0; stp->ls_op = nd->nd_rp; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); stp->ls_seq = fxdr_unsigned(u_int32_t, *tl); stp->ls_flags = NFSLCK_CONFIRM; clientid.lval[0] = stp->ls_stateid.other[0]; clientid.lval[1] = stp->ls_stateid.other[1]; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK11 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } nd->nd_repstat = nfsrv_openupdate(vp, stp, clientid, &stateid, nd, p, NULL); if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER); } nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 open downgrade service */ APPLESTATIC int nfsrvd_opendowngrade(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { u_int32_t *tl; int i; struct nfsstate st, *stp = &st; int error = 0; nfsv4stateid_t stateid; nfsquad_t clientid; struct thread *p = curthread; /* opendowngrade can only work on a file object.*/ if (vp->v_type != VREG) { error = NFSERR_INVAL; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED); stp->ls_ownerlen = 0; stp->ls_op = nd->nd_rp; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); /* * For the special stateid of other all 0s and seqid == 1, set the * stateid to the current stateid, if it is set. */ if ((nd->nd_flag & ND_NFSV41) != 0 && stp->ls_stateid.seqid == 1 && stp->ls_stateid.other[0] == 0 && stp->ls_stateid.other[1] == 0 && stp->ls_stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) stp->ls_stateid = nd->nd_curstateid; else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } stp->ls_seq = fxdr_unsigned(u_int32_t, *tl++); i = fxdr_unsigned(int, *tl++); if ((nd->nd_flag & ND_NFSV41) != 0) i &= ~NFSV4OPEN_WANTDELEGMASK; switch (i) { case NFSV4OPEN_ACCESSREAD: stp->ls_flags = (NFSLCK_READACCESS | NFSLCK_DOWNGRADE); break; case NFSV4OPEN_ACCESSWRITE: stp->ls_flags = (NFSLCK_WRITEACCESS | NFSLCK_DOWNGRADE); break; case NFSV4OPEN_ACCESSBOTH: stp->ls_flags = (NFSLCK_READACCESS | NFSLCK_WRITEACCESS | NFSLCK_DOWNGRADE); break; default: nd->nd_repstat = NFSERR_INVAL; } i = fxdr_unsigned(int, *tl); switch (i) { case NFSV4OPEN_DENYNONE: break; case NFSV4OPEN_DENYREAD: stp->ls_flags |= NFSLCK_READDENY; break; case NFSV4OPEN_DENYWRITE: stp->ls_flags |= NFSLCK_WRITEDENY; break; case NFSV4OPEN_DENYBOTH: stp->ls_flags |= (NFSLCK_READDENY | NFSLCK_WRITEDENY); break; default: nd->nd_repstat = NFSERR_INVAL; } clientid.lval[0] = stp->ls_stateid.other[0]; clientid.lval[1] = stp->ls_stateid.other[1]; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK12 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } if (!nd->nd_repstat) nd->nd_repstat = nfsrv_openupdate(vp, stp, clientid, &stateid, nd, p, NULL); if (!nd->nd_repstat) { /* For NFSv4.1, set the Current StateID. */ if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_curstateid = stateid; nd->nd_flag |= ND_CURSTATEID; } NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER); } nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 renew lease service */ APPLESTATIC int nfsrvd_renew(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { u_int32_t *tl; int error = 0; nfsquad_t clientid; struct thread *p = curthread; if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); clientid.lval[0] = *tl++; clientid.lval[1] = *tl; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK13 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } nd->nd_repstat = nfsrv_getclient(clientid, (CLOPS_RENEWOP|CLOPS_RENEW), NULL, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 security info service */ APPLESTATIC int nfsrvd_secinfo(struct nfsrv_descript *nd, int isdgram, vnode_t dp, struct nfsexstuff *exp) { u_int32_t *tl; int len; struct nameidata named; vnode_t dirp = NULL, vp; struct nfsrvfh fh; struct nfsexstuff retnes; u_int32_t *sizp; int error = 0, savflag, i; char *bufp; u_long *hashp; struct thread *p = curthread; /* * All this just to get the export flags for the name. */ NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, LOOKUP, LOCKLEAF | SAVESTART); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) { vput(dp); nfsvno_relpathbuf(&named); goto out; } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_namei(nd, &named, dp, 1, exp, p, &dirp); } else { vput(dp); nfsvno_relpathbuf(&named); } if (dirp) vrele(dirp); if (nd->nd_repstat) goto out; vrele(named.ni_startdir); nfsvno_relpathbuf(&named); fh.nfsrvfh_len = NFSX_MYFH; vp = named.ni_vp; nd->nd_repstat = nfsvno_getfh(vp, (fhandle_t *)fh.nfsrvfh_data, p); vput(vp); savflag = nd->nd_flag; if (!nd->nd_repstat) { nfsd_fhtovp(nd, &fh, LK_SHARED, &vp, &retnes, NULL, 0); if (vp) vput(vp); } nd->nd_flag = savflag; if (nd->nd_repstat) goto out; /* * Finally have the export flags for name, so we can create * the security info. */ len = 0; NFSM_BUILD(sizp, u_int32_t *, NFSX_UNSIGNED); for (i = 0; i < retnes.nes_numsecflavor; i++) { if (retnes.nes_secflavors[i] == AUTH_SYS) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(RPCAUTH_UNIX); len++; } else if (retnes.nes_secflavors[i] == RPCSEC_GSS_KRB5) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl++ = txdr_unsigned(RPCAUTH_GSS); (void) nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str, nfsgss_mechlist[KERBV_MECH].len); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(GSS_KERBV_QOP); *tl = txdr_unsigned(RPCAUTHGSS_SVCNONE); len++; } else if (retnes.nes_secflavors[i] == RPCSEC_GSS_KRB5I) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl++ = txdr_unsigned(RPCAUTH_GSS); (void) nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str, nfsgss_mechlist[KERBV_MECH].len); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(GSS_KERBV_QOP); *tl = txdr_unsigned(RPCAUTHGSS_SVCINTEGRITY); len++; } else if (retnes.nes_secflavors[i] == RPCSEC_GSS_KRB5P) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl++ = txdr_unsigned(RPCAUTH_GSS); (void) nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str, nfsgss_mechlist[KERBV_MECH].len); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(GSS_KERBV_QOP); *tl = txdr_unsigned(RPCAUTHGSS_SVCPRIVACY); len++; } } *sizp = txdr_unsigned(len); out: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 set client id service */ APPLESTATIC int nfsrvd_setclientid(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { u_int32_t *tl; int i; int error = 0, idlen; struct nfsclient *clp = NULL; #ifdef INET struct sockaddr_in *rin; #endif #ifdef INET6 struct sockaddr_in6 *rin6; #endif #if defined(INET) || defined(INET6) u_char *ucp, *ucp2; #endif u_char *verf, *addrbuf; nfsquad_t clientid, confirm; struct thread *p = curthread; if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto out; } NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF + NFSX_UNSIGNED); verf = (u_char *)tl; tl += (NFSX_VERF / NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i > NFSV4_OPAQUELIMIT || i <= 0) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } idlen = i; if (nd->nd_flag & ND_GSS) i += nd->nd_princlen; clp = malloc(sizeof(struct nfsclient) + i, M_NFSDCLIENT, M_WAITOK | M_ZERO); clp->lc_stateid = malloc(sizeof(struct nfsstatehead) * nfsrv_statehashsize, M_NFSDCLIENT, M_WAITOK); NFSINITSOCKMUTEX(&clp->lc_req.nr_mtx); /* Allocated large enough for an AF_INET or AF_INET6 socket. */ clp->lc_req.nr_nam = malloc(sizeof(struct sockaddr_in6), M_SONAME, M_WAITOK | M_ZERO); clp->lc_req.nr_cred = NULL; NFSBCOPY(verf, clp->lc_verf, NFSX_VERF); clp->lc_idlen = idlen; error = nfsrv_mtostr(nd, clp->lc_id, idlen); if (error) goto nfsmout; if (nd->nd_flag & ND_GSS) { clp->lc_flags = LCL_GSS; if (nd->nd_flag & ND_GSSINTEGRITY) clp->lc_flags |= LCL_GSSINTEGRITY; else if (nd->nd_flag & ND_GSSPRIVACY) clp->lc_flags |= LCL_GSSPRIVACY; } else { clp->lc_flags = 0; } if ((nd->nd_flag & ND_GSS) && nd->nd_princlen > 0) { clp->lc_flags |= LCL_NAME; clp->lc_namelen = nd->nd_princlen; clp->lc_name = &clp->lc_id[idlen]; NFSBCOPY(nd->nd_principal, clp->lc_name, clp->lc_namelen); } else { clp->lc_uid = nd->nd_cred->cr_uid; clp->lc_gid = nd->nd_cred->cr_gid; } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); clp->lc_program = fxdr_unsigned(u_int32_t, *tl); error = nfsrv_getclientipaddr(nd, clp); if (error) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); clp->lc_callback = fxdr_unsigned(u_int32_t, *tl); /* * nfsrv_setclient() does the actual work of adding it to the * client list. If there is no error, the structure has been * linked into the client list and clp should no longer be used * here. When an error is returned, it has not been linked in, * so it should be free'd. */ nd->nd_repstat = nfsrv_setclient(nd, &clp, &clientid, &confirm, p); if (nd->nd_repstat == NFSERR_CLIDINUSE) { /* * 8 is the maximum length of the port# string. */ addrbuf = malloc(INET6_ADDRSTRLEN + 8, M_TEMP, M_WAITOK); switch (clp->lc_req.nr_nam->sa_family) { #ifdef INET case AF_INET: if (clp->lc_flags & LCL_TCPCALLBACK) (void) nfsm_strtom(nd, "tcp", 3); else (void) nfsm_strtom(nd, "udp", 3); rin = (struct sockaddr_in *)clp->lc_req.nr_nam; ucp = (u_char *)&rin->sin_addr.s_addr; ucp2 = (u_char *)&rin->sin_port; sprintf(addrbuf, "%d.%d.%d.%d.%d.%d", ucp[0] & 0xff, ucp[1] & 0xff, ucp[2] & 0xff, ucp[3] & 0xff, ucp2[0] & 0xff, ucp2[1] & 0xff); break; #endif #ifdef INET6 case AF_INET6: if (clp->lc_flags & LCL_TCPCALLBACK) (void) nfsm_strtom(nd, "tcp6", 4); else (void) nfsm_strtom(nd, "udp6", 4); rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam; ucp = inet_ntop(AF_INET6, &rin6->sin6_addr, addrbuf, INET6_ADDRSTRLEN); if (ucp != NULL) i = strlen(ucp); else i = 0; ucp2 = (u_char *)&rin6->sin6_port; sprintf(&addrbuf[i], ".%d.%d", ucp2[0] & 0xff, ucp2[1] & 0xff); break; #endif } (void) nfsm_strtom(nd, addrbuf, strlen(addrbuf)); free(addrbuf, M_TEMP); } if (clp) { free(clp->lc_req.nr_nam, M_SONAME); NFSFREEMUTEX(&clp->lc_req.nr_mtx); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); } if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_HYPER); *tl++ = clientid.lval[0]; *tl++ = clientid.lval[1]; *tl++ = confirm.lval[0]; *tl = confirm.lval[1]; } out: NFSEXITCODE2(0, nd); return (0); nfsmout: if (clp) { free(clp->lc_req.nr_nam, M_SONAME); NFSFREEMUTEX(&clp->lc_req.nr_mtx); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); } NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 set client id confirm service */ APPLESTATIC int nfsrvd_setclientidcfrm(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { u_int32_t *tl; int error = 0; nfsquad_t clientid, confirm; struct thread *p = curthread; if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_HYPER); clientid.lval[0] = *tl++; clientid.lval[1] = *tl++; confirm.lval[0] = *tl++; confirm.lval[1] = *tl; /* * nfsrv_getclient() searches the client list for a match and * returns the appropriate NFSERR status. */ nd->nd_repstat = nfsrv_getclient(clientid, (CLOPS_CONFIRM|CLOPS_RENEW), NULL, NULL, confirm, 0, nd, p); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 verify service */ APPLESTATIC int nfsrvd_verify(struct nfsrv_descript *nd, int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { int error = 0, ret, fhsize = NFSX_MYFH; struct nfsvattr nva; struct statfs *sf; struct nfsfsinfo fs; fhandle_t fh; struct thread *p = curthread; sf = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); if (!nd->nd_repstat) nd->nd_repstat = nfsvno_statfs(vp, sf); if (!nd->nd_repstat) nd->nd_repstat = nfsvno_getfh(vp, &fh, p); if (!nd->nd_repstat) { nfsvno_getfs(&fs, isdgram); error = nfsv4_loadattr(nd, vp, &nva, NULL, &fh, fhsize, NULL, sf, NULL, &fs, NULL, 1, &ret, NULL, NULL, p, nd->nd_cred); if (!error) { if (nd->nd_procnum == NFSV4OP_NVERIFY) { if (ret == 0) nd->nd_repstat = NFSERR_SAME; else if (ret != NFSERR_NOTSAME) nd->nd_repstat = ret; } else if (ret) nd->nd_repstat = ret; } } vput(vp); free(sf, M_STATFS); NFSEXITCODE2(error, nd); return (error); } /* * nfs openattr rpc */ APPLESTATIC int nfsrvd_openattr(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, __unused vnode_t *vpp, __unused fhandle_t *fhp, __unused struct nfsexstuff *exp) { u_int32_t *tl; int error = 0, createdir __unused; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); createdir = fxdr_unsigned(int, *tl); nd->nd_repstat = NFSERR_NOTSUPP; nfsmout: vrele(dp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 release lock owner service */ APPLESTATIC int nfsrvd_releaselckown(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { u_int32_t *tl; struct nfsstate *stp = NULL; int error = 0, len; nfsquad_t clientid; struct thread *p = curthread; if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); len = fxdr_unsigned(int, *(tl + 2)); if (len <= 0 || len > NFSV4_OPAQUELIMIT) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } stp = malloc(sizeof (struct nfsstate) + len, M_NFSDSTATE, M_WAITOK); stp->ls_ownerlen = len; stp->ls_op = NULL; stp->ls_flags = NFSLCK_RELEASE; stp->ls_uid = nd->nd_cred->cr_uid; clientid.lval[0] = *tl++; clientid.lval[1] = *tl; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK14 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } error = nfsrv_mtostr(nd, stp->ls_owner, len); if (error) goto nfsmout; nd->nd_repstat = nfsrv_releaselckown(stp, clientid, p); free(stp, M_NFSDSTATE); NFSEXITCODE2(0, nd); return (0); nfsmout: if (stp) free(stp, M_NFSDSTATE); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 exchange_id service */ APPLESTATIC int nfsrvd_exchangeid(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { uint32_t *tl; int error = 0, i, idlen; struct nfsclient *clp = NULL; nfsquad_t clientid, confirm; uint8_t *verf; uint32_t sp4type, v41flags; uint64_t owner_minor; struct timespec verstime; #ifdef INET struct sockaddr_in *sin, *rin; #endif #ifdef INET6 struct sockaddr_in6 *sin6, *rin6; #endif struct thread *p = curthread; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF + NFSX_UNSIGNED); verf = (uint8_t *)tl; tl += (NFSX_VERF / NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i > NFSV4_OPAQUELIMIT || i <= 0) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } idlen = i; if (nd->nd_flag & ND_GSS) i += nd->nd_princlen; clp = malloc(sizeof(struct nfsclient) + i, M_NFSDCLIENT, M_WAITOK | M_ZERO); clp->lc_stateid = malloc(sizeof(struct nfsstatehead) * nfsrv_statehashsize, M_NFSDCLIENT, M_WAITOK); NFSINITSOCKMUTEX(&clp->lc_req.nr_mtx); /* Allocated large enough for an AF_INET or AF_INET6 socket. */ clp->lc_req.nr_nam = malloc(sizeof(struct sockaddr_in6), M_SONAME, M_WAITOK | M_ZERO); switch (nd->nd_nam->sa_family) { #ifdef INET case AF_INET: rin = (struct sockaddr_in *)clp->lc_req.nr_nam; sin = (struct sockaddr_in *)nd->nd_nam; rin->sin_family = AF_INET; rin->sin_len = sizeof(struct sockaddr_in); rin->sin_port = 0; rin->sin_addr.s_addr = sin->sin_addr.s_addr; break; #endif #ifdef INET6 case AF_INET6: rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam; sin6 = (struct sockaddr_in6 *)nd->nd_nam; rin6->sin6_family = AF_INET6; rin6->sin6_len = sizeof(struct sockaddr_in6); rin6->sin6_port = 0; rin6->sin6_addr = sin6->sin6_addr; break; #endif } clp->lc_req.nr_cred = NULL; NFSBCOPY(verf, clp->lc_verf, NFSX_VERF); clp->lc_idlen = idlen; error = nfsrv_mtostr(nd, clp->lc_id, idlen); if (error != 0) goto nfsmout; if ((nd->nd_flag & ND_GSS) != 0) { clp->lc_flags = LCL_GSS | LCL_NFSV41; if ((nd->nd_flag & ND_GSSINTEGRITY) != 0) clp->lc_flags |= LCL_GSSINTEGRITY; else if ((nd->nd_flag & ND_GSSPRIVACY) != 0) clp->lc_flags |= LCL_GSSPRIVACY; } else clp->lc_flags = LCL_NFSV41; if ((nd->nd_flag & ND_NFSV42) != 0) clp->lc_flags |= LCL_NFSV42; if ((nd->nd_flag & ND_GSS) != 0 && nd->nd_princlen > 0) { clp->lc_flags |= LCL_NAME; clp->lc_namelen = nd->nd_princlen; clp->lc_name = &clp->lc_id[idlen]; NFSBCOPY(nd->nd_principal, clp->lc_name, clp->lc_namelen); } else { clp->lc_uid = nd->nd_cred->cr_uid; clp->lc_gid = nd->nd_cred->cr_gid; } NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); v41flags = fxdr_unsigned(uint32_t, *tl++); if ((v41flags & ~(NFSV4EXCH_SUPPMOVEDREFER | NFSV4EXCH_SUPPMOVEDMIGR | NFSV4EXCH_BINDPRINCSTATEID | NFSV4EXCH_MASKPNFS | NFSV4EXCH_UPDCONFIRMEDRECA)) != 0) { nd->nd_repstat = NFSERR_INVAL; goto nfsmout; } if ((v41flags & NFSV4EXCH_UPDCONFIRMEDRECA) != 0) confirm.lval[1] = 1; else confirm.lval[1] = 0; if (nfsrv_devidcnt == 0) v41flags = NFSV4EXCH_USENONPNFS | NFSV4EXCH_USEPNFSDS; else v41flags = NFSV4EXCH_USEPNFSMDS; sp4type = fxdr_unsigned(uint32_t, *tl); if (sp4type != NFSV4EXCH_SP4NONE) { nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } /* * nfsrv_setclient() does the actual work of adding it to the * client list. If there is no error, the structure has been * linked into the client list and clp should no longer be used * here. When an error is returned, it has not been linked in, * so it should be free'd. */ nd->nd_repstat = nfsrv_setclient(nd, &clp, &clientid, &confirm, p); if (clp != NULL) { free(clp->lc_req.nr_nam, M_SONAME); NFSFREEMUTEX(&clp->lc_req.nr_mtx); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); } if (nd->nd_repstat == 0) { if (confirm.lval[1] != 0) v41flags |= NFSV4EXCH_CONFIRMEDR; NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + 3 * NFSX_UNSIGNED); *tl++ = clientid.lval[0]; /* ClientID */ *tl++ = clientid.lval[1]; *tl++ = txdr_unsigned(confirm.lval[0]); /* SequenceID */ *tl++ = txdr_unsigned(v41flags); /* Exch flags */ *tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE); /* No SSV */ owner_minor = 0; /* Owner */ txdr_hyper(owner_minor, tl); /* Minor */ (void)nfsm_strtom(nd, nd->nd_cred->cr_prison->pr_hostuuid, strlen(nd->nd_cred->cr_prison->pr_hostuuid)); /* Major */ (void)nfsm_strtom(nd, nd->nd_cred->cr_prison->pr_hostuuid, strlen(nd->nd_cred->cr_prison->pr_hostuuid)); /* Scope */ NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(1); (void)nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org")); (void)nfsm_strtom(nd, version, strlen(version)); NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME); verstime.tv_sec = 1293840000; /* Jan 1, 2011 */ verstime.tv_nsec = 0; txdr_nfsv4time(&verstime, tl); } NFSEXITCODE2(0, nd); return (0); nfsmout: if (clp != NULL) { free(clp->lc_req.nr_nam, M_SONAME); NFSFREEMUTEX(&clp->lc_req.nr_mtx); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); } NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 create session service */ APPLESTATIC int nfsrvd_createsession(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { uint32_t *tl; int error = 0; nfsquad_t clientid, confirm; struct nfsdsession *sep = NULL; uint32_t rdmacnt; struct thread *p = curthread; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } sep = (struct nfsdsession *)malloc(sizeof(struct nfsdsession), M_NFSDSESSION, M_WAITOK | M_ZERO); sep->sess_refcnt = 1; mtx_init(&sep->sess_cbsess.nfsess_mtx, "nfscbsession", NULL, MTX_DEF); NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED); clientid.lval[0] = *tl++; clientid.lval[1] = *tl++; confirm.lval[0] = fxdr_unsigned(uint32_t, *tl++); sep->sess_crflags = fxdr_unsigned(uint32_t, *tl); /* Persistent sessions and RDMA are not supported. */ sep->sess_crflags &= NFSV4CRSESS_CONNBACKCHAN; /* Fore channel attributes. */ NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED); tl++; /* Header pad always 0. */ sep->sess_maxreq = fxdr_unsigned(uint32_t, *tl++); if (sep->sess_maxreq > sb_max_adj - NFS_MAXXDR) { sep->sess_maxreq = sb_max_adj - NFS_MAXXDR; printf("Consider increasing kern.ipc.maxsockbuf\n"); } sep->sess_maxresp = fxdr_unsigned(uint32_t, *tl++); if (sep->sess_maxresp > sb_max_adj - NFS_MAXXDR) { sep->sess_maxresp = sb_max_adj - NFS_MAXXDR; printf("Consider increasing kern.ipc.maxsockbuf\n"); } sep->sess_maxrespcached = fxdr_unsigned(uint32_t, *tl++); sep->sess_maxops = fxdr_unsigned(uint32_t, *tl++); sep->sess_maxslots = fxdr_unsigned(uint32_t, *tl++); if (sep->sess_maxslots > NFSV4_SLOTS) sep->sess_maxslots = NFSV4_SLOTS; rdmacnt = fxdr_unsigned(uint32_t, *tl); if (rdmacnt > 1) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } else if (rdmacnt == 1) NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); /* Back channel attributes. */ NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED); tl++; /* Header pad always 0. */ sep->sess_cbmaxreq = fxdr_unsigned(uint32_t, *tl++); sep->sess_cbmaxresp = fxdr_unsigned(uint32_t, *tl++); sep->sess_cbmaxrespcached = fxdr_unsigned(uint32_t, *tl++); sep->sess_cbmaxops = fxdr_unsigned(uint32_t, *tl++); sep->sess_cbsess.nfsess_foreslots = fxdr_unsigned(uint32_t, *tl++); rdmacnt = fxdr_unsigned(uint32_t, *tl); if (rdmacnt > 1) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } else if (rdmacnt == 1) NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); sep->sess_cbprogram = fxdr_unsigned(uint32_t, *tl); /* * nfsrv_getclient() searches the client list for a match and * returns the appropriate NFSERR status. */ nd->nd_repstat = nfsrv_getclient(clientid, CLOPS_CONFIRM | CLOPS_RENEW, NULL, sep, confirm, sep->sess_cbprogram, nd, p); if (nd->nd_repstat == 0) { NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID); NFSBCOPY(sep->sess_sessionid, tl, NFSX_V4SESSIONID); NFSM_BUILD(tl, uint32_t *, 18 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(confirm.lval[0]); /* sequenceid */ *tl++ = txdr_unsigned(sep->sess_crflags); /* Fore channel attributes. */ *tl++ = 0; *tl++ = txdr_unsigned(sep->sess_maxreq); *tl++ = txdr_unsigned(sep->sess_maxresp); *tl++ = txdr_unsigned(sep->sess_maxrespcached); *tl++ = txdr_unsigned(sep->sess_maxops); *tl++ = txdr_unsigned(sep->sess_maxslots); *tl++ = txdr_unsigned(1); *tl++ = txdr_unsigned(0); /* No RDMA. */ /* Back channel attributes. */ *tl++ = 0; *tl++ = txdr_unsigned(sep->sess_cbmaxreq); *tl++ = txdr_unsigned(sep->sess_cbmaxresp); *tl++ = txdr_unsigned(sep->sess_cbmaxrespcached); *tl++ = txdr_unsigned(sep->sess_cbmaxops); *tl++ = txdr_unsigned(sep->sess_cbsess.nfsess_foreslots); *tl++ = txdr_unsigned(1); *tl = txdr_unsigned(0); /* No RDMA. */ } nfsmout: if (nd->nd_repstat != 0 && sep != NULL) free(sep, M_NFSDSESSION); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 sequence service */ APPLESTATIC int nfsrvd_sequence(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { uint32_t *tl; uint32_t highest_slotid, sequenceid, sflags, target_highest_slotid; int cache_this, error = 0; struct thread *p = curthread; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID); NFSBCOPY(tl, nd->nd_sessionid, NFSX_V4SESSIONID); NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED); sequenceid = fxdr_unsigned(uint32_t, *tl++); nd->nd_slotid = fxdr_unsigned(uint32_t, *tl++); highest_slotid = fxdr_unsigned(uint32_t, *tl++); if (*tl == newnfs_true) cache_this = 1; else cache_this = 0; nd->nd_flag |= ND_HASSEQUENCE; nd->nd_repstat = nfsrv_checksequence(nd, sequenceid, &highest_slotid, &target_highest_slotid, cache_this, &sflags, p); if (nd->nd_repstat == 0) { NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID); NFSBCOPY(nd->nd_sessionid, tl, NFSX_V4SESSIONID); NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(sequenceid); *tl++ = txdr_unsigned(nd->nd_slotid); *tl++ = txdr_unsigned(highest_slotid); *tl++ = txdr_unsigned(target_highest_slotid); *tl = txdr_unsigned(sflags); } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 reclaim complete service */ APPLESTATIC int nfsrvd_reclaimcomplete(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { uint32_t *tl; int error = 0, onefs; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); /* * I believe that a ReclaimComplete with rca_one_fs == TRUE is only * to be used after a file system has been transferred to a different * file server. However, RFC5661 is somewhat vague w.r.t. this and * the ESXi 6.7 client does both a ReclaimComplete with rca_one_fs * == TRUE and one with ReclaimComplete with rca_one_fs == FALSE. * Therefore, just ignore the rca_one_fs == TRUE operation and return * NFS_OK without doing anything. */ onefs = 0; if (*tl == newnfs_true) onefs = 1; nd->nd_repstat = nfsrv_checkreclaimcomplete(nd, onefs); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 destroy clientid service */ APPLESTATIC int nfsrvd_destroyclientid(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { uint32_t *tl; nfsquad_t clientid; int error = 0; struct thread *p = curthread; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); clientid.lval[0] = *tl++; clientid.lval[1] = *tl; nd->nd_repstat = nfsrv_destroyclient(clientid, p); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 bind connection to session service */ APPLESTATIC int nfsrvd_bindconnsess(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { uint32_t *tl; uint8_t sessid[NFSX_V4SESSIONID]; int error = 0, foreaft; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID + 2 * NFSX_UNSIGNED); NFSBCOPY(tl, sessid, NFSX_V4SESSIONID); tl += (NFSX_V4SESSIONID / NFSX_UNSIGNED); foreaft = fxdr_unsigned(int, *tl++); if (*tl == newnfs_true) { /* RDMA is not supported. */ nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } nd->nd_repstat = nfsrv_bindconnsess(nd, sessid, &foreaft); if (nd->nd_repstat == 0) { NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 2 * NFSX_UNSIGNED); NFSBCOPY(sessid, tl, NFSX_V4SESSIONID); tl += (NFSX_V4SESSIONID / NFSX_UNSIGNED); *tl++ = txdr_unsigned(foreaft); *tl = newnfs_false; } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 destroy session service */ APPLESTATIC int nfsrvd_destroysession(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { uint8_t *cp, sessid[NFSX_V4SESSIONID]; int error = 0; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(cp, uint8_t *, NFSX_V4SESSIONID); NFSBCOPY(cp, sessid, NFSX_V4SESSIONID); nd->nd_repstat = nfsrv_destroysession(nd, sessid); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 free stateid service */ APPLESTATIC int nfsrvd_freestateid(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { uint32_t *tl; nfsv4stateid_t stateid; int error = 0; struct thread *p = curthread; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID); stateid.seqid = fxdr_unsigned(uint32_t, *tl++); NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); /* * For the special stateid of other all 0s and seqid == 1, set the * stateid to the current stateid, if it is set. */ if (stateid.seqid == 1 && stateid.other[0] == 0 && stateid.other[1] == 0 && stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) { stateid = nd->nd_curstateid; stateid.seqid = 0; } else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } nd->nd_repstat = nfsrv_freestateid(nd, &stateid, p); /* If the current stateid has been free'd, unset it. */ if (nd->nd_repstat == 0 && (nd->nd_flag & ND_CURSTATEID) != 0 && stateid.other[0] == nd->nd_curstateid.other[0] && stateid.other[1] == nd->nd_curstateid.other[1] && stateid.other[2] == nd->nd_curstateid.other[2]) nd->nd_flag &= ~ND_CURSTATEID; nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 layoutget service */ APPLESTATIC int nfsrvd_layoutget(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { uint32_t *tl; nfsv4stateid_t stateid; int error = 0, layoutlen, layouttype, iomode, maxcnt, retonclose; uint64_t offset, len, minlen; char *layp; struct thread *p = curthread; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED + 3 * NFSX_HYPER + NFSX_STATEID); tl++; /* Signal layout available. Ignore for now. */ layouttype = fxdr_unsigned(int, *tl++); iomode = fxdr_unsigned(int, *tl++); offset = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); tl += 2; minlen = fxdr_hyper(tl); tl += 2; stateid.seqid = fxdr_unsigned(uint32_t, *tl++); NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); maxcnt = fxdr_unsigned(int, *tl); NFSD_DEBUG(4, "layoutget ltyp=%d iom=%d off=%ju len=%ju mlen=%ju\n", layouttype, iomode, (uintmax_t)offset, (uintmax_t)len, (uintmax_t)minlen); if (len < minlen || (minlen != UINT64_MAX && offset + minlen < offset) || (len != UINT64_MAX && offset + len < offset)) { nd->nd_repstat = NFSERR_INVAL; goto nfsmout; } /* * For the special stateid of other all 0s and seqid == 1, set the * stateid to the current stateid, if it is set. */ if (stateid.seqid == 1 && stateid.other[0] == 0 && stateid.other[1] == 0 && stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) { stateid = nd->nd_curstateid; stateid.seqid = 0; } else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } layp = NULL; if (layouttype == NFSLAYOUT_NFSV4_1_FILES && nfsrv_maxpnfsmirror == 1) layp = malloc(NFSX_V4FILELAYOUT, M_TEMP, M_WAITOK); else if (layouttype == NFSLAYOUT_FLEXFILE) layp = malloc(NFSX_V4FLEXLAYOUT(nfsrv_maxpnfsmirror), M_TEMP, M_WAITOK); else nd->nd_repstat = NFSERR_UNKNLAYOUTTYPE; if (layp != NULL) nd->nd_repstat = nfsrv_layoutget(nd, vp, exp, layouttype, &iomode, &offset, &len, minlen, &stateid, maxcnt, &retonclose, &layoutlen, layp, nd->nd_cred, p); NFSD_DEBUG(4, "nfsrv_layoutget stat=%u layoutlen=%d\n", nd->nd_repstat, layoutlen); if (nd->nd_repstat == 0) { /* For NFSv4.1, set the Current StateID. */ if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_curstateid = stateid; nd->nd_flag |= ND_CURSTATEID; } NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + NFSX_STATEID + 2 * NFSX_HYPER); *tl++ = txdr_unsigned(retonclose); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY(stateid.other, tl, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); *tl++ = txdr_unsigned(1); /* Only returns one layout. */ txdr_hyper(offset, tl); tl += 2; txdr_hyper(len, tl); tl += 2; *tl++ = txdr_unsigned(iomode); *tl = txdr_unsigned(layouttype); nfsm_strtom(nd, layp, layoutlen); } else if (nd->nd_repstat == NFSERR_LAYOUTTRYLATER) { NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } free(layp, M_TEMP); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 layoutcommit service */ APPLESTATIC int nfsrvd_layoutcommit(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { uint32_t *tl; nfsv4stateid_t stateid; int error = 0, hasnewoff, hasnewmtime, layouttype, maxcnt, reclaim; int hasnewsize; uint64_t offset, len, newoff = 0, newsize; struct timespec newmtime; char *layp; struct thread *p = curthread; layp = NULL; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + 2 * NFSX_HYPER + NFSX_STATEID); offset = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); tl += 2; reclaim = fxdr_unsigned(int, *tl++); stateid.seqid = fxdr_unsigned(uint32_t, *tl++); NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); /* * For the special stateid of other all 0s and seqid == 1, set the * stateid to the current stateid, if it is set. */ if (stateid.seqid == 1 && stateid.other[0] == 0 && stateid.other[1] == 0 && stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) { stateid = nd->nd_curstateid; stateid.seqid = 0; } else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } hasnewoff = fxdr_unsigned(int, *tl); if (hasnewoff != 0) { NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED); newoff = fxdr_hyper(tl); tl += 2; } else NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); hasnewmtime = fxdr_unsigned(int, *tl); if (hasnewmtime != 0) { NFSM_DISSECT(tl, uint32_t *, NFSX_V4TIME + 2 * NFSX_UNSIGNED); fxdr_nfsv4time(tl, &newmtime); tl += (NFSX_V4TIME / NFSX_UNSIGNED); } else NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); layouttype = fxdr_unsigned(int, *tl++); maxcnt = fxdr_unsigned(int, *tl); if (maxcnt > 0) { layp = malloc(maxcnt + 1, M_TEMP, M_WAITOK); error = nfsrv_mtostr(nd, layp, maxcnt); if (error != 0) goto nfsmout; } nd->nd_repstat = nfsrv_layoutcommit(nd, vp, layouttype, hasnewoff, newoff, offset, len, hasnewmtime, &newmtime, reclaim, &stateid, maxcnt, layp, &hasnewsize, &newsize, nd->nd_cred, p); NFSD_DEBUG(4, "nfsrv_layoutcommit stat=%u\n", nd->nd_repstat); if (nd->nd_repstat == 0) { if (hasnewsize != 0) { NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED + NFSX_HYPER); *tl++ = newnfs_true; txdr_hyper(newsize, tl); } else { NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } } nfsmout: free(layp, M_TEMP); vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 layoutreturn service */ APPLESTATIC int nfsrvd_layoutreturn(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { uint32_t *tl, *layp; nfsv4stateid_t stateid; int error = 0, fnd, kind, layouttype, iomode, maxcnt, reclaim; uint64_t offset, len; struct thread *p = curthread; layp = NULL; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED); reclaim = *tl++; layouttype = fxdr_unsigned(int, *tl++); iomode = fxdr_unsigned(int, *tl++); kind = fxdr_unsigned(int, *tl); NFSD_DEBUG(4, "layoutreturn recl=%d ltyp=%d iom=%d kind=%d\n", reclaim, layouttype, iomode, kind); if (kind == NFSV4LAYOUTRET_FILE) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID + NFSX_UNSIGNED); offset = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); tl += 2; stateid.seqid = fxdr_unsigned(uint32_t, *tl++); NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); /* * For the special stateid of other all 0s and seqid == 1, set * the stateid to the current stateid, if it is set. */ if (stateid.seqid == 1 && stateid.other[0] == 0 && stateid.other[1] == 0 && stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) { stateid = nd->nd_curstateid; stateid.seqid = 0; } else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } maxcnt = fxdr_unsigned(int, *tl); if (maxcnt > 0) { layp = malloc(maxcnt + 1, M_TEMP, M_WAITOK); error = nfsrv_mtostr(nd, (char *)layp, maxcnt); if (error != 0) goto nfsmout; } } else { if (reclaim == newnfs_true) { nd->nd_repstat = NFSERR_INVAL; goto nfsmout; } offset = len = 0; maxcnt = 0; } nd->nd_repstat = nfsrv_layoutreturn(nd, vp, layouttype, iomode, offset, len, reclaim, kind, &stateid, maxcnt, layp, &fnd, nd->nd_cred, p); NFSD_DEBUG(4, "nfsrv_layoutreturn stat=%u fnd=%d\n", nd->nd_repstat, fnd); if (nd->nd_repstat == 0) { NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); if (fnd != 0) { *tl = newnfs_true; NFSM_BUILD(tl, uint32_t *, NFSX_STATEID); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY(stateid.other, tl, NFSX_STATEIDOTHER); } else *tl = newnfs_false; } nfsmout: free(layp, M_TEMP); vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 layout error service */ APPLESTATIC int nfsrvd_layouterror(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { uint32_t *tl; nfsv4stateid_t stateid; int cnt, error = 0, i, stat; int opnum __unused; char devid[NFSX_V4DEVICEID]; uint64_t offset, len; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID + NFSX_UNSIGNED); offset = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); tl += 2; stateid.seqid = fxdr_unsigned(uint32_t, *tl++); NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); cnt = fxdr_unsigned(int, *tl); NFSD_DEBUG(4, "layouterror off=%ju len=%ju cnt=%d\n", (uintmax_t)offset, (uintmax_t)len, cnt); /* * For the special stateid of other all 0s and seqid == 1, set * the stateid to the current stateid, if it is set. */ if (stateid.seqid == 1 && stateid.other[0] == 0 && stateid.other[1] == 0 && stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) { stateid = nd->nd_curstateid; stateid.seqid = 0; } else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } /* * Ignore offset, len and stateid for now. */ for (i = 0; i < cnt; i++) { NFSM_DISSECT(tl, uint32_t *, NFSX_V4DEVICEID + 2 * NFSX_UNSIGNED); NFSBCOPY(tl, devid, NFSX_V4DEVICEID); tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); stat = fxdr_unsigned(int, *tl++); opnum = fxdr_unsigned(int, *tl); NFSD_DEBUG(4, "nfsrvd_layouterr op=%d stat=%d\n", opnum, stat); /* * Except for NFSERR_ACCES and NFSERR_STALE errors, * disable the mirror. */ if (stat != NFSERR_ACCES && stat != NFSERR_STALE) nfsrv_delds(devid, curthread); } nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 layout stats service */ APPLESTATIC int nfsrvd_layoutstats(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { uint32_t *tl; nfsv4stateid_t stateid; int cnt, error = 0; int layouttype __unused; char devid[NFSX_V4DEVICEID] __unused; uint64_t offset, len, readcount, readbytes, writecount, writebytes __unused; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, 6 * NFSX_HYPER + NFSX_STATEID + NFSX_V4DEVICEID + 2 * NFSX_UNSIGNED); offset = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); tl += 2; stateid.seqid = fxdr_unsigned(uint32_t, *tl++); NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); readcount = fxdr_hyper(tl); tl += 2; readbytes = fxdr_hyper(tl); tl += 2; writecount = fxdr_hyper(tl); tl += 2; writebytes = fxdr_hyper(tl); tl += 2; NFSBCOPY(tl, devid, NFSX_V4DEVICEID); tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); layouttype = fxdr_unsigned(int, *tl++); cnt = fxdr_unsigned(int, *tl); error = nfsm_advance(nd, NFSM_RNDUP(cnt), -1); if (error != 0) goto nfsmout; NFSD_DEBUG(4, "layoutstats cnt=%d\n", cnt); /* * For the special stateid of other all 0s and seqid == 1, set * the stateid to the current stateid, if it is set. */ if (stateid.seqid == 1 && stateid.other[0] == 0 && stateid.other[1] == 0 && stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) { stateid = nd->nd_curstateid; stateid.seqid = 0; } else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } /* * No use for the stats for now. */ nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 io_advise service */ APPLESTATIC int nfsrvd_ioadvise(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { uint32_t *tl; nfsv4stateid_t stateid; nfsattrbit_t hints; int error = 0, ret; off_t offset, len; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID + 2 * NFSX_HYPER); stateid.seqid = fxdr_unsigned(uint32_t, *tl++); NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); offset = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); error = nfsrv_getattrbits(nd, &hints, NULL, NULL); if (error != 0) goto nfsmout; /* * For the special stateid of other all 0s and seqid == 1, set * the stateid to the current stateid, if it is set. */ if (stateid.seqid == 1 && stateid.other[0] == 0 && stateid.other[1] == 0 && stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) { stateid = nd->nd_curstateid; stateid.seqid = 0; } else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } if (offset < 0) { nd->nd_repstat = NFSERR_INVAL; goto nfsmout; } if (len < 0) len = 0; if (vp->v_type != VREG) { if (vp->v_type == VDIR) nd->nd_repstat = NFSERR_ISDIR; else nd->nd_repstat = NFSERR_WRONGTYPE; goto nfsmout; } /* * For now, we can only handle WILLNEED and DONTNEED and don't use * the stateid. */ if ((NFSISSET_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED) && !NFSISSET_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED)) || (NFSISSET_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED) && !NFSISSET_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED))) { NFSVOPUNLOCK(vp); if (NFSISSET_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED)) { ret = VOP_ADVISE(vp, offset, len, POSIX_FADV_WILLNEED); NFSZERO_ATTRBIT(&hints); if (ret == 0) NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED); else NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_NORMAL); } else { ret = VOP_ADVISE(vp, offset, len, POSIX_FADV_DONTNEED); NFSZERO_ATTRBIT(&hints); if (ret == 0) NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED); else NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_NORMAL); } vrele(vp); } else { NFSZERO_ATTRBIT(&hints); NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_NORMAL); vput(vp); } nfsrv_putattrbit(nd, &hints); NFSEXITCODE2(error, nd); return (error); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 getdeviceinfo service */ APPLESTATIC int nfsrvd_getdevinfo(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { uint32_t *tl, maxcnt, notify[NFSV4_NOTIFYBITMAP]; int cnt, devaddrlen, error = 0, i, layouttype; char devid[NFSX_V4DEVICEID], *devaddr; time_t dev_time; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED + NFSX_V4DEVICEID); NFSBCOPY(tl, devid, NFSX_V4DEVICEID); tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); layouttype = fxdr_unsigned(int, *tl++); maxcnt = fxdr_unsigned(uint32_t, *tl++); cnt = fxdr_unsigned(int, *tl); NFSD_DEBUG(4, "getdevinfo ltyp=%d maxcnt=%u bitcnt=%d\n", layouttype, maxcnt, cnt); if (cnt > NFSV4_NOTIFYBITMAP || cnt < 0) { nd->nd_repstat = NFSERR_INVAL; goto nfsmout; } if (cnt > 0) { NFSM_DISSECT(tl, uint32_t *, cnt * NFSX_UNSIGNED); for (i = 0; i < cnt; i++) notify[i] = fxdr_unsigned(uint32_t, *tl++); } for (i = cnt; i < NFSV4_NOTIFYBITMAP; i++) notify[i] = 0; /* * Check that the device id is not stale. Device ids are recreated * each time the nfsd threads are restarted. */ NFSBCOPY(devid, &dev_time, sizeof(dev_time)); if (dev_time != nfsdev_time) { nd->nd_repstat = NFSERR_NOENT; goto nfsmout; } /* Look for the device id. */ nd->nd_repstat = nfsrv_getdevinfo(devid, layouttype, &maxcnt, notify, &devaddrlen, &devaddr); NFSD_DEBUG(4, "nfsrv_getdevinfo stat=%u\n", nd->nd_repstat); if (nd->nd_repstat == 0) { NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(layouttype); nfsm_strtom(nd, devaddr, devaddrlen); cnt = 0; for (i = 0; i < NFSV4_NOTIFYBITMAP; i++) { if (notify[i] != 0) cnt = i + 1; } NFSM_BUILD(tl, uint32_t *, (cnt + 1) * NFSX_UNSIGNED); *tl++ = txdr_unsigned(cnt); for (i = 0; i < cnt; i++) *tl++ = txdr_unsigned(notify[i]); } else if (nd->nd_repstat == NFSERR_TOOSMALL) { NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(maxcnt); } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 test stateid service */ APPLESTATIC int nfsrvd_teststateid(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { uint32_t *tl; nfsv4stateid_t *stateidp = NULL, *tstateidp; int cnt, error = 0, i, ret; struct thread *p = curthread; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); cnt = fxdr_unsigned(int, *tl); if (cnt <= 0 || cnt > 1024) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } stateidp = mallocarray(cnt, sizeof(nfsv4stateid_t), M_TEMP, M_WAITOK); tstateidp = stateidp; for (i = 0; i < cnt; i++) { NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID); tstateidp->seqid = fxdr_unsigned(uint32_t, *tl++); NFSBCOPY(tl, tstateidp->other, NFSX_STATEIDOTHER); tstateidp++; } NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(cnt); tstateidp = stateidp; for (i = 0; i < cnt; i++) { ret = nfsrv_teststateid(nd, tstateidp, p); NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(ret); tstateidp++; } nfsmout: free(stateidp, M_TEMP); NFSEXITCODE2(error, nd); return (error); } /* * nfs allocate service */ APPLESTATIC int nfsrvd_allocate(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { uint32_t *tl; struct nfsvattr forat; int error = 0, forat_ret = 1, gotproxystateid; off_t off, len; struct nfsstate st, *stp = &st; struct nfslock lo, *lop = &lo; nfsv4stateid_t stateid; nfsquad_t clientid; nfsattrbit_t attrbits; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } gotproxystateid = 0; NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID + 2 * NFSX_HYPER); stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS); lop->lo_flags = NFSLCK_WRITE; stp->ls_ownerlen = 0; stp->ls_op = NULL; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); clientid.lval[0] = stp->ls_stateid.other[0] = *tl++; clientid.lval[1] = stp->ls_stateid.other[1] = *tl++; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK2 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } stp->ls_stateid.other[2] = *tl++; /* * Don't allow this to be done for a DS. */ if ((nd->nd_flag & ND_DSSERVER) != 0) nd->nd_repstat = NFSERR_NOTSUPP; /* However, allow the proxy stateid. */ if (stp->ls_stateid.seqid == 0xffffffff && stp->ls_stateid.other[0] == 0x55555555 && stp->ls_stateid.other[1] == 0x55555555 && stp->ls_stateid.other[2] == 0x55555555) gotproxystateid = 1; off = fxdr_hyper(tl); tl += 2; lop->lo_first = off; len = fxdr_hyper(tl); lop->lo_end = off + len; /* * Paranoia, just in case it wraps around, which shouldn't * ever happen anyhow. */ if (nd->nd_repstat == 0 && (lop->lo_end < lop->lo_first || len <= 0)) nd->nd_repstat = NFSERR_INVAL; if (nd->nd_repstat == 0 && vnode_vtype(vp) != VREG) nd->nd_repstat = NFSERR_WRONGTYPE; NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER); forat_ret = nfsvno_getattr(vp, &forat, nd, curthread, 1, &attrbits); if (nd->nd_repstat == 0) nd->nd_repstat = forat_ret; if (nd->nd_repstat == 0 && (forat.na_uid != nd->nd_cred->cr_uid || NFSVNO_EXSTRICTACCESS(exp))) nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, curthread, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat == 0 && gotproxystateid == 0) nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid, &stateid, exp, nd, curthread); if (nd->nd_repstat == 0) nd->nd_repstat = nfsvno_allocate(vp, off, len, nd->nd_cred, curthread); vput(vp); NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfs copy service */ APPLESTATIC int nfsrvd_copy_file_range(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, vnode_t tovp, struct nfsexstuff *exp, struct nfsexstuff *toexp) { uint32_t *tl; struct nfsvattr at; int cnt, error = 0, ret; off_t inoff, outoff; uint64_t len; size_t xfer; struct nfsstate inst, outst, *instp = &inst, *outstp = &outst; struct nfslock inlo, outlo, *inlop = &inlo, *outlop = &outlo; nfsquad_t clientid; nfsv4stateid_t stateid; nfsattrbit_t attrbits; void *rl_rcookie, *rl_wcookie; rl_rcookie = rl_wcookie = NULL; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } if (nfsrv_devidcnt > 0) { /* * For a pNFS server, reply NFSERR_NOTSUPP so that the client * will do the copy via I/O on the DS(s). */ nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } if (vp == tovp) { /* Copying a byte range within the same file is not allowed. */ nd->nd_repstat = NFSERR_INVAL; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_STATEID + 3 * NFSX_HYPER + 3 * NFSX_UNSIGNED); instp->ls_flags = (NFSLCK_CHECK | NFSLCK_READACCESS); inlop->lo_flags = NFSLCK_READ; instp->ls_ownerlen = 0; instp->ls_op = NULL; instp->ls_uid = nd->nd_cred->cr_uid; instp->ls_stateid.seqid = fxdr_unsigned(uint32_t, *tl++); clientid.lval[0] = instp->ls_stateid.other[0] = *tl++; clientid.lval[1] = instp->ls_stateid.other[1] = *tl++; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) clientid.qval = nd->nd_clientid.qval; instp->ls_stateid.other[2] = *tl++; outstp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS); outlop->lo_flags = NFSLCK_WRITE; outstp->ls_ownerlen = 0; outstp->ls_op = NULL; outstp->ls_uid = nd->nd_cred->cr_uid; outstp->ls_stateid.seqid = fxdr_unsigned(uint32_t, *tl++); outstp->ls_stateid.other[0] = *tl++; outstp->ls_stateid.other[1] = *tl++; outstp->ls_stateid.other[2] = *tl++; inoff = fxdr_hyper(tl); tl += 2; inlop->lo_first = inoff; outoff = fxdr_hyper(tl); tl += 2; outlop->lo_first = outoff; len = fxdr_hyper(tl); tl += 2; if (len == 0) { /* len == 0 means to EOF. */ inlop->lo_end = OFF_MAX; outlop->lo_end = OFF_MAX; } else { inlop->lo_end = inlop->lo_first + len; outlop->lo_end = outlop->lo_first + len; } /* * At this time only consecutive, synchronous copy is supported, * so ca_consecutive and ca_synchronous can be ignored. */ tl += 2; cnt = fxdr_unsigned(int, *tl); if ((nd->nd_flag & ND_DSSERVER) != 0 || cnt != 0) nd->nd_repstat = NFSERR_NOTSUPP; if (nd->nd_repstat == 0 && (inoff > OFF_MAX || outoff > OFF_MAX || inlop->lo_end > OFF_MAX || outlop->lo_end > OFF_MAX || inlop->lo_end < inlop->lo_first || outlop->lo_end < outlop->lo_first)) nd->nd_repstat = NFSERR_INVAL; if (nd->nd_repstat == 0 && vnode_vtype(vp) != VREG) nd->nd_repstat = NFSERR_WRONGTYPE; /* Check permissions for the input file. */ NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER); ret = nfsvno_getattr(vp, &at, nd, curthread, 1, &attrbits); if (nd->nd_repstat == 0) nd->nd_repstat = ret; if (nd->nd_repstat == 0 && (at.na_uid != nd->nd_cred->cr_uid || NFSVNO_EXSTRICTACCESS(exp))) nd->nd_repstat = nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, curthread, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat == 0) nd->nd_repstat = nfsrv_lockctrl(vp, &instp, &inlop, NULL, clientid, &stateid, exp, nd, curthread); NFSVOPUNLOCK(vp); if (nd->nd_repstat != 0) goto out; error = NFSVOPLOCK(tovp, LK_SHARED); if (error != 0) goto out; if (vnode_vtype(tovp) != VREG) nd->nd_repstat = NFSERR_WRONGTYPE; /* For the output file, we only need the Owner attribute. */ ret = nfsvno_getattr(tovp, &at, nd, curthread, 1, &attrbits); if (nd->nd_repstat == 0) nd->nd_repstat = ret; if (nd->nd_repstat == 0 && (at.na_uid != nd->nd_cred->cr_uid || NFSVNO_EXSTRICTACCESS(exp))) nd->nd_repstat = nfsvno_accchk(tovp, VWRITE, nd->nd_cred, toexp, curthread, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat == 0) nd->nd_repstat = nfsrv_lockctrl(tovp, &outstp, &outlop, NULL, clientid, &stateid, toexp, nd, curthread); NFSVOPUNLOCK(tovp); /* Range lock the byte ranges for both invp and outvp. */ if (nd->nd_repstat == 0) { for (;;) { if (len == 0) { rl_wcookie = vn_rangelock_wlock(tovp, outoff, OFF_MAX); rl_rcookie = vn_rangelock_tryrlock(vp, inoff, OFF_MAX); } else { rl_wcookie = vn_rangelock_wlock(tovp, outoff, outoff + len); rl_rcookie = vn_rangelock_tryrlock(vp, inoff, inoff + len); } if (rl_rcookie != NULL) break; vn_rangelock_unlock(tovp, rl_wcookie); if (len == 0) rl_rcookie = vn_rangelock_rlock(vp, inoff, OFF_MAX); else rl_rcookie = vn_rangelock_rlock(vp, inoff, inoff + len); vn_rangelock_unlock(vp, rl_rcookie); } error = NFSVOPLOCK(vp, LK_SHARED); if (error == 0) { ret = nfsvno_getattr(vp, &at, nd, curthread, 1, NULL); if (ret == 0) { /* * Since invp is range locked, na_size should * not change. */ if (len == 0 && at.na_size > inoff) { /* * If len == 0, set it based on invp's * size. If offset is past EOF, just * leave len == 0. */ len = at.na_size - inoff; } else if (nfsrv_linux42server == 0 && inoff + len > at.na_size) { /* * RFC-7862 says that NFSERR_INVAL must * be returned when inoff + len exceeds * the file size, however the NFSv4.2 * Linux client likes to do this, so * only check if nfsrv_linux42server * is not set. */ nd->nd_repstat = NFSERR_INVAL; } } NFSVOPUNLOCK(vp); if (ret != 0 && nd->nd_repstat == 0) nd->nd_repstat = ret; } else if (nd->nd_repstat == 0) nd->nd_repstat = error; } /* * Do the actual copy to an upper limit of vfs.nfs.maxcopyrange. * This limit is applied to ensure that the RPC replies in a * reasonable time. */ if (len > nfs_maxcopyrange) xfer = nfs_maxcopyrange; else xfer = len; if (nd->nd_repstat == 0) { nd->nd_repstat = vn_copy_file_range(vp, &inoff, tovp, &outoff, &xfer, 0, nd->nd_cred, nd->nd_cred, NULL); if (nd->nd_repstat == 0) len = xfer; } /* Unlock the ranges. */ if (rl_rcookie != NULL) vn_rangelock_unlock(vp, rl_rcookie); if (rl_wcookie != NULL) vn_rangelock_unlock(tovp, rl_wcookie); if (nd->nd_repstat == 0) { NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + NFSX_HYPER + NFSX_VERF); *tl++ = txdr_unsigned(0); /* No callback ids. */ txdr_hyper(len, tl); tl += 2; *tl++ = txdr_unsigned(NFSWRITE_UNSTABLE); *tl++ = txdr_unsigned(nfsboottime.tv_sec); *tl++ = txdr_unsigned(nfsboottime.tv_usec); *tl++ = newnfs_true; *tl = newnfs_true; } out: vrele(vp); vrele(tovp); NFSEXITCODE2(error, nd); return (error); nfsmout: vput(vp); vrele(tovp); NFSEXITCODE2(error, nd); return (error); } /* * nfs seek service */ APPLESTATIC int nfsrvd_seek(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { uint32_t *tl; struct nfsvattr at; int content, error = 0; off_t off; u_long cmd; nfsattrbit_t attrbits; bool eof; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID + NFSX_HYPER + NFSX_UNSIGNED); /* Ignore the stateid for now. */ tl += (NFSX_STATEID / NFSX_UNSIGNED); off = fxdr_hyper(tl); tl += 2; content = fxdr_unsigned(int, *tl); if (content == NFSV4CONTENT_DATA) cmd = FIOSEEKDATA; else if (content == NFSV4CONTENT_HOLE) cmd = FIOSEEKHOLE; else nd->nd_repstat = NFSERR_BADXDR; if (nd->nd_repstat == 0 && vnode_vtype(vp) == VDIR) nd->nd_repstat = NFSERR_ISDIR; if (nd->nd_repstat == 0 && vnode_vtype(vp) != VREG) nd->nd_repstat = NFSERR_WRONGTYPE; if (nd->nd_repstat == 0 && off < 0) nd->nd_repstat = NFSERR_NXIO; if (nd->nd_repstat == 0) { /* Check permissions for the input file. */ NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER); nd->nd_repstat = nfsvno_getattr(vp, &at, nd, curthread, 1, &attrbits); } if (nd->nd_repstat == 0 && (at.na_uid != nd->nd_cred->cr_uid || NFSVNO_EXSTRICTACCESS(exp))) nd->nd_repstat = nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, curthread, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat != 0) goto nfsmout; /* nfsvno_seek() unlocks and vrele()s the vp. */ nd->nd_repstat = nfsvno_seek(nd, vp, cmd, &off, content, &eof, nd->nd_cred, curthread); if (nd->nd_repstat == 0 && eof && content == NFSV4CONTENT_DATA && nfsrv_linux42server != 0) nd->nd_repstat = NFSERR_NXIO; if (nd->nd_repstat == 0) { NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED + NFSX_HYPER); if (eof) *tl++ = newnfs_true; else *tl++ = newnfs_false; txdr_hyper(off, tl); } NFSEXITCODE2(error, nd); return (error); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfs get extended attribute service */ APPLESTATIC int nfsrvd_getxattr(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { uint32_t *tl; struct mbuf *mp = NULL, *mpend = NULL; int error, len; char *name; struct thread *p = curthread; struct mbuf_ext_pgs *pgs; uint16_t off; error = 0; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len <= 0) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } if (len > EXTATTR_MAXNAMELEN) { nd->nd_repstat = NFSERR_NOXATTR; goto nfsmout; } name = malloc(len + 1, M_TEMP, M_WAITOK); nd->nd_repstat = nfsrv_mtostr(nd, name, len); if (nd->nd_repstat == 0) nd->nd_repstat = nfsvno_getxattr(vp, name, nd->nd_maxresp, nd->nd_cred, nd->nd_flag, nd->nd_maxextsiz, p, &mp, &mpend, &len); if (nd->nd_repstat == ENOATTR) nd->nd_repstat = NFSERR_NOXATTR; else if (nd->nd_repstat == EOPNOTSUPP) nd->nd_repstat = NFSERR_NOTSUPP; if (nd->nd_repstat == 0) { NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(len); if (mp != NULL) { nd->nd_mb->m_next = mp; nd->nd_mb = mpend; if ((mpend->m_flags & M_NOMAP) != 0) { nd->nd_flag |= ND_NOMAP; - pgs = mpend->m_ext.ext_pgs; + pgs = &mpend->m_ext_pgs; nd->nd_bextpg = pgs->npgs - 1; nd->nd_bpos = (char *)(void *) - PHYS_TO_DMAP(pgs->pa[nd->nd_bextpg]); + PHYS_TO_DMAP(mpend->m_epg_pa[nd->nd_bextpg]); off = (nd->nd_bextpg == 0) ? pgs->first_pg_off : 0; nd->nd_bpos += off + pgs->last_pg_len; nd->nd_bextpgsiz = PAGE_SIZE - pgs->last_pg_len - off; } else nd->nd_bpos = mtod(mpend, char *) + mpend->m_len; } } free(name, M_TEMP); nfsmout: if (nd->nd_repstat == 0) nd->nd_repstat = error; vput(vp); NFSEXITCODE2(0, nd); return (0); } /* * nfs set extended attribute service */ APPLESTATIC int nfsrvd_setxattr(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { uint32_t *tl; struct nfsvattr ova, nva; nfsattrbit_t attrbits; int error, len, opt; char *name; size_t siz; struct thread *p = curthread; error = 0; name = NULL; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); opt = fxdr_unsigned(int, *tl++); len = fxdr_unsigned(int, *tl); if (len <= 0) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } if (len > EXTATTR_MAXNAMELEN) { nd->nd_repstat = NFSERR_NOXATTR; goto nfsmout; } name = malloc(len + 1, M_TEMP, M_WAITOK); error = nfsrv_mtostr(nd, name, len); if (error != 0) goto nfsmout; NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len < 0 || len > IOSIZE_MAX) { nd->nd_repstat = NFSERR_XATTR2BIG; goto nfsmout; } switch (opt) { case NFSV4SXATTR_CREATE: error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL, &siz, nd->nd_cred, p); if (error != ENOATTR) nd->nd_repstat = NFSERR_EXIST; error = 0; break; case NFSV4SXATTR_REPLACE: error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL, &siz, nd->nd_cred, p); if (error != 0) nd->nd_repstat = NFSERR_NOXATTR; break; case NFSV4SXATTR_EITHER: break; default: nd->nd_repstat = NFSERR_BADXDR; } if (nd->nd_repstat != 0) goto nfsmout; /* Now, do the Set Extended attribute, with Change before and after. */ NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); nd->nd_repstat = nfsvno_getattr(vp, &ova, nd, p, 1, &attrbits); if (nd->nd_repstat == 0) { nd->nd_repstat = nfsvno_setxattr(vp, name, len, nd->nd_md, nd->nd_dpos, nd->nd_dextpg, nd->nd_dextpgsiz, nd->nd_cred, p); if (nd->nd_repstat == ENXIO) nd->nd_repstat = NFSERR_XATTR2BIG; } if (nd->nd_repstat == 0 && len > 0) nd->nd_repstat = nfsm_advance(nd, NFSM_RNDUP(len), -1); if (nd->nd_repstat == 0) nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, &attrbits); if (nd->nd_repstat == 0) { NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED); *tl++ = newnfs_true; txdr_hyper(ova.na_filerev, tl); tl += 2; txdr_hyper(nva.na_filerev, tl); } nfsmout: free(name, M_TEMP); if (nd->nd_repstat == 0) nd->nd_repstat = error; vput(vp); NFSEXITCODE2(0, nd); return (0); } /* * nfs remove extended attribute service */ APPLESTATIC int nfsrvd_rmxattr(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { uint32_t *tl; struct nfsvattr ova, nva; nfsattrbit_t attrbits; int error, len; char *name; struct thread *p = curthread; error = 0; name = NULL; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len <= 0) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } if (len > EXTATTR_MAXNAMELEN) { nd->nd_repstat = NFSERR_NOXATTR; goto nfsmout; } name = malloc(len + 1, M_TEMP, M_WAITOK); error = nfsrv_mtostr(nd, name, len); if (error != 0) goto nfsmout; if ((nd->nd_flag & ND_IMPLIEDCLID) == 0) { printf("EEK! nfsrvd_rmxattr: no implied clientid\n"); error = NFSERR_NOXATTR; goto nfsmout; } /* * Now, do the Remove Extended attribute, with Change before and * after. */ NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); nd->nd_repstat = nfsvno_getattr(vp, &ova, nd, p, 1, &attrbits); if (nd->nd_repstat == 0) { nd->nd_repstat = nfsvno_rmxattr(nd, vp, name, nd->nd_cred, p); if (nd->nd_repstat == ENOATTR) nd->nd_repstat = NFSERR_NOXATTR; } if (nd->nd_repstat == 0) nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, &attrbits); if (nd->nd_repstat == 0) { NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED); *tl++ = newnfs_true; txdr_hyper(ova.na_filerev, tl); tl += 2; txdr_hyper(nva.na_filerev, tl); } nfsmout: free(name, M_TEMP); if (nd->nd_repstat == 0) nd->nd_repstat = error; vput(vp); NFSEXITCODE2(0, nd); return (0); } /* * nfs list extended attribute service */ APPLESTATIC int nfsrvd_listxattr(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { uint32_t cnt, *tl, len, len2, i, pos, retlen; int error; uint64_t cookie, cookie2; u_char *buf; bool eof; struct thread *p = curthread; error = 0; buf = NULL; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED); /* * The cookie doesn't need to be in net byte order, but FreeBSD * does so to make it more readable in packet traces. */ cookie = fxdr_hyper(tl); tl += 2; len = fxdr_unsigned(uint32_t, *tl); if (len == 0 || cookie >= IOSIZE_MAX) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } if (len > nd->nd_maxresp - NFS_MAXXDR) len = nd->nd_maxresp - NFS_MAXXDR; len2 = len; nd->nd_repstat = nfsvno_listxattr(vp, cookie, nd->nd_cred, p, &buf, &len, &eof); if (nd->nd_repstat == EOPNOTSUPP) nd->nd_repstat = NFSERR_NOTSUPP; if (nd->nd_repstat == 0) { cookie2 = cookie + len; if (cookie2 < cookie) nd->nd_repstat = NFSERR_BADXDR; } if (nd->nd_repstat == 0) { /* Now copy the entries out. */ retlen = NFSX_HYPER + 2 * NFSX_UNSIGNED; if (len == 0 && retlen <= len2) { /* The cookie was at eof. */ NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED); txdr_hyper(cookie2, tl); tl += 2; *tl++ = txdr_unsigned(0); *tl = newnfs_true; goto nfsmout; } /* Sanity check the cookie. */ for (pos = 0; pos < len; pos += (i + 1)) { if (pos == cookie) break; i = buf[pos]; } if (pos != cookie) { nd->nd_repstat = NFSERR_INVAL; goto nfsmout; } /* Loop around copying the entrie(s) out. */ cnt = 0; len -= cookie; i = buf[pos]; while (i < len && len2 >= retlen + NFSM_RNDUP(i) + NFSX_UNSIGNED) { if (cnt == 0) { NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED); txdr_hyper(cookie2, tl); tl += 2; } retlen += nfsm_strtom(nd, &buf[pos + 1], i); len -= (i + 1); pos += (i + 1); i = buf[pos]; cnt++; } /* * eof is set true/false by nfsvno_listxattr(), but if we * can't copy all entries returned by nfsvno_listxattr(), * we are not at eof. */ if (len > 0) eof = false; if (cnt > 0) { /* *tl is set above. */ *tl = txdr_unsigned(cnt); NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); if (eof) *tl = newnfs_true; else *tl = newnfs_false; } else nd->nd_repstat = NFSERR_TOOSMALL; } nfsmout: free(buf, M_TEMP); if (nd->nd_repstat == 0) nd->nd_repstat = error; vput(vp); NFSEXITCODE2(0, nd); return (0); } /* * nfsv4 service not supported */ APPLESTATIC int nfsrvd_notsupp(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { nd->nd_repstat = NFSERR_NOTSUPP; NFSEXITCODE2(0, nd); return (0); } Index: projects/nfs-over-tls/sys/fs/nfsserver/nfs_nfsdsubs.c =================================================================== --- projects/nfs-over-tls/sys/fs/nfsserver/nfs_nfsdsubs.c (revision 360215) +++ projects/nfs-over-tls/sys/fs/nfsserver/nfs_nfsdsubs.c (revision 360216) @@ -1,2199 +1,2200 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #ifndef APPLEKEXT /* * These functions support the macros and help fiddle mbuf chains for * the nfs op functions. They do things like create the rpc header and * copy data between mbuf chains and uio lists. */ #include extern u_int32_t newnfs_true, newnfs_false; extern int nfs_pubfhset; extern struct nfsclienthashhead *nfsclienthash; extern int nfsrv_clienthashsize; extern struct nfslockhashhead *nfslockhash; extern int nfsrv_lockhashsize; extern struct nfssessionhash *nfssessionhash; extern int nfsrv_sessionhashsize; extern int nfsrv_useacl; extern uid_t nfsrv_defaultuid; extern gid_t nfsrv_defaultgid; char nfs_v2pubfh[NFSX_V2FH]; struct nfsdontlisthead nfsrv_dontlisthead; struct nfslayouthead nfsrv_recalllisthead; static nfstype newnfsv2_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON, NFCHR, NFNON }; extern nfstype nfsv34_type[9]; #endif /* !APPLEKEXT */ static u_int32_t nfsrv_isannfserr(u_int32_t); SYSCTL_DECL(_vfs_nfsd); static int enable_checkutf8 = 1; SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_checkutf8, CTLFLAG_RW, &enable_checkutf8, 0, "Enable the NFSv4 check for the UTF8 compliant name required by rfc3530"); static int enable_nobodycheck = 1; SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_nobodycheck, CTLFLAG_RW, &enable_nobodycheck, 0, "Enable the NFSv4 check when setting user nobody as owner"); static int enable_nogroupcheck = 1; SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_nogroupcheck, CTLFLAG_RW, &enable_nogroupcheck, 0, "Enable the NFSv4 check when setting group nogroup as owner"); static char nfsrv_hexdigit(char, int *); /* * Maps errno values to nfs error numbers. * Use NFSERR_IO as the catch all for ones not specifically defined in * RFC 1094. (It now includes the errors added for NFSv3.) */ static u_char nfsrv_v2errmap[NFSERR_REMOTE] = { NFSERR_PERM, NFSERR_NOENT, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_NXIO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_EXIST, NFSERR_XDEV, NFSERR_NODEV, NFSERR_NOTDIR, NFSERR_ISDIR, NFSERR_INVAL, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_IO, NFSERR_ROFS, NFSERR_MLINK, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_NAMETOL, NFSERR_IO, NFSERR_IO, NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO, NFSERR_DQUOT, NFSERR_STALE, NFSERR_REMOTE, }; /* * Maps errno values to nfs error numbers. * Although it is not obvious whether or not NFS clients really care if * a returned error value is in the specified list for the procedure, the * safest thing to do is filter them appropriately. For Version 2, the * X/Open XNFS document is the only specification that defines error values * for each RPC (The RFC simply lists all possible error values for all RPCs), * so I have decided to not do this for Version 2. * The first entry is the default error return and the rest are the valid * errors for that RPC in increasing numeric order. */ static short nfsv3err_null[] = { 0, 0, }; static short nfsv3err_getattr[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_setattr[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_PERM, NFSERR_IO, NFSERR_INVAL, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOT_SYNC, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_lookup[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_ACCES, NFSERR_NAMETOL, NFSERR_IO, NFSERR_NOTDIR, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_access[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_readlink[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_read[] = { NFSERR_IO, NFSERR_IO, NFSERR_NXIO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_write[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_NOSPC, NFSERR_INVAL, NFSERR_FBIG, NFSERR_ROFS, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_create[] = { NFSERR_IO, NFSERR_EXIST, NFSERR_NAMETOL, NFSERR_ACCES, NFSERR_IO, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_mkdir[] = { NFSERR_IO, NFSERR_EXIST, NFSERR_ACCES, NFSERR_NAMETOL, NFSERR_IO, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_symlink[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NAMETOL, NFSERR_NOSPC, NFSERR_IO, NFSERR_NOTDIR, NFSERR_ROFS, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_mknod[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NAMETOL, NFSERR_NOSPC, NFSERR_IO, NFSERR_NOTDIR, NFSERR_ROFS, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, NFSERR_DELAY, NFSERR_BADTYPE, 0, }; static short nfsv3err_remove[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_ACCES, NFSERR_NAMETOL, NFSERR_IO, NFSERR_NOTDIR, NFSERR_ROFS, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_rmdir[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_NAMETOL, NFSERR_IO, NFSERR_EXIST, NFSERR_INVAL, NFSERR_ROFS, NFSERR_NOTEMPTY, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_rename[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NAMETOL, NFSERR_XDEV, NFSERR_IO, NFSERR_NOTDIR, NFSERR_ISDIR, NFSERR_INVAL, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_MLINK, NFSERR_NOTEMPTY, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_link[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NAMETOL, NFSERR_IO, NFSERR_XDEV, NFSERR_NOTDIR, NFSERR_INVAL, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_MLINK, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_readdir[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_BAD_COOKIE, NFSERR_TOOSMALL, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_readdirplus[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_BAD_COOKIE, NFSERR_NOTSUPP, NFSERR_TOOSMALL, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_fsstat[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_fsinfo[] = { NFSERR_STALE, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_pathconf[] = { NFSERR_STALE, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_commit[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short *nfsrv_v3errmap[] = { nfsv3err_null, nfsv3err_getattr, nfsv3err_setattr, nfsv3err_lookup, nfsv3err_access, nfsv3err_readlink, nfsv3err_read, nfsv3err_write, nfsv3err_create, nfsv3err_mkdir, nfsv3err_symlink, nfsv3err_mknod, nfsv3err_remove, nfsv3err_rmdir, nfsv3err_rename, nfsv3err_link, nfsv3err_readdir, nfsv3err_readdirplus, nfsv3err_fsstat, nfsv3err_fsinfo, nfsv3err_pathconf, nfsv3err_commit, }; /* * And the same for V4. */ static short nfsv4err_null[] = { 0, 0, }; static short nfsv4err_access[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_BADHANDLE, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_FHEXPIRED, NFSERR_INVAL, NFSERR_IO, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, 0, }; static short nfsv4err_close[] = { NFSERR_EXPIRED, NFSERR_ADMINREVOKED, NFSERR_BADHANDLE, NFSERR_BADSEQID, NFSERR_BADSTATEID, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_EXPIRED, NFSERR_FHEXPIRED, NFSERR_INVAL, NFSERR_ISDIR, NFSERR_LEASEMOVED, NFSERR_LOCKSHELD, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_OLDSTATEID, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_STALESTATEID, 0, }; static short nfsv4err_commit[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_BADHANDLE, NFSERR_BADXDR, NFSERR_FHEXPIRED, NFSERR_INVAL, NFSERR_IO, NFSERR_ISDIR, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_RESOURCE, NFSERR_ROFS, NFSERR_SERVERFAULT, NFSERR_STALE, 0, }; static short nfsv4err_create[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_ATTRNOTSUPP, NFSERR_BADCHAR, NFSERR_BADHANDLE, NFSERR_BADNAME, NFSERR_BADOWNER, NFSERR_BADTYPE, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_DQUOT, NFSERR_EXIST, NFSERR_FHEXPIRED, NFSERR_INVAL, NFSERR_IO, NFSERR_MOVED, NFSERR_NAMETOL, NFSERR_NOFILEHANDLE, NFSERR_NOSPC, NFSERR_NOTDIR, NFSERR_PERM, NFSERR_RESOURCE, NFSERR_ROFS, NFSERR_SERVERFAULT, NFSERR_STALE, 0, }; static short nfsv4err_delegpurge[] = { NFSERR_SERVERFAULT, NFSERR_BADXDR, NFSERR_NOTSUPP, NFSERR_LEASEMOVED, NFSERR_MOVED, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALECLIENTID, 0, }; static short nfsv4err_delegreturn[] = { NFSERR_SERVERFAULT, NFSERR_ADMINREVOKED, NFSERR_BADSTATEID, NFSERR_BADXDR, NFSERR_EXPIRED, NFSERR_INVAL, NFSERR_LEASEMOVED, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_NOTSUPP, NFSERR_OLDSTATEID, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_STALESTATEID, 0, }; static short nfsv4err_getattr[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_BADHANDLE, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_FHEXPIRED, NFSERR_INVAL, NFSERR_IO, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, 0, }; static short nfsv4err_getfh[] = { NFSERR_BADHANDLE, NFSERR_BADHANDLE, NFSERR_FHEXPIRED, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, 0, }; static short nfsv4err_link[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_BADCHAR, NFSERR_BADHANDLE, NFSERR_BADNAME, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_DQUOT, NFSERR_EXIST, NFSERR_FHEXPIRED, NFSERR_FILEOPEN, NFSERR_INVAL, NFSERR_IO, NFSERR_ISDIR, NFSERR_MLINK, NFSERR_MOVED, NFSERR_NAMETOL, NFSERR_NOENT, NFSERR_NOFILEHANDLE, NFSERR_NOSPC, NFSERR_NOTDIR, NFSERR_NOTSUPP, NFSERR_RESOURCE, NFSERR_ROFS, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_WRONGSEC, NFSERR_XDEV, 0, }; static short nfsv4err_lock[] = { NFSERR_SERVERFAULT, NFSERR_ACCES, NFSERR_ADMINREVOKED, NFSERR_BADHANDLE, NFSERR_BADRANGE, NFSERR_BADSEQID, NFSERR_BADSTATEID, NFSERR_BADXDR, NFSERR_DEADLOCK, NFSERR_DELAY, NFSERR_DENIED, NFSERR_EXPIRED, NFSERR_FHEXPIRED, NFSERR_GRACE, NFSERR_INVAL, NFSERR_ISDIR, NFSERR_LEASEMOVED, NFSERR_LOCKNOTSUPP, NFSERR_LOCKRANGE, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_NOGRACE, NFSERR_OLDSTATEID, NFSERR_OPENMODE, NFSERR_RECLAIMBAD, NFSERR_RECLAIMCONFLICT, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_STALECLIENTID, NFSERR_STALESTATEID, 0, }; static short nfsv4err_lockt[] = { NFSERR_SERVERFAULT, NFSERR_ACCES, NFSERR_BADHANDLE, NFSERR_BADRANGE, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_DENIED, NFSERR_FHEXPIRED, NFSERR_GRACE, NFSERR_INVAL, NFSERR_ISDIR, NFSERR_LEASEMOVED, NFSERR_LOCKRANGE, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_STALECLIENTID, 0, }; static short nfsv4err_locku[] = { NFSERR_SERVERFAULT, NFSERR_ACCES, NFSERR_ADMINREVOKED, NFSERR_BADHANDLE, NFSERR_BADRANGE, NFSERR_BADSEQID, NFSERR_BADSTATEID, NFSERR_BADXDR, NFSERR_EXPIRED, NFSERR_FHEXPIRED, NFSERR_GRACE, NFSERR_INVAL, NFSERR_ISDIR, NFSERR_LEASEMOVED, NFSERR_LOCKRANGE, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_OLDSTATEID, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_STALESTATEID, 0, }; static short nfsv4err_lookup[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_BADCHAR, NFSERR_BADHANDLE, NFSERR_BADNAME, NFSERR_BADXDR, NFSERR_FHEXPIRED, NFSERR_INVAL, NFSERR_IO, NFSERR_MOVED, NFSERR_NAMETOL, NFSERR_NOENT, NFSERR_NOFILEHANDLE, NFSERR_NOTDIR, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_SYMLINK, NFSERR_WRONGSEC, 0, }; static short nfsv4err_lookupp[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_BADHANDLE, NFSERR_FHEXPIRED, NFSERR_IO, NFSERR_MOVED, NFSERR_NOENT, NFSERR_NOFILEHANDLE, NFSERR_NOTDIR, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, 0, }; static short nfsv4err_nverify[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_ATTRNOTSUPP, NFSERR_BADCHAR, NFSERR_BADHANDLE, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_FHEXPIRED, NFSERR_INVAL, NFSERR_IO, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_RESOURCE, NFSERR_SAME, NFSERR_SERVERFAULT, NFSERR_STALE, 0, }; static short nfsv4err_open[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_ADMINREVOKED, NFSERR_ATTRNOTSUPP, NFSERR_BADCHAR, NFSERR_BADHANDLE, NFSERR_BADNAME, NFSERR_BADOWNER, NFSERR_BADSEQID, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_DQUOT, NFSERR_EXIST, NFSERR_EXPIRED, NFSERR_FHEXPIRED, NFSERR_GRACE, NFSERR_IO, NFSERR_INVAL, NFSERR_ISDIR, NFSERR_LEASEMOVED, NFSERR_MOVED, NFSERR_NAMETOL, NFSERR_NOENT, NFSERR_NOFILEHANDLE, NFSERR_NOGRACE, NFSERR_NOSPC, NFSERR_NOTDIR, NFSERR_NOTSUPP, NFSERR_PERM, NFSERR_RECLAIMBAD, NFSERR_RECLAIMCONFLICT, NFSERR_RESOURCE, NFSERR_ROFS, NFSERR_SERVERFAULT, NFSERR_SHAREDENIED, NFSERR_STALE, NFSERR_STALECLIENTID, NFSERR_SYMLINK, NFSERR_WRONGSEC, 0, }; static short nfsv4err_openattr[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_BADHANDLE, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_DQUOT, NFSERR_FHEXPIRED, NFSERR_IO, NFSERR_MOVED, NFSERR_NOENT, NFSERR_NOFILEHANDLE, NFSERR_NOSPC, NFSERR_NOTSUPP, NFSERR_RESOURCE, NFSERR_ROFS, NFSERR_SERVERFAULT, NFSERR_STALE, 0, }; static short nfsv4err_openconfirm[] = { NFSERR_SERVERFAULT, NFSERR_ADMINREVOKED, NFSERR_BADHANDLE, NFSERR_BADSEQID, NFSERR_BADSTATEID, NFSERR_BADXDR, NFSERR_EXPIRED, NFSERR_FHEXPIRED, NFSERR_INVAL, NFSERR_ISDIR, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_OLDSTATEID, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_STALESTATEID, 0, }; static short nfsv4err_opendowngrade[] = { NFSERR_SERVERFAULT, NFSERR_ADMINREVOKED, NFSERR_BADHANDLE, NFSERR_BADSEQID, NFSERR_BADSTATEID, NFSERR_BADXDR, NFSERR_EXPIRED, NFSERR_FHEXPIRED, NFSERR_INVAL, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_OLDSTATEID, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_STALESTATEID, 0, }; static short nfsv4err_putfh[] = { NFSERR_SERVERFAULT, NFSERR_BADHANDLE, NFSERR_BADXDR, NFSERR_FHEXPIRED, NFSERR_MOVED, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_WRONGSEC, 0, }; static short nfsv4err_putpubfh[] = { NFSERR_SERVERFAULT, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_WRONGSEC, 0, }; static short nfsv4err_putrootfh[] = { NFSERR_SERVERFAULT, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_WRONGSEC, 0, }; static short nfsv4err_read[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_ADMINREVOKED, NFSERR_BADHANDLE, NFSERR_BADSTATEID, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_EXPIRED, NFSERR_FHEXPIRED, NFSERR_GRACE, NFSERR_IO, NFSERR_INVAL, NFSERR_ISDIR, NFSERR_LEASEMOVED, NFSERR_LOCKED, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_NXIO, NFSERR_OLDSTATEID, NFSERR_OPENMODE, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_STALESTATEID, 0, }; static short nfsv4err_readdir[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_BADHANDLE, NFSERR_BAD_COOKIE, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_FHEXPIRED, NFSERR_INVAL, NFSERR_IO, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_NOTDIR, NFSERR_NOTSAME, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_TOOSMALL, 0, }; static short nfsv4err_readlink[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_BADHANDLE, NFSERR_DELAY, NFSERR_FHEXPIRED, NFSERR_INVAL, NFSERR_IO, NFSERR_ISDIR, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_NOTSUPP, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, 0, }; static short nfsv4err_remove[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_BADCHAR, NFSERR_BADHANDLE, NFSERR_BADNAME, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_FHEXPIRED, NFSERR_FILEOPEN, NFSERR_INVAL, NFSERR_IO, NFSERR_MOVED, NFSERR_NAMETOL, NFSERR_NOENT, NFSERR_NOFILEHANDLE, NFSERR_NOTDIR, NFSERR_NOTEMPTY, NFSERR_RESOURCE, NFSERR_ROFS, NFSERR_SERVERFAULT, NFSERR_STALE, 0, }; static short nfsv4err_rename[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_BADCHAR, NFSERR_BADHANDLE, NFSERR_BADNAME, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_DQUOT, NFSERR_EXIST, NFSERR_FHEXPIRED, NFSERR_FILEOPEN, NFSERR_INVAL, NFSERR_IO, NFSERR_MOVED, NFSERR_NAMETOL, NFSERR_NOENT, NFSERR_NOFILEHANDLE, NFSERR_NOSPC, NFSERR_NOTDIR, NFSERR_NOTEMPTY, NFSERR_RESOURCE, NFSERR_ROFS, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_WRONGSEC, NFSERR_XDEV, 0, }; static short nfsv4err_renew[] = { NFSERR_SERVERFAULT, NFSERR_ACCES, NFSERR_ADMINREVOKED, NFSERR_BADXDR, NFSERR_CBPATHDOWN, NFSERR_EXPIRED, NFSERR_LEASEMOVED, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALECLIENTID, 0, }; static short nfsv4err_restorefh[] = { NFSERR_SERVERFAULT, NFSERR_BADHANDLE, NFSERR_FHEXPIRED, NFSERR_MOVED, NFSERR_RESOURCE, NFSERR_RESTOREFH, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_WRONGSEC, 0, }; static short nfsv4err_savefh[] = { NFSERR_SERVERFAULT, NFSERR_BADHANDLE, NFSERR_FHEXPIRED, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, 0, }; static short nfsv4err_secinfo[] = { NFSERR_SERVERFAULT, NFSERR_ACCES, NFSERR_BADCHAR, NFSERR_BADHANDLE, NFSERR_BADNAME, NFSERR_BADXDR, NFSERR_FHEXPIRED, NFSERR_INVAL, NFSERR_MOVED, NFSERR_NAMETOL, NFSERR_NOENT, NFSERR_NOFILEHANDLE, NFSERR_NOTDIR, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, 0, }; static short nfsv4err_setattr[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_ADMINREVOKED, NFSERR_ATTRNOTSUPP, NFSERR_BADCHAR, NFSERR_BADHANDLE, NFSERR_BADOWNER, NFSERR_BADSTATEID, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_DQUOT, NFSERR_EXPIRED, NFSERR_FBIG, NFSERR_FHEXPIRED, NFSERR_GRACE, NFSERR_INVAL, NFSERR_IO, NFSERR_ISDIR, NFSERR_LOCKED, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_NOSPC, NFSERR_OLDSTATEID, NFSERR_OPENMODE, NFSERR_PERM, NFSERR_RESOURCE, NFSERR_ROFS, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_STALESTATEID, 0, }; static short nfsv4err_setclientid[] = { NFSERR_SERVERFAULT, NFSERR_BADXDR, NFSERR_CLIDINUSE, NFSERR_INVAL, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_WRONGSEC, 0, }; static short nfsv4err_setclientidconfirm[] = { NFSERR_SERVERFAULT, NFSERR_BADXDR, NFSERR_CLIDINUSE, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALECLIENTID, 0, }; static short nfsv4err_verify[] = { NFSERR_SERVERFAULT, NFSERR_ACCES, NFSERR_ATTRNOTSUPP, NFSERR_BADCHAR, NFSERR_BADHANDLE, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_FHEXPIRED, NFSERR_INVAL, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_NOTSAME, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, 0, }; static short nfsv4err_write[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_ADMINREVOKED, NFSERR_BADHANDLE, NFSERR_BADSTATEID, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_DQUOT, NFSERR_EXPIRED, NFSERR_FBIG, NFSERR_FHEXPIRED, NFSERR_GRACE, NFSERR_INVAL, NFSERR_IO, NFSERR_ISDIR, NFSERR_LEASEMOVED, NFSERR_LOCKED, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_NOSPC, NFSERR_NXIO, NFSERR_OLDSTATEID, NFSERR_OPENMODE, NFSERR_RESOURCE, NFSERR_ROFS, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_STALESTATEID, 0, }; static short nfsv4err_releaselockowner[] = { NFSERR_SERVERFAULT, NFSERR_ADMINREVOKED, NFSERR_BADXDR, NFSERR_EXPIRED, NFSERR_LEASEMOVED, NFSERR_LOCKSHELD, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALECLIENTID, 0, }; static short *nfsrv_v4errmap[] = { nfsv4err_null, nfsv4err_null, nfsv4err_null, nfsv4err_access, nfsv4err_close, nfsv4err_commit, nfsv4err_create, nfsv4err_delegpurge, nfsv4err_delegreturn, nfsv4err_getattr, nfsv4err_getfh, nfsv4err_link, nfsv4err_lock, nfsv4err_lockt, nfsv4err_locku, nfsv4err_lookup, nfsv4err_lookupp, nfsv4err_nverify, nfsv4err_open, nfsv4err_openattr, nfsv4err_openconfirm, nfsv4err_opendowngrade, nfsv4err_putfh, nfsv4err_putpubfh, nfsv4err_putrootfh, nfsv4err_read, nfsv4err_readdir, nfsv4err_readlink, nfsv4err_remove, nfsv4err_rename, nfsv4err_renew, nfsv4err_restorefh, nfsv4err_savefh, nfsv4err_secinfo, nfsv4err_setattr, nfsv4err_setclientid, nfsv4err_setclientidconfirm, nfsv4err_verify, nfsv4err_write, nfsv4err_releaselockowner, }; /* * Trim tlen bytes off the end of the mbuf list and then ensure * the end of the last mbuf is nul filled to a long boundary, * as indicated by the value of "nul". * Return the last mbuf in the updated list and free and mbufs * that follow it in the original list. * This is somewhat different than the old nfsrv_adj() with * support for ext_pgs mbufs. It frees the remaining mbufs * instead of setting them 0 length, since lists of ext_pgs * mbufs are all expected to be non-empty. */ struct mbuf * nfsrv_adj(struct mbuf *mp, int len, int nul) { struct mbuf *m, *m2; struct mbuf_ext_pgs *pgs; vm_page_t pg; int i, lastlen, pgno, plen, tlen, trim; uint16_t off; char *cp; /* * Find the last mbuf after adjustment and * how much it needs to be adjusted by. */ tlen = 0; m = mp; for (;;) { tlen += m->m_len; if (m->m_next == NULL) break; m = m->m_next; } /* m is now the last mbuf and tlen the total length. */ if (len >= m->m_len) { /* Need to trim away the last mbuf(s). */ i = tlen - len; m = mp; for (;;) { if (m->m_len >= i) break; i -= m->m_len; m = m->m_next; } lastlen = i; } else lastlen = m->m_len - len; /* Adjust the last mbuf. */ if ((m->m_flags & M_NOMAP) != 0) { - pgs = m->m_ext.ext_pgs; + pgs = &m->m_ext_pgs; pgno = pgs->npgs - 1; off = (pgno == 0) ? pgs->first_pg_off : 0; plen = mbuf_ext_pg_len(pgs, pgno, off); if (m->m_len > lastlen) { /* Trim this mbuf. */ trim = m->m_len - lastlen; while (trim >= plen) { /* Free page. */ - pg = PHYS_TO_VM_PAGE(pgs->pa[pgno]); + pg = PHYS_TO_VM_PAGE(m->m_epg_pa[pgno]); vm_page_unwire_noq(pg); vm_page_free(pg); trim -= plen; pgs->npgs--; pgno--; off = (pgno == 0) ? pgs->first_pg_off : 0; plen = mbuf_ext_pg_len(pgs, pgno, off); } plen -= trim; pgs->last_pg_len = plen; m->m_len = lastlen; } - cp = (char *)(void *)PHYS_TO_DMAP(pgs->pa[pgno]); + cp = (char *)(void *)PHYS_TO_DMAP(m->m_epg_pa[pgno]); cp += off + plen - nul; } else { m->m_len = lastlen; cp = mtod(m, char *) + m->m_len - nul; } /* Write the nul bytes. */ for (i = 0; i < nul; i++) *cp++ = '\0'; /* Free up any mbufs past "m". */ m2 = m->m_next; m->m_next = NULL; if (m2 != NULL) m_freem(m2); return (m); } /* * Make these functions instead of macros, so that the kernel text size * doesn't get too big... */ APPLESTATIC void nfsrv_wcc(struct nfsrv_descript *nd, int before_ret, struct nfsvattr *before_nvap, int after_ret, struct nfsvattr *after_nvap) { u_int32_t *tl; if (before_ret) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } else { NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED); *tl++ = newnfs_true; txdr_hyper(before_nvap->na_size, tl); tl += 2; txdr_nfsv3time(&(before_nvap->na_mtime), tl); tl += 2; txdr_nfsv3time(&(before_nvap->na_ctime), tl); } nfsrv_postopattr(nd, after_ret, after_nvap); } APPLESTATIC void nfsrv_postopattr(struct nfsrv_descript *nd, int after_ret, struct nfsvattr *after_nvap) { u_int32_t *tl; NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (after_ret) *tl = newnfs_false; else { *tl = newnfs_true; nfsrv_fillattr(nd, after_nvap); } } /* * Fill in file attributes for V2 and 3. For V4, call a separate * routine that sifts through all the attribute bits. */ APPLESTATIC void nfsrv_fillattr(struct nfsrv_descript *nd, struct nfsvattr *nvap) { struct nfs_fattr *fp; int fattr_size; /* * Build space for the attribute structure. */ if (nd->nd_flag & ND_NFSV3) fattr_size = NFSX_V3FATTR; else fattr_size = NFSX_V2FATTR; NFSM_BUILD(fp, struct nfs_fattr *, fattr_size); /* * Now just fill it all in. */ fp->fa_nlink = txdr_unsigned(nvap->na_nlink); fp->fa_uid = txdr_unsigned(nvap->na_uid); fp->fa_gid = txdr_unsigned(nvap->na_gid); if (nd->nd_flag & ND_NFSV3) { fp->fa_type = vtonfsv34_type(nvap->na_type); fp->fa_mode = vtonfsv34_mode(nvap->na_mode); txdr_hyper(nvap->na_size, &fp->fa3_size); txdr_hyper(nvap->na_bytes, &fp->fa3_used); fp->fa3_rdev.specdata1 = txdr_unsigned(NFSMAJOR(nvap->na_rdev)); fp->fa3_rdev.specdata2 = txdr_unsigned(NFSMINOR(nvap->na_rdev)); fp->fa3_fsid.nfsuquad[0] = 0; fp->fa3_fsid.nfsuquad[1] = txdr_unsigned(nvap->na_fsid); txdr_hyper(nvap->na_fileid, &fp->fa3_fileid); txdr_nfsv3time(&nvap->na_atime, &fp->fa3_atime); txdr_nfsv3time(&nvap->na_mtime, &fp->fa3_mtime); txdr_nfsv3time(&nvap->na_ctime, &fp->fa3_ctime); } else { fp->fa_type = vtonfsv2_type(nvap->na_type); fp->fa_mode = vtonfsv2_mode(nvap->na_type, nvap->na_mode); fp->fa2_size = txdr_unsigned(nvap->na_size); fp->fa2_blocksize = txdr_unsigned(nvap->na_blocksize); if (nvap->na_type == VFIFO) fp->fa2_rdev = 0xffffffff; else fp->fa2_rdev = txdr_unsigned(nvap->na_rdev); fp->fa2_blocks = txdr_unsigned(nvap->na_bytes / NFS_FABLKSIZE); fp->fa2_fsid = txdr_unsigned(nvap->na_fsid); fp->fa2_fileid = txdr_unsigned(nvap->na_fileid); txdr_nfsv2time(&nvap->na_atime, &fp->fa2_atime); txdr_nfsv2time(&nvap->na_mtime, &fp->fa2_mtime); txdr_nfsv2time(&nvap->na_ctime, &fp->fa2_ctime); } } /* * This function gets a file handle out of an mbuf list. * It returns 0 for success, EBADRPC otherwise. * If sets the third flagp argument to 1 if the file handle is * the public file handle. * For NFSv4, if the length is incorrect, set nd_repstat == NFSERR_BADHANDLE */ APPLESTATIC int nfsrv_mtofh(struct nfsrv_descript *nd, struct nfsrvfh *fhp) { u_int32_t *tl; int error = 0, len, copylen; if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len == 0 && nfs_pubfhset && (nd->nd_flag & ND_NFSV3) && nd->nd_procnum == NFSPROC_LOOKUP) { nd->nd_flag |= ND_PUBLOOKUP; goto nfsmout; } copylen = len; /* If len == NFSX_V4PNFSFH the RPC is a pNFS DS one. */ if (len == NFSX_V4PNFSFH && (nd->nd_flag & ND_NFSV41) != 0) { copylen = NFSX_MYFH; len = NFSM_RNDUP(len); nd->nd_flag |= ND_DSSERVER; } else if (len < NFSRV_MINFH || len > NFSRV_MAXFH) { if (nd->nd_flag & ND_NFSV4) { if (len > 0 && len <= NFSX_V4FHMAX) { error = nfsm_advance(nd, NFSM_RNDUP(len), -1); if (error) goto nfsmout; nd->nd_repstat = NFSERR_BADHANDLE; goto nfsmout; } else { error = EBADRPC; goto nfsmout; } } else { error = EBADRPC; goto nfsmout; } } } else { /* * For NFSv2, the file handle is always 32 bytes on the * wire, but this server only cares about the first * NFSRV_MAXFH bytes. */ len = NFSX_V2FH; copylen = NFSRV_MAXFH; } NFSM_DISSECT(tl, u_int32_t *, len); if ((nd->nd_flag & ND_NFSV2) && nfs_pubfhset && nd->nd_procnum == NFSPROC_LOOKUP && !NFSBCMP((caddr_t)tl, nfs_v2pubfh, NFSX_V2FH)) { nd->nd_flag |= ND_PUBLOOKUP; goto nfsmout; } NFSBCOPY(tl, (caddr_t)fhp->nfsrvfh_data, copylen); fhp->nfsrvfh_len = copylen; nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Map errnos to NFS error numbers. For Version 3 and 4 also filter out error * numbers not specified for the associated procedure. * NFSPROC_NOOP is a special case, where the high order bits of nd_repstat * should be cleared. NFSPROC_NOOP is used to return errors when a valid * RPC procedure is not involved. * Returns the error number in XDR. */ APPLESTATIC int nfsd_errmap(struct nfsrv_descript *nd) { short *defaulterrp, *errp; if (!nd->nd_repstat) return (0); if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { if (nd->nd_procnum == NFSPROC_NOOP) return (txdr_unsigned(nd->nd_repstat & 0xffff)); if (nd->nd_flag & ND_NFSV3) errp = defaulterrp = nfsrv_v3errmap[nd->nd_procnum]; else if (nd->nd_repstat == EBADRPC) return (txdr_unsigned(NFSERR_BADXDR)); else if (nd->nd_repstat == NFSERR_MINORVERMISMATCH || nd->nd_repstat == NFSERR_OPILLEGAL) return (txdr_unsigned(nd->nd_repstat)); else if ((nd->nd_flag & ND_NFSV41) != 0) { if (nd->nd_repstat == EOPNOTSUPP) nd->nd_repstat = NFSERR_NOTSUPP; nd->nd_repstat = nfsrv_isannfserr(nd->nd_repstat); return (txdr_unsigned(nd->nd_repstat)); } else errp = defaulterrp = nfsrv_v4errmap[nd->nd_procnum]; while (*++errp) if (*errp == nd->nd_repstat) return (txdr_unsigned(nd->nd_repstat)); return (txdr_unsigned(*defaulterrp)); } if (nd->nd_repstat <= NFSERR_REMOTE) return (txdr_unsigned(nfsrv_v2errmap[nd->nd_repstat - 1])); return (txdr_unsigned(NFSERR_IO)); } /* * Check to see if the error is a valid NFS one. If not, replace it with * NFSERR_IO. */ static u_int32_t nfsrv_isannfserr(u_int32_t errval) { if (errval == NFSERR_OK) return (errval); if (errval >= NFSERR_BADHANDLE && errval <= NFSERR_MAXERRVAL) return (errval); if (errval > 0 && errval <= NFSERR_REMOTE) return (nfsrv_v2errmap[errval - 1]); return (NFSERR_IO); } /* * Check to see if setting a uid/gid is permitted when creating a new * file object. (Called when uid and/or gid is specified in the * settable attributes for V4. */ APPLESTATIC int nfsrv_checkuidgid(struct nfsrv_descript *nd, struct nfsvattr *nvap) { int error = 0; /* * If not setting either uid nor gid, it's OK. */ if (NFSVNO_NOTSETUID(nvap) && NFSVNO_NOTSETGID(nvap)) goto out; if ((NFSVNO_ISSETUID(nvap) && nvap->na_uid == nfsrv_defaultuid && enable_nobodycheck == 1) || (NFSVNO_ISSETGID(nvap) && nvap->na_gid == nfsrv_defaultgid && enable_nogroupcheck == 1)) { error = NFSERR_BADOWNER; goto out; } if (nd->nd_cred->cr_uid == 0) goto out; if ((NFSVNO_ISSETUID(nvap) && nvap->na_uid != nd->nd_cred->cr_uid) || (NFSVNO_ISSETGID(nvap) && nvap->na_gid != nd->nd_cred->cr_gid && !groupmember(nvap->na_gid, nd->nd_cred))) error = NFSERR_PERM; out: NFSEXITCODE2(error, nd); return (error); } /* * and this routine fixes up the settable attributes for V4 if allowed * by nfsrv_checkuidgid(). */ APPLESTATIC void nfsrv_fixattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, NFSACL_T *aclp, NFSPROC_T *p, nfsattrbit_t *attrbitp, struct nfsexstuff *exp) { int change = 0; struct nfsvattr nva; uid_t tuid; int error; nfsattrbit_t nattrbits; /* * Maybe this should be done for V2 and 3 but it never has been * and nobody seems to be upset, so I think it's best not to change * the V2 and 3 semantics. */ if ((nd->nd_flag & ND_NFSV4) == 0) goto out; NFSVNO_ATTRINIT(&nva); NFSZERO_ATTRBIT(&nattrbits); tuid = nd->nd_cred->cr_uid; if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNER) && NFSVNO_ISSETUID(nvap) && nvap->na_uid != nd->nd_cred->cr_uid) { if (nd->nd_cred->cr_uid == 0) { nva.na_uid = nvap->na_uid; change++; NFSSETBIT_ATTRBIT(&nattrbits, NFSATTRBIT_OWNER); } else { NFSCLRBIT_ATTRBIT(attrbitp, NFSATTRBIT_OWNER); } } if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEACCESSSET) && NFSVNO_ISSETATIME(nvap)) { nva.na_atime = nvap->na_atime; change++; NFSSETBIT_ATTRBIT(&nattrbits, NFSATTRBIT_TIMEACCESSSET); } if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEMODIFYSET) && NFSVNO_ISSETMTIME(nvap)) { nva.na_mtime = nvap->na_mtime; change++; NFSSETBIT_ATTRBIT(&nattrbits, NFSATTRBIT_TIMEMODIFYSET); } if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNERGROUP) && NFSVNO_ISSETGID(nvap)) { if (nvap->na_gid == nd->nd_cred->cr_gid || groupmember(nvap->na_gid, nd->nd_cred)) { nd->nd_cred->cr_uid = 0; nva.na_gid = nvap->na_gid; change++; NFSSETBIT_ATTRBIT(&nattrbits, NFSATTRBIT_OWNERGROUP); } else { NFSCLRBIT_ATTRBIT(attrbitp, NFSATTRBIT_OWNERGROUP); } } if (change) { error = nfsvno_setattr(vp, &nva, nd->nd_cred, p, exp); if (error) { NFSCLRALL_ATTRBIT(attrbitp, &nattrbits); } } if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE) && NFSVNO_ISSETSIZE(nvap) && nvap->na_size != (u_quad_t)0) { NFSCLRBIT_ATTRBIT(attrbitp, NFSATTRBIT_SIZE); } #ifdef NFS4_ACL_EXTATTR_NAME if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_ACL) && nfsrv_useacl != 0 && aclp != NULL) { if (aclp->acl_cnt > 0) { error = nfsrv_setacl(vp, aclp, nd->nd_cred, p); if (error) { NFSCLRBIT_ATTRBIT(attrbitp, NFSATTRBIT_ACL); } } } else #endif NFSCLRBIT_ATTRBIT(attrbitp, NFSATTRBIT_ACL); nd->nd_cred->cr_uid = tuid; out: NFSEXITCODE2(0, nd); } /* * Translate an ASCII hex digit to it's binary value. Return -1 if the * char isn't a hex digit. */ static char nfsrv_hexdigit(char c, int *err) { *err = 0; if (c >= '0' && c <= '9') return (c - '0'); if (c >= 'a' && c <= 'f') return (c - 'a' + ((char)10)); if (c >= 'A' && c <= 'F') return (c - 'A' + ((char)10)); /* Not valid ! */ *err = 1; return (1); /* BOGUS */ } /* * Check to see if NFSERR_MOVED can be returned for this op. Return 1 iff * it can be. */ APPLESTATIC int nfsrv_errmoved(int op) { short *errp; errp = nfsrv_v4errmap[op]; while (*errp != 0) { if (*errp == NFSERR_MOVED) return (1); errp++; } return (0); } /* * Fill in attributes for a Referral. * (Return the number of bytes of XDR created.) */ APPLESTATIC int nfsrv_putreferralattr(struct nfsrv_descript *nd, nfsattrbit_t *retbitp, struct nfsreferral *refp, int getattr, int *reterrp) { u_int32_t *tl, *retnump; u_char *cp, *cp2; int prefixnum, retnum = 0, i, len, bitpos, rderrbit = 0, nonrefbit = 0; int fslocationsbit = 0; nfsattrbit_t tmpbits, refbits; NFSREFERRAL_ATTRBIT(&refbits); if (getattr) NFSCLRBIT_ATTRBIT(&refbits, NFSATTRBIT_RDATTRERROR); else if (NFSISSET_ATTRBIT(retbitp, NFSATTRBIT_RDATTRERROR)) rderrbit = 1; if (NFSISSET_ATTRBIT(retbitp, NFSATTRBIT_FSLOCATIONS)) fslocationsbit = 1; /* * Check for the case where unsupported referral attributes are * requested. */ NFSSET_ATTRBIT(&tmpbits, retbitp); NFSCLRALL_ATTRBIT(&tmpbits, &refbits); if (NFSNONZERO_ATTRBIT(&tmpbits)) nonrefbit = 1; if (nonrefbit && !fslocationsbit && (getattr || !rderrbit)) { *reterrp = NFSERR_MOVED; return (0); } /* * Now we can fill in the attributes. */ NFSSET_ATTRBIT(&tmpbits, retbitp); NFSCLRNOT_ATTRBIT(&tmpbits, &refbits); /* * Put out the attribute bitmap for the ones being filled in * and get the field for the number of attributes returned. */ prefixnum = nfsrv_putattrbit(nd, &tmpbits); NFSM_BUILD(retnump, u_int32_t *, NFSX_UNSIGNED); prefixnum += NFSX_UNSIGNED; /* * Now, loop around filling in the attributes for each bit set. */ for (bitpos = 0; bitpos < NFSATTRBIT_MAX; bitpos++) { if (NFSISSET_ATTRBIT(&tmpbits, bitpos)) { switch (bitpos) { case NFSATTRBIT_TYPE: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFDIR); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_FSID: NFSM_BUILD(tl, u_int32_t *, NFSX_V4FSID); *tl++ = 0; *tl++ = txdr_unsigned(NFSV4ROOT_FSID0); *tl++ = 0; *tl = txdr_unsigned(NFSV4ROOT_REFERRAL); retnum += NFSX_V4FSID; break; case NFSATTRBIT_RDATTRERROR: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (nonrefbit) *tl = txdr_unsigned(NFSERR_MOVED); else *tl = 0; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_FSLOCATIONS: retnum += nfsm_strtom(nd, "/", 1); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(refp->nfr_srvcnt); retnum += NFSX_UNSIGNED; cp = refp->nfr_srvlist; for (i = 0; i < refp->nfr_srvcnt; i++) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(1); retnum += NFSX_UNSIGNED; cp2 = STRCHR(cp, ':'); if (cp2 != NULL) len = cp2 - cp; else len = 1; retnum += nfsm_strtom(nd, cp, len); if (cp2 != NULL) cp = cp2 + 1; cp2 = STRCHR(cp, ','); if (cp2 != NULL) len = cp2 - cp; else len = strlen(cp); retnum += nfsm_strtom(nd, cp, len); if (cp2 != NULL) cp = cp2 + 1; } break; case NFSATTRBIT_MOUNTEDONFILEID: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); txdr_hyper(refp->nfr_dfileno, tl); retnum += NFSX_HYPER; break; default: printf("EEK! Bad V4 refattr bitpos=%d\n", bitpos); } } } *retnump = txdr_unsigned(retnum); return (retnum + prefixnum); } /* * Parse a file name out of a request. */ APPLESTATIC int nfsrv_parsename(struct nfsrv_descript *nd, char *bufp, u_long *hashp, NFSPATHLEN_T *outlenp) { struct mbuf_ext_pgs *pgs; vm_page_t pg; char *fromcp, *tocp, val = '\0'; int i; int rem, len, error = 0, pubtype = 0, outlen = 0, percent = 0; char digit; u_int32_t *tl; u_long hash = 0; if (hashp != NULL) *hashp = 0; tocp = bufp; /* * For V4, check for lookup parent. * Otherwise, get the component name. */ if ((nd->nd_flag & ND_NFSV4) && nd->nd_procnum == NFSV4OP_LOOKUPP) { *tocp++ = '.'; hash += ((u_char)'.'); *tocp++ = '.'; hash += ((u_char)'.'); outlen = 2; } else { /* * First, get the name length. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len > NFS_MAXNAMLEN) { nd->nd_repstat = NFSERR_NAMETOL; error = 0; goto nfsmout; } else if (len <= 0) { nd->nd_repstat = NFSERR_INVAL; error = 0; goto nfsmout; } /* * Now, copy the component name into the buffer. */ fromcp = nd->nd_dpos; if ((nd->nd_md->m_flags & M_NOMAP) != 0) rem = nd->nd_dextpgsiz; else rem = mtod(nd->nd_md, char *) + nd->nd_md->m_len - fromcp; for (i = 0; i < len; i++) { while (rem == 0) { if ((nd->nd_md->m_flags & M_NOMAP) != 0 && nd->nd_dextpg < - nd->nd_md->m_ext.ext_pgs->npgs - 1) { - pgs = nd->nd_md->m_ext.ext_pgs; + nd->nd_md->m_ext_pgs.npgs - 1) { + pgs = &nd->nd_md->m_ext_pgs; pg = PHYS_TO_VM_PAGE( - pgs->pa[nd->nd_dextpg]); + nd->nd_md->m_epg_pa[nd->nd_dextpg]); vm_page_unwire_noq(pg); vm_page_free(pg); for (i = nd->nd_bextpg; i < pgs->npgs - 1; i++) - pgs->pa[i] = pgs->pa[i + 1]; + nd->nd_md->m_epg_pa[i] = + nd->nd_md->m_epg_pa[i + 1]; pgs->npgs--; if (nd->nd_dextpg == 0) pgs->first_pg_off = 0; fromcp = nd->nd_dpos = (char *)(void *) PHYS_TO_DMAP( - pgs->pa[nd->nd_dextpg]); + nd->nd_md->m_epg_pa[nd->nd_dextpg]); rem = nd->nd_dextpgsiz = mbuf_ext_pg_len(pgs, nd->nd_dextpg, 0); } else { if (!nfsm_shiftnext(nd, &rem)) { error = EBADRPC; goto nfsmout; } fromcp = nd->nd_dpos; } } if (*fromcp == '\0') { nd->nd_repstat = EACCES; error = 0; goto nfsmout; } /* * For lookups on the public filehandle, do some special * processing on the name. (The public file handle is the * root of the public file system for this server.) */ if (nd->nd_flag & ND_PUBLOOKUP) { /* * If the first char is ASCII, it is a canonical * path, otherwise it is a native path. (RFC2054 * doesn't actually state what it is if the first * char isn't ASCII or 0x80, so I assume native.) * pubtype == 1 -> native path * pubtype == 2 -> canonical path */ if (i == 0) { if (*fromcp & 0x80) { /* * Since RFC2054 doesn't indicate * that a native path of just 0x80 * isn't allowed, I'll replace the * 0x80 with '/' instead of just * throwing it away. */ *fromcp = '/'; pubtype = 1; } else { pubtype = 2; } } /* * '/' only allowed in a native path */ if (*fromcp == '/' && pubtype != 1) { nd->nd_repstat = EACCES; error = 0; goto nfsmout; } /* * For the special case of 2 hex digits after a * '%' in an absolute path, calculate the value. * percent == 1 -> indicates "get first hex digit" * percent == 2 -> indicates "get second hex digit" */ if (percent > 0) { digit = nfsrv_hexdigit(*fromcp, &error); if (error) { nd->nd_repstat = EACCES; error = 0; goto nfsmout; } if (percent == 1) { val = (digit << 4); percent = 2; } else { val += digit; percent = 0; *tocp++ = val; hash += ((u_char)val); outlen++; } } else { if (*fromcp == '%' && pubtype == 2) { /* * Must be followed by 2 hex digits */ if ((len - i) < 3) { nd->nd_repstat = EACCES; error = 0; goto nfsmout; } percent = 1; } else { *tocp++ = *fromcp; hash += ((u_char)*fromcp); outlen++; } } } else { /* * Normal, non lookup on public, name. */ if (*fromcp == '/') { if (nd->nd_flag & ND_NFSV4) nd->nd_repstat = NFSERR_BADNAME; else nd->nd_repstat = EACCES; error = 0; goto nfsmout; } hash += ((u_char)*fromcp); *tocp++ = *fromcp; outlen++; } fromcp++; rem--; } nd->nd_dpos = fromcp; i = NFSM_RNDUP(len) - len; if (i > 0) { error = nfsm_advance(nd, i, rem); if (error) goto nfsmout; } /* * For v4, don't allow lookups of '.' or '..' and * also check for non-utf8 strings. */ if (nd->nd_flag & ND_NFSV4) { if ((outlen == 1 && bufp[0] == '.') || (outlen == 2 && bufp[0] == '.' && bufp[1] == '.')) { nd->nd_repstat = NFSERR_BADNAME; error = 0; goto nfsmout; } if (enable_checkutf8 == 1 && nfsrv_checkutf8((u_int8_t *)bufp, outlen)) { nd->nd_repstat = NFSERR_INVAL; error = 0; goto nfsmout; } } } *tocp = '\0'; *outlenp = (size_t)outlen; if (hashp != NULL) *hashp = hash; nfsmout: NFSEXITCODE2(error, nd); return (error); } void nfsd_init(void) { int i; static int inited = 0; if (inited) return; inited = 1; /* * Initialize client queues. Don't free/reinitialize * them when nfsds are restarted. */ nfsclienthash = malloc(sizeof(struct nfsclienthashhead) * nfsrv_clienthashsize, M_NFSDCLIENT, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_clienthashsize; i++) LIST_INIT(&nfsclienthash[i]); nfslockhash = malloc(sizeof(struct nfslockhashhead) * nfsrv_lockhashsize, M_NFSDLOCKFILE, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_lockhashsize; i++) LIST_INIT(&nfslockhash[i]); nfssessionhash = malloc(sizeof(struct nfssessionhash) * nfsrv_sessionhashsize, M_NFSDSESSION, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_sessionhashsize; i++) { mtx_init(&nfssessionhash[i].mtx, "nfssm", NULL, MTX_DEF); LIST_INIT(&nfssessionhash[i].list); } LIST_INIT(&nfsrv_dontlisthead); TAILQ_INIT(&nfsrv_recalllisthead); /* and the v2 pubfh should be all zeros */ NFSBZERO(nfs_v2pubfh, NFSX_V2FH); } /* * Check the v4 root exports. * Return 0 if ok, 1 otherwise. */ int nfsd_checkrootexp(struct nfsrv_descript *nd) { if ((nd->nd_flag & (ND_GSS | ND_EXAUTHSYS)) == ND_EXAUTHSYS) goto checktls; if ((nd->nd_flag & (ND_GSSINTEGRITY | ND_EXGSSINTEGRITY)) == (ND_GSSINTEGRITY | ND_EXGSSINTEGRITY)) goto checktls; if ((nd->nd_flag & (ND_GSSPRIVACY | ND_EXGSSPRIVACY)) == (ND_GSSPRIVACY | ND_EXGSSPRIVACY)) goto checktls; if ((nd->nd_flag & (ND_GSS | ND_GSSINTEGRITY | ND_GSSPRIVACY | ND_EXGSS)) == (ND_GSS | ND_EXGSS)) goto checktls; return (1); checktls: if ((nd->nd_flag & ND_EXTLS) == 0) return (0); if ((nd->nd_flag & (ND_TLSCERTUSER | ND_EXTLSCERTUSER)) == (ND_TLSCERTUSER | ND_EXTLSCERTUSER)) return (0); if ((nd->nd_flag & (ND_TLSCERT | ND_EXTLSCERT | ND_EXTLSCERTUSER)) == (ND_TLSCERT | ND_EXTLSCERT)) return (0); if ((nd->nd_flag & (ND_TLS | ND_EXTLSCERTUSER | ND_EXTLSCERT)) == ND_TLS) return (0); return (1); } /* * Parse the first part of an NFSv4 compound to find out what the minor * version# is. */ void nfsd_getminorvers(struct nfsrv_descript *nd, u_char *tag, u_char **tagstrp, int *taglenp, u_int32_t *minversp) { uint32_t *tl; int error = 0, taglen = -1; u_char *tagstr = NULL; NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); taglen = fxdr_unsigned(int, *tl); if (taglen < 0 || taglen > NFSV4_OPAQUELIMIT) { error = EBADRPC; goto nfsmout; } if (taglen <= NFSV4_SMALLSTR) tagstr = tag; else tagstr = malloc(taglen + 1, M_TEMP, M_WAITOK); error = nfsrv_mtostr(nd, tagstr, taglen); if (error != 0) goto nfsmout; NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); *minversp = fxdr_unsigned(u_int32_t, *tl); *tagstrp = tagstr; if (*minversp == NFSV41_MINORVERSION) nd->nd_flag |= ND_NFSV41; else if (*minversp == NFSV42_MINORVERSION) nd->nd_flag |= (ND_NFSV41 | ND_NFSV42); nfsmout: if (error != 0) { if (tagstr != NULL && taglen > NFSV4_SMALLSTR) free(tagstr, M_TEMP); taglen = -1; } *taglenp = taglen; } Index: projects/nfs-over-tls/sys/kern/kern_mbuf.c =================================================================== --- projects/nfs-over-tls/sys/kern/kern_mbuf.c (revision 360215) +++ projects/nfs-over-tls/sys/kern/kern_mbuf.c (revision 360216) @@ -1,1786 +1,1784 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004, 2005, * Bosko Milekic . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_param.h" #include "opt_kern_tls.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * In FreeBSD, Mbufs and Mbuf Clusters are allocated from UMA * Zones. * * Mbuf Clusters (2K, contiguous) are allocated from the Cluster * Zone. The Zone can be capped at kern.ipc.nmbclusters, if the * administrator so desires. * * Mbufs are allocated from a UMA Master Zone called the Mbuf * Zone. * * Additionally, FreeBSD provides a Packet Zone, which it * configures as a Secondary Zone to the Mbuf Master Zone, * thus sharing backend Slab kegs with the Mbuf Master Zone. * * Thus common-case allocations and locking are simplified: * * m_clget() m_getcl() * | | * | .------------>[(Packet Cache)] m_get(), m_gethdr() * | | [ Packet ] | * [(Cluster Cache)] [ Secondary ] [ (Mbuf Cache) ] * [ Cluster Zone ] [ Zone ] [ Mbuf Master Zone ] * | \________ | * [ Cluster Keg ] \ / * | [ Mbuf Keg ] * [ Cluster Slabs ] | * | [ Mbuf Slabs ] * \____________(VM)_________________/ * * * Whenever an object is allocated with uma_zalloc() out of * one of the Zones its _ctor_ function is executed. The same * for any deallocation through uma_zfree() the _dtor_ function * is executed. * * Caches are per-CPU and are filled from the Master Zone. * * Whenever an object is allocated from the underlying global * memory pool it gets pre-initialized with the _zinit_ functions. * When the Keg's are overfull objects get decommissioned with * _zfini_ functions and free'd back to the global memory pool. * */ int nmbufs; /* limits number of mbufs */ int nmbclusters; /* limits number of mbuf clusters */ int nmbjumbop; /* limits number of page size jumbo clusters */ int nmbjumbo9; /* limits number of 9k jumbo clusters */ int nmbjumbo16; /* limits number of 16k jumbo clusters */ bool mb_use_ext_pgs; /* use EXT_PGS mbufs for sendfile & TLS */ SYSCTL_BOOL(_kern_ipc, OID_AUTO, mb_use_ext_pgs, CTLFLAG_RWTUN, &mb_use_ext_pgs, 0, "Use unmapped mbufs for sendfile(2) and TLS offload"); static quad_t maxmbufmem; /* overall real memory limit for all mbufs */ SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &maxmbufmem, 0, "Maximum real memory allocatable to various mbuf types"); static counter_u64_t snd_tag_count; SYSCTL_COUNTER_U64(_kern_ipc, OID_AUTO, num_snd_tags, CTLFLAG_RW, &snd_tag_count, "# of active mbuf send tags"); /* * tunable_mbinit() has to be run before any mbuf allocations are done. */ static void tunable_mbinit(void *dummy) { quad_t realmem; /* * The default limit for all mbuf related memory is 1/2 of all * available kernel memory (physical or kmem). * At most it can be 3/4 of available kernel memory. */ realmem = qmin((quad_t)physmem * PAGE_SIZE, vm_kmem_size); maxmbufmem = realmem / 2; TUNABLE_QUAD_FETCH("kern.ipc.maxmbufmem", &maxmbufmem); if (maxmbufmem > realmem / 4 * 3) maxmbufmem = realmem / 4 * 3; TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters); if (nmbclusters == 0) nmbclusters = maxmbufmem / MCLBYTES / 4; TUNABLE_INT_FETCH("kern.ipc.nmbjumbop", &nmbjumbop); if (nmbjumbop == 0) nmbjumbop = maxmbufmem / MJUMPAGESIZE / 4; TUNABLE_INT_FETCH("kern.ipc.nmbjumbo9", &nmbjumbo9); if (nmbjumbo9 == 0) nmbjumbo9 = maxmbufmem / MJUM9BYTES / 6; TUNABLE_INT_FETCH("kern.ipc.nmbjumbo16", &nmbjumbo16); if (nmbjumbo16 == 0) nmbjumbo16 = maxmbufmem / MJUM16BYTES / 6; /* * We need at least as many mbufs as we have clusters of * the various types added together. */ TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs); if (nmbufs < nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) nmbufs = lmax(maxmbufmem / MSIZE / 5, nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16); } SYSINIT(tunable_mbinit, SI_SUB_KMEM, SI_ORDER_MIDDLE, tunable_mbinit, NULL); static int sysctl_nmbclusters(SYSCTL_HANDLER_ARGS) { int error, newnmbclusters; newnmbclusters = nmbclusters; error = sysctl_handle_int(oidp, &newnmbclusters, 0, req); if (error == 0 && req->newptr && newnmbclusters != nmbclusters) { if (newnmbclusters > nmbclusters && nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { nmbclusters = newnmbclusters; nmbclusters = uma_zone_set_max(zone_clust, nmbclusters); EVENTHANDLER_INVOKE(nmbclusters_change); } else error = EINVAL; } return (error); } SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbclusters, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &nmbclusters, 0, sysctl_nmbclusters, "IU", "Maximum number of mbuf clusters allowed"); static int sysctl_nmbjumbop(SYSCTL_HANDLER_ARGS) { int error, newnmbjumbop; newnmbjumbop = nmbjumbop; error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req); if (error == 0 && req->newptr && newnmbjumbop != nmbjumbop) { if (newnmbjumbop > nmbjumbop && nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { nmbjumbop = newnmbjumbop; nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop); } else error = EINVAL; } return (error); } SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbop, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &nmbjumbop, 0, sysctl_nmbjumbop, "IU", "Maximum number of mbuf page size jumbo clusters allowed"); static int sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS) { int error, newnmbjumbo9; newnmbjumbo9 = nmbjumbo9; error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req); if (error == 0 && req->newptr && newnmbjumbo9 != nmbjumbo9) { if (newnmbjumbo9 > nmbjumbo9 && nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { nmbjumbo9 = newnmbjumbo9; nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9); } else error = EINVAL; } return (error); } SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo9, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &nmbjumbo9, 0, sysctl_nmbjumbo9, "IU", "Maximum number of mbuf 9k jumbo clusters allowed"); static int sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS) { int error, newnmbjumbo16; newnmbjumbo16 = nmbjumbo16; error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req); if (error == 0 && req->newptr && newnmbjumbo16 != nmbjumbo16) { if (newnmbjumbo16 > nmbjumbo16 && nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { nmbjumbo16 = newnmbjumbo16; nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16); } else error = EINVAL; } return (error); } SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo16, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &nmbjumbo16, 0, sysctl_nmbjumbo16, "IU", "Maximum number of mbuf 16k jumbo clusters allowed"); static int sysctl_nmbufs(SYSCTL_HANDLER_ARGS) { int error, newnmbufs; newnmbufs = nmbufs; error = sysctl_handle_int(oidp, &newnmbufs, 0, req); if (error == 0 && req->newptr && newnmbufs != nmbufs) { if (newnmbufs > nmbufs) { nmbufs = newnmbufs; nmbufs = uma_zone_set_max(zone_mbuf, nmbufs); EVENTHANDLER_INVOKE(nmbufs_change); } else error = EINVAL; } return (error); } SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbufs, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &nmbufs, 0, sysctl_nmbufs, "IU", "Maximum number of mbufs allowed"); /* * Zones from which we allocate. */ uma_zone_t zone_mbuf; uma_zone_t zone_clust; uma_zone_t zone_pack; uma_zone_t zone_jumbop; uma_zone_t zone_jumbo9; uma_zone_t zone_jumbo16; /* * Local prototypes. */ static int mb_ctor_mbuf(void *, int, void *, int); static int mb_ctor_clust(void *, int, void *, int); static int mb_ctor_pack(void *, int, void *, int); static void mb_dtor_mbuf(void *, int, void *); static void mb_dtor_pack(void *, int, void *); static int mb_zinit_pack(void *, int, int); static void mb_zfini_pack(void *, int); static void mb_reclaim(uma_zone_t, int); /* Ensure that MSIZE is a power of 2. */ CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE); _Static_assert(offsetof(struct mbuf, m_ext) == offsetof(struct mbuf, m_ext_pgs.m_ext), "m_ext offset mismatch between mbuf and ext_pgs"); _Static_assert(sizeof(struct mbuf) <= MSIZE, "size of mbuf exceeds MSIZE"); /* * Initialize FreeBSD Network buffer allocation. */ static void mbuf_init(void *dummy) { /* * Configure UMA zones for Mbufs, Clusters, and Packets. */ zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE, mb_ctor_mbuf, mb_dtor_mbuf, NULL, NULL, MSIZE - 1, UMA_ZONE_CONTIG | UMA_ZONE_MAXBUCKET); if (nmbufs > 0) nmbufs = uma_zone_set_max(zone_mbuf, nmbufs); uma_zone_set_warning(zone_mbuf, "kern.ipc.nmbufs limit reached"); uma_zone_set_maxaction(zone_mbuf, mb_reclaim); zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES, mb_ctor_clust, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_CONTIG); if (nmbclusters > 0) nmbclusters = uma_zone_set_max(zone_clust, nmbclusters); uma_zone_set_warning(zone_clust, "kern.ipc.nmbclusters limit reached"); uma_zone_set_maxaction(zone_clust, mb_reclaim); zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack, mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf); /* Make jumbo frame zone too. Page size, 9k and 16k. */ zone_jumbop = uma_zcreate(MBUF_JUMBOP_MEM_NAME, MJUMPAGESIZE, mb_ctor_clust, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_CONTIG); if (nmbjumbop > 0) nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop); uma_zone_set_warning(zone_jumbop, "kern.ipc.nmbjumbop limit reached"); uma_zone_set_maxaction(zone_jumbop, mb_reclaim); zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES, mb_ctor_clust, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_CONTIG); if (nmbjumbo9 > 0) nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9); uma_zone_set_warning(zone_jumbo9, "kern.ipc.nmbjumbo9 limit reached"); uma_zone_set_maxaction(zone_jumbo9, mb_reclaim); zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES, mb_ctor_clust, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_CONTIG); if (nmbjumbo16 > 0) nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16); uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached"); uma_zone_set_maxaction(zone_jumbo16, mb_reclaim); /* * Hook event handler for low-memory situation, used to * drain protocols and push data back to the caches (UMA * later pushes it back to VM). */ EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL, EVENTHANDLER_PRI_FIRST); snd_tag_count = counter_u64_alloc(M_WAITOK); } SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL); #ifdef DEBUGNET /* * debugnet makes use of a pre-allocated pool of mbufs and clusters. When * debugnet is configured, we initialize a set of UMA cache zones which return * items from this pool. At panic-time, the regular UMA zone pointers are * overwritten with those of the cache zones so that drivers may allocate and * free mbufs and clusters without attempting to allocate physical memory. * * We keep mbufs and clusters in a pair of mbuf queues. In particular, for * the purpose of caching clusters, we treat them as mbufs. */ static struct mbufq dn_mbufq = { STAILQ_HEAD_INITIALIZER(dn_mbufq.mq_head), 0, INT_MAX }; static struct mbufq dn_clustq = { STAILQ_HEAD_INITIALIZER(dn_clustq.mq_head), 0, INT_MAX }; static int dn_clsize; static uma_zone_t dn_zone_mbuf; static uma_zone_t dn_zone_clust; static uma_zone_t dn_zone_pack; static struct debugnet_saved_zones { uma_zone_t dsz_mbuf; uma_zone_t dsz_clust; uma_zone_t dsz_pack; uma_zone_t dsz_jumbop; uma_zone_t dsz_jumbo9; uma_zone_t dsz_jumbo16; bool dsz_debugnet_zones_enabled; } dn_saved_zones; static int dn_buf_import(void *arg, void **store, int count, int domain __unused, int flags) { struct mbufq *q; struct mbuf *m; int i; q = arg; for (i = 0; i < count; i++) { m = mbufq_dequeue(q); if (m == NULL) break; trash_init(m, q == &dn_mbufq ? MSIZE : dn_clsize, flags); store[i] = m; } KASSERT((flags & M_WAITOK) == 0 || i == count, ("%s: ran out of pre-allocated mbufs", __func__)); return (i); } static void dn_buf_release(void *arg, void **store, int count) { struct mbufq *q; struct mbuf *m; int i; q = arg; for (i = 0; i < count; i++) { m = store[i]; (void)mbufq_enqueue(q, m); } } static int dn_pack_import(void *arg __unused, void **store, int count, int domain __unused, int flags __unused) { struct mbuf *m; void *clust; int i; for (i = 0; i < count; i++) { m = m_get(MT_DATA, M_NOWAIT); if (m == NULL) break; clust = uma_zalloc(dn_zone_clust, M_NOWAIT); if (clust == NULL) { m_free(m); break; } mb_ctor_clust(clust, dn_clsize, m, 0); store[i] = m; } KASSERT((flags & M_WAITOK) == 0 || i == count, ("%s: ran out of pre-allocated mbufs", __func__)); return (i); } static void dn_pack_release(void *arg __unused, void **store, int count) { struct mbuf *m; void *clust; int i; for (i = 0; i < count; i++) { m = store[i]; clust = m->m_ext.ext_buf; uma_zfree(dn_zone_clust, clust); uma_zfree(dn_zone_mbuf, m); } } /* * Free the pre-allocated mbufs and clusters reserved for debugnet, and destroy * the corresponding UMA cache zones. */ void debugnet_mbuf_drain(void) { struct mbuf *m; void *item; if (dn_zone_mbuf != NULL) { uma_zdestroy(dn_zone_mbuf); dn_zone_mbuf = NULL; } if (dn_zone_clust != NULL) { uma_zdestroy(dn_zone_clust); dn_zone_clust = NULL; } if (dn_zone_pack != NULL) { uma_zdestroy(dn_zone_pack); dn_zone_pack = NULL; } while ((m = mbufq_dequeue(&dn_mbufq)) != NULL) m_free(m); while ((item = mbufq_dequeue(&dn_clustq)) != NULL) uma_zfree(m_getzone(dn_clsize), item); } /* * Callback invoked immediately prior to starting a debugnet connection. */ void debugnet_mbuf_start(void) { MPASS(!dn_saved_zones.dsz_debugnet_zones_enabled); /* Save the old zone pointers to restore when debugnet is closed. */ dn_saved_zones = (struct debugnet_saved_zones) { .dsz_debugnet_zones_enabled = true, .dsz_mbuf = zone_mbuf, .dsz_clust = zone_clust, .dsz_pack = zone_pack, .dsz_jumbop = zone_jumbop, .dsz_jumbo9 = zone_jumbo9, .dsz_jumbo16 = zone_jumbo16, }; /* * All cluster zones return buffers of the size requested by the * drivers. It's up to the driver to reinitialize the zones if the * MTU of a debugnet-enabled interface changes. */ printf("debugnet: overwriting mbuf zone pointers\n"); zone_mbuf = dn_zone_mbuf; zone_clust = dn_zone_clust; zone_pack = dn_zone_pack; zone_jumbop = dn_zone_clust; zone_jumbo9 = dn_zone_clust; zone_jumbo16 = dn_zone_clust; } /* * Callback invoked when a debugnet connection is closed/finished. */ void debugnet_mbuf_finish(void) { MPASS(dn_saved_zones.dsz_debugnet_zones_enabled); printf("debugnet: restoring mbuf zone pointers\n"); zone_mbuf = dn_saved_zones.dsz_mbuf; zone_clust = dn_saved_zones.dsz_clust; zone_pack = dn_saved_zones.dsz_pack; zone_jumbop = dn_saved_zones.dsz_jumbop; zone_jumbo9 = dn_saved_zones.dsz_jumbo9; zone_jumbo16 = dn_saved_zones.dsz_jumbo16; memset(&dn_saved_zones, 0, sizeof(dn_saved_zones)); } /* * Reinitialize the debugnet mbuf+cluster pool and cache zones. */ void debugnet_mbuf_reinit(int nmbuf, int nclust, int clsize) { struct mbuf *m; void *item; debugnet_mbuf_drain(); dn_clsize = clsize; dn_zone_mbuf = uma_zcache_create("debugnet_" MBUF_MEM_NAME, MSIZE, mb_ctor_mbuf, mb_dtor_mbuf, NULL, NULL, dn_buf_import, dn_buf_release, &dn_mbufq, UMA_ZONE_NOBUCKET); dn_zone_clust = uma_zcache_create("debugnet_" MBUF_CLUSTER_MEM_NAME, clsize, mb_ctor_clust, NULL, NULL, NULL, dn_buf_import, dn_buf_release, &dn_clustq, UMA_ZONE_NOBUCKET); dn_zone_pack = uma_zcache_create("debugnet_" MBUF_PACKET_MEM_NAME, MCLBYTES, mb_ctor_pack, mb_dtor_pack, NULL, NULL, dn_pack_import, dn_pack_release, NULL, UMA_ZONE_NOBUCKET); while (nmbuf-- > 0) { m = m_get(MT_DATA, M_WAITOK); uma_zfree(dn_zone_mbuf, m); } while (nclust-- > 0) { item = uma_zalloc(m_getzone(dn_clsize), M_WAITOK); uma_zfree(dn_zone_clust, item); } } #endif /* DEBUGNET */ /* * Constructor for Mbuf master zone. * * The 'arg' pointer points to a mb_args structure which * contains call-specific information required to support the * mbuf allocation API. See mbuf.h. */ static int mb_ctor_mbuf(void *mem, int size, void *arg, int how) { struct mbuf *m; struct mb_args *args; int error; int flags; short type; args = (struct mb_args *)arg; type = args->type; /* * The mbuf is initialized later. The caller has the * responsibility to set up any MAC labels too. */ if (type == MT_NOINIT) return (0); m = (struct mbuf *)mem; flags = args->flags; MPASS((flags & M_NOFREE) == 0); error = m_init(m, how, type, flags); return (error); } /* * The Mbuf master zone destructor. */ static void mb_dtor_mbuf(void *mem, int size, void *arg) { struct mbuf *m; unsigned long flags; m = (struct mbuf *)mem; flags = (unsigned long)arg; KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__)); if (!(flags & MB_DTOR_SKIP) && (m->m_flags & M_PKTHDR) && !SLIST_EMPTY(&m->m_pkthdr.tags)) m_tag_delete_chain(m, NULL); } /* * The Mbuf Packet zone destructor. */ static void mb_dtor_pack(void *mem, int size, void *arg) { struct mbuf *m; m = (struct mbuf *)mem; if ((m->m_flags & M_PKTHDR) != 0) m_tag_delete_chain(m, NULL); /* Make sure we've got a clean cluster back. */ KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__)); KASSERT(m->m_ext.ext_buf != NULL, ("%s: ext_buf == NULL", __func__)); KASSERT(m->m_ext.ext_free == NULL, ("%s: ext_free != NULL", __func__)); KASSERT(m->m_ext.ext_arg1 == NULL, ("%s: ext_arg1 != NULL", __func__)); KASSERT(m->m_ext.ext_arg2 == NULL, ("%s: ext_arg2 != NULL", __func__)); KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES", __func__)); KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET", __func__)); #ifdef INVARIANTS trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg); #endif /* * If there are processes blocked on zone_clust, waiting for pages * to be freed up, cause them to be woken up by draining the * packet zone. We are exposed to a race here (in the check for * the UMA_ZFLAG_FULL) where we might miss the flag set, but that * is deliberate. We don't want to acquire the zone lock for every * mbuf free. */ if (uma_zone_exhausted(zone_clust)) uma_zone_reclaim(zone_pack, UMA_RECLAIM_DRAIN); } /* * The Cluster and Jumbo[PAGESIZE|9|16] zone constructor. * * Here the 'arg' pointer points to the Mbuf which we * are configuring cluster storage for. If 'arg' is * empty we allocate just the cluster without setting * the mbuf to it. See mbuf.h. */ static int mb_ctor_clust(void *mem, int size, void *arg, int how) { struct mbuf *m; m = (struct mbuf *)arg; if (m != NULL) { m->m_ext.ext_buf = (char *)mem; m->m_data = m->m_ext.ext_buf; m->m_flags |= M_EXT; m->m_ext.ext_free = NULL; m->m_ext.ext_arg1 = NULL; m->m_ext.ext_arg2 = NULL; m->m_ext.ext_size = size; m->m_ext.ext_type = m_gettype(size); m->m_ext.ext_flags = EXT_FLAG_EMBREF; m->m_ext.ext_count = 1; } return (0); } /* * The Packet secondary zone's init routine, executed on the * object's transition from mbuf keg slab to zone cache. */ static int mb_zinit_pack(void *mem, int size, int how) { struct mbuf *m; m = (struct mbuf *)mem; /* m is virgin. */ if (uma_zalloc_arg(zone_clust, m, how) == NULL || m->m_ext.ext_buf == NULL) return (ENOMEM); m->m_ext.ext_type = EXT_PACKET; /* Override. */ #ifdef INVARIANTS trash_init(m->m_ext.ext_buf, MCLBYTES, how); #endif return (0); } /* * The Packet secondary zone's fini routine, executed on the * object's transition from zone cache to keg slab. */ static void mb_zfini_pack(void *mem, int size) { struct mbuf *m; m = (struct mbuf *)mem; #ifdef INVARIANTS trash_fini(m->m_ext.ext_buf, MCLBYTES); #endif uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL); #ifdef INVARIANTS trash_dtor(mem, size, NULL); #endif } /* * The "packet" keg constructor. */ static int mb_ctor_pack(void *mem, int size, void *arg, int how) { struct mbuf *m; struct mb_args *args; int error, flags; short type; m = (struct mbuf *)mem; args = (struct mb_args *)arg; flags = args->flags; type = args->type; MPASS((flags & M_NOFREE) == 0); #ifdef INVARIANTS trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how); #endif error = m_init(m, how, type, flags); /* m_ext is already initialized. */ m->m_data = m->m_ext.ext_buf; m->m_flags = (flags | M_EXT); return (error); } /* * This is the protocol drain routine. Called by UMA whenever any of the * mbuf zones is closed to its limit. * * No locks should be held when this is called. The drain routines have to * presently acquire some locks which raises the possibility of lock order * reversal. */ static void mb_reclaim(uma_zone_t zone __unused, int pending __unused) { struct epoch_tracker et; struct domain *dp; struct protosw *pr; WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL, __func__); NET_EPOCH_ENTER(et); for (dp = domains; dp != NULL; dp = dp->dom_next) for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) if (pr->pr_drain != NULL) (*pr->pr_drain)(); NET_EPOCH_EXIT(et); } /* * Free "count" units of I/O from an mbuf chain. They could be held * in EXT_PGS or just as a normal mbuf. This code is intended to be * called in an error path (I/O error, closed connection, etc). */ void mb_free_notready(struct mbuf *m, int count) { int i; for (i = 0; i < count && m != NULL; i++) { if ((m->m_flags & M_EXT) != 0 && m->m_ext.ext_type == EXT_PGS) { m->m_ext_pgs.nrdy--; if (m->m_ext_pgs.nrdy != 0) continue; } m = m_free(m); } KASSERT(i == count, ("Removed only %d items from %p", i, m)); } /* * Compress an unmapped mbuf into a simple mbuf when it holds a small * amount of data. This is used as a DOS defense to avoid having * small packets tie up wired pages, an ext_pgs structure, and an * mbuf. Since this converts the existing mbuf in place, it can only * be used if there are no other references to 'm'. */ int mb_unmapped_compress(struct mbuf *m) { volatile u_int *refcnt; struct mbuf m_temp; /* * Assert that 'm' does not have a packet header. If 'm' had * a packet header, it would only be able to hold MHLEN bytes * and m_data would have to be initialized differently. */ KASSERT((m->m_flags & M_PKTHDR) == 0 && (m->m_flags & M_EXT) && m->m_ext.ext_type == EXT_PGS, ("%s: m %p !M_EXT or !EXT_PGS or M_PKTHDR", __func__, m)); KASSERT(m->m_len <= MLEN, ("m_len too large %p", m)); if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) { refcnt = &m->m_ext.ext_count; } else { KASSERT(m->m_ext.ext_cnt != NULL, ("%s: no refcounting pointer on %p", __func__, m)); refcnt = m->m_ext.ext_cnt; } if (*refcnt != 1) return (EBUSY); m_init(&m_temp, M_NOWAIT, MT_DATA, 0); /* copy data out of old mbuf */ m_copydata(m, 0, m->m_len, mtod(&m_temp, char *)); m_temp.m_len = m->m_len; /* Free the backing pages. */ m->m_ext.ext_free(m); /* Turn 'm' into a "normal" mbuf. */ m->m_flags &= ~(M_EXT | M_RDONLY | M_NOMAP); m->m_data = m->m_dat; /* copy data back into m */ m_copydata(&m_temp, 0, m_temp.m_len, mtod(m, char *)); return (0); } /* * These next few routines are used to permit downgrading an unmapped * mbuf to a chain of mapped mbufs. This is used when an interface * doesn't supported unmapped mbufs or if checksums need to be * computed in software. * * Each unmapped mbuf is converted to a chain of mbufs. First, any * TLS header data is stored in a regular mbuf. Second, each page of * unmapped data is stored in an mbuf with an EXT_SFBUF external * cluster. These mbufs use an sf_buf to provide a valid KVA for the * associated physical page. They also hold a reference on the * original EXT_PGS mbuf to ensure the physical page doesn't go away. * Finally, any TLS trailer data is stored in a regular mbuf. * * mb_unmapped_free_mext() is the ext_free handler for the EXT_SFBUF * mbufs. It frees the associated sf_buf and releases its reference * on the original EXT_PGS mbuf. * * _mb_unmapped_to_ext() is a helper function that converts a single * unmapped mbuf into a chain of mbufs. * * mb_unmapped_to_ext() is the public function that walks an mbuf * chain converting any unmapped mbufs to mapped mbufs. It returns * the new chain of unmapped mbufs on success. On failure it frees * the original mbuf chain and returns NULL. */ static void mb_unmapped_free_mext(struct mbuf *m) { struct sf_buf *sf; struct mbuf *old_m; sf = m->m_ext.ext_arg1; sf_buf_free(sf); /* Drop the reference on the backing EXT_PGS mbuf. */ old_m = m->m_ext.ext_arg2; mb_free_ext(old_m); } static struct mbuf * _mb_unmapped_to_ext(struct mbuf *m) { struct mbuf_ext_pgs *ext_pgs; struct mbuf *m_new, *top, *prev, *mref; struct sf_buf *sf; vm_page_t pg; int i, len, off, pglen, pgoff, seglen, segoff; volatile u_int *refcnt; u_int ref_inc = 0; MBUF_EXT_PGS_ASSERT(m); ext_pgs = &m->m_ext_pgs; len = m->m_len; KASSERT(ext_pgs->tls == NULL, ("%s: can't convert TLS mbuf %p", __func__, m)); /* See if this is the mbuf that holds the embedded refcount. */ if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) { refcnt = &m->m_ext.ext_count; mref = m; } else { KASSERT(m->m_ext.ext_cnt != NULL, ("%s: no refcounting pointer on %p", __func__, m)); refcnt = m->m_ext.ext_cnt; mref = __containerof(refcnt, struct mbuf, m_ext.ext_count); } /* Skip over any data removed from the front. */ off = mtod(m, vm_offset_t); top = NULL; if (ext_pgs->hdr_len != 0) { if (off >= ext_pgs->hdr_len) { off -= ext_pgs->hdr_len; } else { seglen = ext_pgs->hdr_len - off; segoff = off; seglen = min(seglen, len); off = 0; len -= seglen; m_new = m_get(M_NOWAIT, MT_DATA); if (m_new == NULL) goto fail; m_new->m_len = seglen; prev = top = m_new; memcpy(mtod(m_new, void *), &ext_pgs->m_epg_hdr[segoff], seglen); } } pgoff = ext_pgs->first_pg_off; for (i = 0; i < ext_pgs->npgs && len > 0; i++) { pglen = mbuf_ext_pg_len(ext_pgs, i, pgoff); if (off >= pglen) { off -= pglen; pgoff = 0; continue; } seglen = pglen - off; segoff = pgoff + off; off = 0; seglen = min(seglen, len); len -= seglen; pg = PHYS_TO_VM_PAGE(ext_pgs->m_epg_pa[i]); m_new = m_get(M_NOWAIT, MT_DATA); if (m_new == NULL) goto fail; if (top == NULL) { top = prev = m_new; } else { prev->m_next = m_new; prev = m_new; } sf = sf_buf_alloc(pg, SFB_NOWAIT); if (sf == NULL) goto fail; ref_inc++; m_extadd(m_new, (char *)sf_buf_kva(sf), PAGE_SIZE, mb_unmapped_free_mext, sf, mref, M_RDONLY, EXT_SFBUF); m_new->m_data += segoff; m_new->m_len = seglen; pgoff = 0; }; if (len != 0) { KASSERT((off + len) <= ext_pgs->trail_len, ("off + len > trail (%d + %d > %d)", off, len, ext_pgs->trail_len)); m_new = m_get(M_NOWAIT, MT_DATA); if (m_new == NULL) goto fail; if (top == NULL) top = m_new; else prev->m_next = m_new; m_new->m_len = len; memcpy(mtod(m_new, void *), &ext_pgs->m_epg_trail[off], len); } if (ref_inc != 0) { /* * Obtain an additional reference on the old mbuf for * each created EXT_SFBUF mbuf. They will be dropped * in mb_unmapped_free_mext(). */ if (*refcnt == 1) *refcnt += ref_inc; else atomic_add_int(refcnt, ref_inc); } m_free(m); return (top); fail: if (ref_inc != 0) { /* * Obtain an additional reference on the old mbuf for * each created EXT_SFBUF mbuf. They will be * immediately dropped when these mbufs are freed * below. */ if (*refcnt == 1) *refcnt += ref_inc; else atomic_add_int(refcnt, ref_inc); } m_free(m); m_freem(top); return (NULL); } struct mbuf * mb_unmapped_to_ext(struct mbuf *top) { struct mbuf *m, *next, *prev = NULL; prev = NULL; for (m = top; m != NULL; m = next) { /* m might be freed, so cache the next pointer. */ next = m->m_next; if (m->m_flags & M_NOMAP) { if (prev != NULL) { /* * Remove 'm' from the new chain so * that the 'top' chain terminates * before 'm' in case 'top' is freed * due to an error. */ prev->m_next = NULL; } m = _mb_unmapped_to_ext(m); if (m == NULL) { m_freem(top); m_freem(next); return (NULL); } if (prev == NULL) { top = m; } else { prev->m_next = m; } /* * Replaced one mbuf with a chain, so we must * find the end of chain. */ prev = m_last(m); } else { if (prev != NULL) { prev->m_next = m; } prev = m; } } return (top); } /* * Allocate an empty EXT_PGS mbuf. The ext_free routine is * responsible for freeing any pages backing this mbuf when it is * freed. */ struct mbuf * mb_alloc_ext_pgs(int how, m_ext_free_t ext_free) { struct mbuf *m; struct mbuf_ext_pgs *ext_pgs; m = m_get(how, MT_DATA); if (m == NULL) return (NULL); ext_pgs = &m->m_ext_pgs; ext_pgs->npgs = 0; ext_pgs->nrdy = 0; ext_pgs->first_pg_off = 0; ext_pgs->last_pg_len = 0; ext_pgs->flags = 0; ext_pgs->hdr_len = 0; ext_pgs->trail_len = 0; ext_pgs->tls = NULL; ext_pgs->so = NULL; m->m_data = NULL; m->m_flags |= (M_EXT | M_RDONLY | M_NOMAP); m->m_ext.ext_type = EXT_PGS; m->m_ext.ext_flags = EXT_FLAG_EMBREF; m->m_ext.ext_count = 1; m->m_ext.ext_size = 0; m->m_ext.ext_free = ext_free; return (m); } #ifdef INVARIANT_SUPPORT void mb_ext_pgs_check(struct mbuf_ext_pgs *ext_pgs) { /* * NB: This expects a non-empty buffer (npgs > 0 and * last_pg_len > 0). */ KASSERT(ext_pgs->npgs > 0, ("ext_pgs with no valid pages: %p", ext_pgs)); KASSERT(ext_pgs->npgs <= nitems(ext_pgs->m_epg_pa), ("ext_pgs with too many pages: %p", ext_pgs)); KASSERT(ext_pgs->nrdy <= ext_pgs->npgs, ("ext_pgs with too many ready pages: %p", ext_pgs)); KASSERT(ext_pgs->first_pg_off < PAGE_SIZE, ("ext_pgs with too large page offset: %p", ext_pgs)); KASSERT(ext_pgs->last_pg_len > 0, ("ext_pgs with zero last page length: %p", ext_pgs)); KASSERT(ext_pgs->last_pg_len <= PAGE_SIZE, ("ext_pgs with too large last page length: %p", ext_pgs)); if (ext_pgs->npgs == 1) { KASSERT(ext_pgs->first_pg_off + ext_pgs->last_pg_len <= PAGE_SIZE, ("ext_pgs with single page too large: %p", ext_pgs)); } KASSERT(ext_pgs->hdr_len <= sizeof(ext_pgs->m_epg_hdr), ("ext_pgs with too large header length: %p", ext_pgs)); KASSERT(ext_pgs->trail_len <= sizeof(ext_pgs->m_epg_trail), ("ext_pgs with too large header length: %p", ext_pgs)); } #endif /* * Clean up after mbufs with M_EXT storage attached to them if the * reference count hits 1. */ void mb_free_ext(struct mbuf *m) { volatile u_int *refcnt; struct mbuf *mref; int freembuf; KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m)); /* See if this is the mbuf that holds the embedded refcount. */ if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) { refcnt = &m->m_ext.ext_count; mref = m; } else { KASSERT(m->m_ext.ext_cnt != NULL, ("%s: no refcounting pointer on %p", __func__, m)); refcnt = m->m_ext.ext_cnt; mref = __containerof(refcnt, struct mbuf, m_ext.ext_count); } /* * Check if the header is embedded in the cluster. It is * important that we can't touch any of the mbuf fields * after we have freed the external storage, since mbuf * could have been embedded in it. For now, the mbufs * embedded into the cluster are always of type EXT_EXTREF, * and for this type we won't free the mref. */ if (m->m_flags & M_NOFREE) { freembuf = 0; KASSERT(m->m_ext.ext_type == EXT_EXTREF || m->m_ext.ext_type == EXT_RXRING, ("%s: no-free mbuf %p has wrong type", __func__, m)); } else freembuf = 1; /* Free attached storage if this mbuf is the only reference to it. */ if (*refcnt == 1 || atomic_fetchadd_int(refcnt, -1) == 1) { switch (m->m_ext.ext_type) { case EXT_PACKET: /* The packet zone is special. */ if (*refcnt == 0) *refcnt = 1; uma_zfree(zone_pack, mref); break; case EXT_CLUSTER: uma_zfree(zone_clust, m->m_ext.ext_buf); uma_zfree(zone_mbuf, mref); break; case EXT_JUMBOP: uma_zfree(zone_jumbop, m->m_ext.ext_buf); uma_zfree(zone_mbuf, mref); break; case EXT_JUMBO9: uma_zfree(zone_jumbo9, m->m_ext.ext_buf); uma_zfree(zone_mbuf, mref); break; case EXT_JUMBO16: uma_zfree(zone_jumbo16, m->m_ext.ext_buf); uma_zfree(zone_mbuf, mref); break; case EXT_PGS: { #ifdef KERN_TLS struct mbuf_ext_pgs *pgs; struct ktls_session *tls; #endif KASSERT(mref->m_ext.ext_free != NULL, ("%s: ext_free not set", __func__)); mref->m_ext.ext_free(mref); #ifdef KERN_TLS pgs = &mref->m_ext_pgs; tls = pgs->tls; if (tls != NULL && !refcount_release_if_not_last(&tls->refcount)) ktls_enqueue_to_free(pgs); else #endif uma_zfree(zone_mbuf, mref); break; } case EXT_SFBUF: case EXT_NET_DRV: case EXT_MOD_TYPE: case EXT_DISPOSABLE: KASSERT(mref->m_ext.ext_free != NULL, ("%s: ext_free not set", __func__)); mref->m_ext.ext_free(mref); uma_zfree(zone_mbuf, mref); break; case EXT_EXTREF: KASSERT(m->m_ext.ext_free != NULL, ("%s: ext_free not set", __func__)); m->m_ext.ext_free(m); break; case EXT_RXRING: KASSERT(m->m_ext.ext_free == NULL, ("%s: ext_free is set", __func__)); break; default: KASSERT(m->m_ext.ext_type == 0, ("%s: unknown ext_type", __func__)); } } if (freembuf && m != mref) uma_zfree(zone_mbuf, m); } /* * Official mbuf(9) allocation KPI for stack and drivers: * * m_get() - a single mbuf without any attachments, sys/mbuf.h. * m_gethdr() - a single mbuf initialized as M_PKTHDR, sys/mbuf.h. * m_getcl() - an mbuf + 2k cluster, sys/mbuf.h. * m_clget() - attach cluster to already allocated mbuf. * m_cljget() - attach jumbo cluster to already allocated mbuf. * m_get2() - allocate minimum mbuf that would fit size argument. * m_getm2() - allocate a chain of mbufs/clusters. * m_extadd() - attach external cluster to mbuf. * * m_free() - free single mbuf with its tags and ext, sys/mbuf.h. * m_freem() - free chain of mbufs. */ int m_clget(struct mbuf *m, int how) { KASSERT((m->m_flags & M_EXT) == 0, ("%s: mbuf %p has M_EXT", __func__, m)); m->m_ext.ext_buf = (char *)NULL; uma_zalloc_arg(zone_clust, m, how); /* * On a cluster allocation failure, drain the packet zone and retry, * we might be able to loosen a few clusters up on the drain. */ if ((how & M_NOWAIT) && (m->m_ext.ext_buf == NULL)) { uma_zone_reclaim(zone_pack, UMA_RECLAIM_DRAIN); uma_zalloc_arg(zone_clust, m, how); } MBUF_PROBE2(m__clget, m, how); return (m->m_flags & M_EXT); } /* * m_cljget() is different from m_clget() as it can allocate clusters without * attaching them to an mbuf. In that case the return value is the pointer * to the cluster of the requested size. If an mbuf was specified, it gets * the cluster attached to it and the return value can be safely ignored. * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES. */ void * m_cljget(struct mbuf *m, int how, int size) { uma_zone_t zone; void *retval; if (m != NULL) { KASSERT((m->m_flags & M_EXT) == 0, ("%s: mbuf %p has M_EXT", __func__, m)); m->m_ext.ext_buf = NULL; } zone = m_getzone(size); retval = uma_zalloc_arg(zone, m, how); MBUF_PROBE4(m__cljget, m, how, size, retval); return (retval); } /* * m_get2() allocates minimum mbuf that would fit "size" argument. */ struct mbuf * m_get2(int size, int how, short type, int flags) { struct mb_args args; struct mbuf *m, *n; args.flags = flags; args.type = type; if (size <= MHLEN || (size <= MLEN && (flags & M_PKTHDR) == 0)) return (uma_zalloc_arg(zone_mbuf, &args, how)); if (size <= MCLBYTES) return (uma_zalloc_arg(zone_pack, &args, how)); if (size > MJUMPAGESIZE) return (NULL); m = uma_zalloc_arg(zone_mbuf, &args, how); if (m == NULL) return (NULL); n = uma_zalloc_arg(zone_jumbop, m, how); if (n == NULL) { uma_zfree(zone_mbuf, m); return (NULL); } return (m); } /* * m_getjcl() returns an mbuf with a cluster of the specified size attached. * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES. */ struct mbuf * m_getjcl(int how, short type, int flags, int size) { struct mb_args args; struct mbuf *m, *n; uma_zone_t zone; if (size == MCLBYTES) return m_getcl(how, type, flags); args.flags = flags; args.type = type; m = uma_zalloc_arg(zone_mbuf, &args, how); if (m == NULL) return (NULL); zone = m_getzone(size); n = uma_zalloc_arg(zone, m, how); if (n == NULL) { uma_zfree(zone_mbuf, m); return (NULL); } return (m); } /* * Allocate a given length worth of mbufs and/or clusters (whatever fits * best) and return a pointer to the top of the allocated chain. If an * existing mbuf chain is provided, then we will append the new chain * to the existing one and return a pointer to the provided mbuf. */ struct mbuf * m_getm2(struct mbuf *m, int len, int how, short type, int flags) { struct mbuf *mb, *nm = NULL, *mtail = NULL; KASSERT(len >= 0, ("%s: len is < 0", __func__)); /* Validate flags. */ flags &= (M_PKTHDR | M_EOR); /* Packet header mbuf must be first in chain. */ if ((flags & M_PKTHDR) && m != NULL) flags &= ~M_PKTHDR; /* Loop and append maximum sized mbufs to the chain tail. */ while (len > 0) { if (len > MCLBYTES) mb = m_getjcl(how, type, (flags & M_PKTHDR), MJUMPAGESIZE); else if (len >= MINCLSIZE) mb = m_getcl(how, type, (flags & M_PKTHDR)); else if (flags & M_PKTHDR) mb = m_gethdr(how, type); else mb = m_get(how, type); /* Fail the whole operation if one mbuf can't be allocated. */ if (mb == NULL) { if (nm != NULL) m_freem(nm); return (NULL); } /* Book keeping. */ len -= M_SIZE(mb); if (mtail != NULL) mtail->m_next = mb; else nm = mb; mtail = mb; flags &= ~M_PKTHDR; /* Only valid on the first mbuf. */ } if (flags & M_EOR) mtail->m_flags |= M_EOR; /* Only valid on the last mbuf. */ /* If mbuf was supplied, append new chain to the end of it. */ if (m != NULL) { for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next) ; mtail->m_next = nm; mtail->m_flags &= ~M_EOR; } else m = nm; return (m); } /*- * Configure a provided mbuf to refer to the provided external storage * buffer and setup a reference count for said buffer. * * Arguments: * mb The existing mbuf to which to attach the provided buffer. * buf The address of the provided external storage buffer. * size The size of the provided buffer. * freef A pointer to a routine that is responsible for freeing the * provided external storage buffer. * args A pointer to an argument structure (of any type) to be passed * to the provided freef routine (may be NULL). * flags Any other flags to be passed to the provided mbuf. * type The type that the external storage buffer should be * labeled with. * * Returns: * Nothing. */ void m_extadd(struct mbuf *mb, char *buf, u_int size, m_ext_free_t freef, void *arg1, void *arg2, int flags, int type) { KASSERT(type != EXT_CLUSTER, ("%s: EXT_CLUSTER not allowed", __func__)); mb->m_flags |= (M_EXT | flags); mb->m_ext.ext_buf = buf; mb->m_data = mb->m_ext.ext_buf; mb->m_ext.ext_size = size; mb->m_ext.ext_free = freef; mb->m_ext.ext_arg1 = arg1; mb->m_ext.ext_arg2 = arg2; mb->m_ext.ext_type = type; if (type != EXT_EXTREF) { mb->m_ext.ext_count = 1; mb->m_ext.ext_flags = EXT_FLAG_EMBREF; } else mb->m_ext.ext_flags = 0; } /* * Free an entire chain of mbufs and associated external buffers, if * applicable. */ void m_freem(struct mbuf *mb) { MBUF_PROBE1(m__freem, mb); while (mb != NULL) mb = m_free(mb); } void m_snd_tag_init(struct m_snd_tag *mst, struct ifnet *ifp) { if_ref(ifp); mst->ifp = ifp; refcount_init(&mst->refcount, 1); counter_u64_add(snd_tag_count, 1); } void m_snd_tag_destroy(struct m_snd_tag *mst) { struct ifnet *ifp; ifp = mst->ifp; ifp->if_snd_tag_free(mst); if_rele(ifp); counter_u64_add(snd_tag_count, -1); } /* * Allocate an mbuf with anonymous external pages. */ struct mbuf * -mb_alloc_ext_plus_pages(int len, int how, bool pkthdr, m_ext_free_t ext_free) +mb_alloc_ext_plus_pages(int len, int how, m_ext_free_t ext_free) { struct mbuf *m; vm_page_t pg; int i, npgs; - m = mb_alloc_ext_pgs(how, pkthdr, ext_free); + m = mb_alloc_ext_pgs(how, ext_free); if (m == NULL) return (NULL); - m->m_ext.ext_pgs->flags |= MBUF_PEXT_FLAG_ANON; + m->m_ext_pgs.flags |= MBUF_PEXT_FLAG_ANON; npgs = howmany(len, PAGE_SIZE); for (i = 0; i < npgs; i++) { do { pg = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_NODUMP | VM_ALLOC_WIRED); if (pg == NULL) { if (how == M_NOWAIT) { - m->m_ext.ext_pgs->npgs = i; + m->m_ext_pgs.npgs = i; m_free(m); return (NULL); } vm_wait(NULL); } } while (pg == NULL); - m->m_ext.ext_pgs->pa[i] = VM_PAGE_TO_PHYS(pg); + m->m_epg_pa[i] = VM_PAGE_TO_PHYS(pg); } - m->m_ext.ext_pgs->npgs = npgs; + m->m_ext_pgs.npgs = npgs; return (m); } /* * Copy the data in the mbuf chain to a chain of mbufs with external pages. * len is the length of data in the input mbuf chain. * mlen is the maximum number of bytes put into each ext_page mbuf. */ struct mbuf * -mb_copym_ext_pgs(struct mbuf *mp, int len, int mlen, int how, bool pkthdr, +mb_copym_ext_pgs(struct mbuf *mp, int len, int mlen, int how, m_ext_free_t ext_free, struct mbuf **mlast) { struct mbuf *m, *mout = NULL; char *pgpos = NULL, *mbpos; int i = 0, mblen, mbufsiz, pglen, xfer; if (len == 0) return (NULL); m = NULL; pglen = mblen = 0; do { if (pglen == 0) { - if (m == NULL || ++i == m->m_ext.ext_pgs->npgs) { + if (m == NULL || ++i == m->m_ext_pgs.npgs) { mbufsiz = min(mlen, len); if (m == NULL) { m = mout = mb_alloc_ext_plus_pages( - mbufsiz, how, pkthdr, ext_free); + mbufsiz, how, ext_free); if (m == NULL) return (m); - if (pkthdr) - m->m_pkthdr.len = len; } else { - m->m_ext.ext_pgs->last_pg_len = + m->m_ext_pgs.last_pg_len = PAGE_SIZE; m->m_next = mb_alloc_ext_plus_pages( - mbufsiz, how, false, ext_free); + mbufsiz, how, ext_free); m = m->m_next; if (m == NULL) { m_freem(mout); return (m); } } i = 0; } pgpos = (char *)(void *) - PHYS_TO_DMAP(m->m_ext.ext_pgs->pa[i]); + PHYS_TO_DMAP(m->m_epg_pa[i]); pglen = PAGE_SIZE; } while (mblen == 0) { if (mp == NULL) { m_freem(mout); return (NULL); } KASSERT((mp->m_flags & M_NOMAP) == 0, ("mb_copym_ext_pgs: ext_pgs input mbuf")); mbpos = mtod(mp, char *); mblen = mp->m_len; mp = mp->m_next; } xfer = min(mblen, pglen); bcopy(mbpos, pgpos, xfer); pgpos += xfer; mbpos += xfer; pglen -= xfer; mblen -= xfer; len -= xfer; m->m_len += xfer; } while (len > 0); - m->m_ext.ext_pgs->last_pg_len = PAGE_SIZE - pglen; + m->m_ext_pgs.last_pg_len = PAGE_SIZE - pglen; if (mlast != NULL) *mlast = m; return (mout); } /* * Split an ext_pgs mbuf list into two lists at len bytes. * Similar to m_split(), but for ext_pgs mbufs with * anonymous pages. */ struct mbuf * mb_splitatpos_ext(struct mbuf *m0, int len, int how) { struct mbuf *m, *mp; struct mbuf_ext_pgs *pgs, *pgs0; vm_page_t pg; int i, j, left, pgno, plen, trim; char *cp, *cp0; /* Nothing to do. */ if (len == 0) return (NULL); /* Find the correct mbuf to split at. */ for (mp = m0; mp != NULL && len > mp->m_len; mp = mp->m_next) len -= mp->m_len; if (mp == NULL) return (NULL); /* If len == mp->m_len, we can just split the mbuf list. */ if (len == mp->m_len) { m = mp->m_next; mp->m_next = NULL; return (m); } /* Find the page to split at. */ KASSERT((mp->m_flags & (M_EXT | M_NOMAP)) == (M_EXT | M_NOMAP), ("mb_splitatpos_ext: m0 not ext_pgs")); - pgs = mp->m_ext.ext_pgs; + pgs = &mp->m_ext_pgs; KASSERT((pgs->flags & MBUF_PEXT_FLAG_ANON) != 0, ("mb_splitatpos_ext: not anonymous pages")); pgno = 0; left = len; do { if (pgno == 0) plen = mbuf_ext_pg_len(pgs, 0, pgs->first_pg_off); else plen = mbuf_ext_pg_len(pgs, pgno, 0); if (left <= plen) break; left -= plen; pgno++; } while (pgno < pgs->npgs); if (pgno == pgs->npgs) panic("mb_splitatpos_ext"); mp->m_len = len; - m = mb_alloc_ext_pgs(how, false, mb_free_mext_pgs); + m = mb_alloc_ext_pgs(how, mb_free_mext_pgs); if (m == NULL) return (NULL); - pgs0 = m->m_ext.ext_pgs; + pgs0 = &m->m_ext_pgs; pgs0->flags |= MBUF_PEXT_FLAG_ANON; /* * If left < plen, allocate a new page for the new mbuf * and copy the data after left in the page to this new * page. */ if (left < plen) { do { pg = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_NODUMP | VM_ALLOC_WIRED); if (pg == NULL) { if (how == M_NOWAIT) { m_free(m); return (NULL); } vm_wait(NULL); } } while (pg == NULL); - pgs0->pa[0] = VM_PAGE_TO_PHYS(pg); + m->m_epg_pa[0] = VM_PAGE_TO_PHYS(pg); pgs0->npgs++; trim = plen - left; - cp = (char *)(void *)PHYS_TO_DMAP(pgs->pa[pgno]); - cp0 = (char *)(void *)PHYS_TO_DMAP(pgs0->pa[0]); + cp = (char *)(void *)PHYS_TO_DMAP(mp->m_epg_pa[pgno]); + cp0 = (char *)(void *)PHYS_TO_DMAP(m->m_epg_pa[0]); if (pgno == 0) cp += pgs->first_pg_off; cp += left; if (pgno == pgs->npgs - 1) pgs0->last_pg_len = trim; else { pgs0->last_pg_len = pgs->last_pg_len; pgs0->first_pg_off = PAGE_SIZE - trim; cp0 += PAGE_SIZE - trim; } memcpy(cp0, cp, trim); m->m_len = trim; } else pgs0->last_pg_len = pgs->last_pg_len; /* Move the pages beyond pgno to the new mbuf. */ for (i = pgno + 1, j = pgs0->npgs; i < pgs->npgs; i++, j++) { - pgs0->pa[j] = pgs->pa[i]; + m->m_epg_pa[j] = mp->m_epg_pa[i]; /* Never moves page 0. */ m->m_len += mbuf_ext_pg_len(pgs, i, 0); } pgs0->npgs = j; pgs->npgs = pgno + 1; /* Can now update pgs->last_pg_len. */ pgs->last_pg_len = left; m->m_next = mp->m_next; mp->m_next = NULL; return (m); } Index: projects/nfs-over-tls/sys/rpc/clnt_vc.c =================================================================== --- projects/nfs-over-tls/sys/rpc/clnt_vc.c (revision 360215) +++ projects/nfs-over-tls/sys/rpc/clnt_vc.c (revision 360216) @@ -1,1158 +1,1158 @@ /* $NetBSD: clnt_vc.c,v 1.4 2000/07/14 08:40:42 fvdl Exp $ */ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char *sccsid2 = "@(#)clnt_tcp.c 1.37 87/10/05 Copyr 1984 Sun Micro"; static char *sccsid = "@(#)clnt_tcp.c 2.2 88/08/01 4.0 RPCSRC"; static char sccsid3[] = "@(#)clnt_vc.c 1.19 89/03/16 Copyr 1988 Sun Micro"; #endif #include __FBSDID("$FreeBSD$"); /* * clnt_tcp.c, Implements a TCP/IP based, client side RPC. * * Copyright (C) 1984, Sun Microsystems, Inc. * * TCP based RPC supports 'batched calls'. * A sequence of calls may be batched-up in a send buffer. The rpc call * return immediately to the client even though the call was not necessarily * sent. The batching occurs if the results' xdr routine is NULL (0) AND * the rpc timeout value is zero (see clnt.h, rpc). * * Clients should NOT casually batch calls that in fact return results; that is, * the server side should be aware that a call is batched and not produce any * return message. Batched calls that produce many result messages can * deadlock (netlock) the client and the server.... * * Now go hang yourself. */ #include "opt_kern_tls.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KERN_TLS extern u_int ktls_maxlen; #endif struct cmessage { struct cmsghdr cmsg; struct cmsgcred cmcred; }; static enum clnt_stat clnt_vc_call(CLIENT *, struct rpc_callextra *, rpcproc_t, struct mbuf *, struct mbuf **, struct timeval); static void clnt_vc_geterr(CLIENT *, struct rpc_err *); static bool_t clnt_vc_freeres(CLIENT *, xdrproc_t, void *); static void clnt_vc_abort(CLIENT *); static bool_t clnt_vc_control(CLIENT *, u_int, void *); static void clnt_vc_close(CLIENT *); static void clnt_vc_destroy(CLIENT *); static bool_t time_not_ok(struct timeval *); static int clnt_vc_soupcall(struct socket *so, void *arg, int waitflag); static struct clnt_ops clnt_vc_ops = { .cl_call = clnt_vc_call, .cl_abort = clnt_vc_abort, .cl_geterr = clnt_vc_geterr, .cl_freeres = clnt_vc_freeres, .cl_close = clnt_vc_close, .cl_destroy = clnt_vc_destroy, .cl_control = clnt_vc_control }; static void clnt_vc_upcallsdone(struct ct_data *); static int fake_wchan; /* * Create a client handle for a connection. * Default options are set, which the user can change using clnt_control()'s. * The rpc/vc package does buffering similar to stdio, so the client * must pick send and receive buffer sizes, 0 => use the default. * NB: fd is copied into a private area. * NB: The rpch->cl_auth is set null authentication. Caller may wish to * set this something more useful. * * fd should be an open socket */ CLIENT * clnt_vc_create( struct socket *so, /* open file descriptor */ struct sockaddr *raddr, /* servers address */ const rpcprog_t prog, /* program number */ const rpcvers_t vers, /* version number */ size_t sendsz, /* buffer recv size */ size_t recvsz, /* buffer send size */ int intrflag) /* interruptible */ { CLIENT *cl; /* client handle */ struct ct_data *ct = NULL; /* client handle */ struct timeval now; struct rpc_msg call_msg; static uint32_t disrupt; struct __rpc_sockinfo si; XDR xdrs; int error, interrupted, one = 1, sleep_flag; struct sockopt sopt; if (disrupt == 0) disrupt = (uint32_t)(long)raddr; cl = (CLIENT *)mem_alloc(sizeof (*cl)); ct = (struct ct_data *)mem_alloc(sizeof (*ct)); mtx_init(&ct->ct_lock, "ct->ct_lock", NULL, MTX_DEF); ct->ct_threads = 0; ct->ct_closing = FALSE; ct->ct_closed = FALSE; ct->ct_upcallrefs = 0; if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { error = soconnect(so, raddr, curthread); SOCK_LOCK(so); interrupted = 0; sleep_flag = PSOCK; if (intrflag != 0) sleep_flag |= PCATCH; while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { error = msleep(&so->so_timeo, SOCK_MTX(so), sleep_flag, "connec", 0); if (error) { if (error == EINTR || error == ERESTART) interrupted = 1; break; } } if (error == 0) { error = so->so_error; so->so_error = 0; } SOCK_UNLOCK(so); if (error) { if (!interrupted) so->so_state &= ~SS_ISCONNECTING; rpc_createerr.cf_stat = RPC_SYSTEMERROR; rpc_createerr.cf_error.re_errno = error; goto err; } } if (!__rpc_socket2sockinfo(so, &si)) { goto err; } if (so->so_proto->pr_flags & PR_CONNREQUIRED) { bzero(&sopt, sizeof(sopt)); sopt.sopt_dir = SOPT_SET; sopt.sopt_level = SOL_SOCKET; sopt.sopt_name = SO_KEEPALIVE; sopt.sopt_val = &one; sopt.sopt_valsize = sizeof(one); sosetopt(so, &sopt); } if (so->so_proto->pr_protocol == IPPROTO_TCP) { bzero(&sopt, sizeof(sopt)); sopt.sopt_dir = SOPT_SET; sopt.sopt_level = IPPROTO_TCP; sopt.sopt_name = TCP_NODELAY; sopt.sopt_val = &one; sopt.sopt_valsize = sizeof(one); sosetopt(so, &sopt); } ct->ct_closeit = FALSE; /* * Set up private data struct */ ct->ct_socket = so; ct->ct_wait.tv_sec = -1; ct->ct_wait.tv_usec = -1; memcpy(&ct->ct_addr, raddr, raddr->sa_len); /* * Initialize call message */ getmicrotime(&now); ct->ct_xid = ((uint32_t)++disrupt) ^ __RPC_GETXID(&now); call_msg.rm_xid = ct->ct_xid; call_msg.rm_direction = CALL; call_msg.rm_call.cb_rpcvers = RPC_MSG_VERSION; call_msg.rm_call.cb_prog = (uint32_t)prog; call_msg.rm_call.cb_vers = (uint32_t)vers; /* * pre-serialize the static part of the call msg and stash it away */ xdrmem_create(&xdrs, ct->ct_mcallc, MCALL_MSG_SIZE, XDR_ENCODE); if (! xdr_callhdr(&xdrs, &call_msg)) { if (ct->ct_closeit) { soclose(ct->ct_socket); } goto err; } ct->ct_mpos = XDR_GETPOS(&xdrs); XDR_DESTROY(&xdrs); ct->ct_waitchan = "rpcrecv"; ct->ct_waitflag = 0; /* * Create a client handle which uses xdrrec for serialization * and authnone for authentication. */ sendsz = __rpc_get_t_size(si.si_af, si.si_proto, (int)sendsz); recvsz = __rpc_get_t_size(si.si_af, si.si_proto, (int)recvsz); error = soreserve(ct->ct_socket, sendsz, recvsz); if (error != 0) { if (ct->ct_closeit) { soclose(ct->ct_socket); } goto err; } cl->cl_refs = 1; cl->cl_ops = &clnt_vc_ops; cl->cl_private = ct; cl->cl_auth = authnone_create(); SOCKBUF_LOCK(&ct->ct_socket->so_rcv); soupcall_set(ct->ct_socket, SO_RCV, clnt_vc_soupcall, ct); SOCKBUF_UNLOCK(&ct->ct_socket->so_rcv); ct->ct_record = NULL; ct->ct_record_resid = 0; TAILQ_INIT(&ct->ct_pending); return (cl); err: mtx_destroy(&ct->ct_lock); mem_free(ct, sizeof (struct ct_data)); mem_free(cl, sizeof (CLIENT)); return ((CLIENT *)NULL); } static enum clnt_stat clnt_vc_call( CLIENT *cl, /* client handle */ struct rpc_callextra *ext, /* call metadata */ rpcproc_t proc, /* procedure number */ struct mbuf *args, /* pointer to args */ struct mbuf **resultsp, /* pointer to results */ struct timeval utimeout) { struct ct_data *ct = (struct ct_data *) cl->cl_private; AUTH *auth; struct rpc_err *errp; enum clnt_stat stat; XDR xdrs; struct rpc_msg reply_msg; bool_t ok; int nrefreshes = 2; /* number of times to refresh cred */ struct timeval timeout; uint32_t xid; struct mbuf *mreq = NULL, *results; struct ct_request *cr; int error, maxextsiz, trycnt; cr = malloc(sizeof(struct ct_request), M_RPC, M_WAITOK); mtx_lock(&ct->ct_lock); if (ct->ct_closing || ct->ct_closed) { mtx_unlock(&ct->ct_lock); free(cr, M_RPC); return (RPC_CANTSEND); } ct->ct_threads++; if (ext) { auth = ext->rc_auth; errp = &ext->rc_err; } else { auth = cl->cl_auth; errp = &ct->ct_error; } cr->cr_mrep = NULL; cr->cr_error = 0; if (ct->ct_wait.tv_usec == -1) { timeout = utimeout; /* use supplied timeout */ } else { timeout = ct->ct_wait; /* use default timeout */ } /* * After 15sec of looping, allow it to return RPC_CANTSEND, which will * cause the clnt_reconnect layer to create a new TCP connection. */ trycnt = 15 * hz; call_again: mtx_assert(&ct->ct_lock, MA_OWNED); if (ct->ct_closing || ct->ct_closed) { ct->ct_threads--; wakeup(ct); mtx_unlock(&ct->ct_lock); free(cr, M_RPC); return (RPC_CANTSEND); } ct->ct_xid++; xid = ct->ct_xid; mtx_unlock(&ct->ct_lock); /* * Leave space to pre-pend the record mark. */ mreq = m_gethdr(M_WAITOK, MT_DATA); mreq->m_data += sizeof(uint32_t); KASSERT(ct->ct_mpos + sizeof(uint32_t) <= MHLEN, ("RPC header too big")); bcopy(ct->ct_mcallc, mreq->m_data, ct->ct_mpos); mreq->m_len = ct->ct_mpos; /* * The XID is the first thing in the request. */ *mtod(mreq, uint32_t *) = htonl(xid); xdrmbuf_create(&xdrs, mreq, XDR_ENCODE); errp->re_status = stat = RPC_SUCCESS; if ((! XDR_PUTINT32(&xdrs, &proc)) || (! AUTH_MARSHALL(auth, xid, &xdrs, m_copym(args, 0, M_COPYALL, M_WAITOK)))) { errp->re_status = stat = RPC_CANTENCODEARGS; mtx_lock(&ct->ct_lock); goto out; } mreq->m_pkthdr.len = m_length(mreq, NULL); /* * Prepend a record marker containing the packet length. */ M_PREPEND(mreq, sizeof(uint32_t), M_WAITOK); *mtod(mreq, uint32_t *) = htonl(0x80000000 | (mreq->m_pkthdr.len - sizeof(uint32_t))); cr->cr_xid = xid; mtx_lock(&ct->ct_lock); /* * Check to see if the other end has already started to close down * the connection. The upcall will have set ct_error.re_status * to RPC_CANTRECV if this is the case. * If the other end starts to close down the connection after this * point, it will be detected later when cr_error is checked, * since the request is in the ct_pending queue. */ if (ct->ct_error.re_status == RPC_CANTRECV) { if (errp != &ct->ct_error) { errp->re_errno = ct->ct_error.re_errno; errp->re_status = RPC_CANTRECV; } stat = RPC_CANTRECV; goto out; } TAILQ_INSERT_TAIL(&ct->ct_pending, cr, cr_link); mtx_unlock(&ct->ct_lock); if (ct->ct_sslrefno != 0) { /* * Copy the mbuf chain to a chain of ext_pgs mbuf(s) * as required by KERN_TLS. */ maxextsiz = TLS_MAX_MSG_SIZE_V10_2; #ifdef KERN_TLS maxextsiz = min(maxextsiz, ktls_maxlen); #endif mreq = _rpc_copym_into_ext_pgs(mreq, maxextsiz); } /* * sosend consumes mreq. */ error = sosend(ct->ct_socket, NULL, NULL, mreq, NULL, 0, curthread); mreq = NULL; if (error == EMSGSIZE || (error == ERESTART && (ct->ct_waitflag & PCATCH) == 0 && trycnt-- > 0)) { SOCKBUF_LOCK(&ct->ct_socket->so_snd); sbwait(&ct->ct_socket->so_snd); SOCKBUF_UNLOCK(&ct->ct_socket->so_snd); AUTH_VALIDATE(auth, xid, NULL, NULL); mtx_lock(&ct->ct_lock); TAILQ_REMOVE(&ct->ct_pending, cr, cr_link); /* Sleep for 1 clock tick before trying the sosend() again. */ msleep(&fake_wchan, &ct->ct_lock, 0, "rpclpsnd", 1); printf("TRY AGAIN!!\n"); goto call_again; } reply_msg.acpted_rply.ar_verf.oa_flavor = AUTH_NULL; reply_msg.acpted_rply.ar_verf.oa_base = cr->cr_verf; reply_msg.acpted_rply.ar_verf.oa_length = 0; reply_msg.acpted_rply.ar_results.where = NULL; reply_msg.acpted_rply.ar_results.proc = (xdrproc_t)xdr_void; mtx_lock(&ct->ct_lock); if (error) { TAILQ_REMOVE(&ct->ct_pending, cr, cr_link); errp->re_errno = error; errp->re_status = stat = RPC_CANTSEND; goto out; } /* * Check to see if we got an upcall while waiting for the * lock. In both these cases, the request has been removed * from ct->ct_pending. */ if (cr->cr_error) { TAILQ_REMOVE(&ct->ct_pending, cr, cr_link); errp->re_errno = cr->cr_error; errp->re_status = stat = RPC_CANTRECV; goto out; } if (cr->cr_mrep) { TAILQ_REMOVE(&ct->ct_pending, cr, cr_link); goto got_reply; } /* * Hack to provide rpc-based message passing */ if (timeout.tv_sec == 0 && timeout.tv_usec == 0) { TAILQ_REMOVE(&ct->ct_pending, cr, cr_link); errp->re_status = stat = RPC_TIMEDOUT; goto out; } error = msleep(cr, &ct->ct_lock, ct->ct_waitflag, ct->ct_waitchan, tvtohz(&timeout)); TAILQ_REMOVE(&ct->ct_pending, cr, cr_link); if (error) { /* * The sleep returned an error so our request is still * on the list. Turn the error code into an * appropriate client status. */ errp->re_errno = error; switch (error) { case EINTR: stat = RPC_INTR; break; case EWOULDBLOCK: stat = RPC_TIMEDOUT; break; default: stat = RPC_CANTRECV; } errp->re_status = stat; goto out; } else { /* * We were woken up by the upcall. If the * upcall had a receive error, report that, * otherwise we have a reply. */ if (cr->cr_error) { errp->re_errno = cr->cr_error; errp->re_status = stat = RPC_CANTRECV; goto out; } } got_reply: /* * Now decode and validate the response. We need to drop the * lock since xdr_replymsg may end up sleeping in malloc. */ mtx_unlock(&ct->ct_lock); if (ext && ext->rc_feedback) ext->rc_feedback(FEEDBACK_OK, proc, ext->rc_feedback_arg); #ifdef notnow { struct mbuf *m, *m2; int txxxx; if (cr->cr_mrep != NULL) { txxxx = m_length(cr->cr_mrep, NULL); if (txxxx > 0) { m = mb_copym_ext_pgs(cr->cr_mrep, txxxx, 16384, M_WAITOK, - false, mb_free_mext_pgs, &m2); + mb_free_mext_pgs, &m2); m2 = cr->cr_mrep; cr->cr_mrep = m; m_freem(m2); } } } #endif xdrmbuf_create(&xdrs, cr->cr_mrep, XDR_DECODE); ok = xdr_replymsg(&xdrs, &reply_msg); cr->cr_mrep = NULL; if (ok) { if ((reply_msg.rm_reply.rp_stat == MSG_ACCEPTED) && (reply_msg.acpted_rply.ar_stat == SUCCESS)) errp->re_status = stat = RPC_SUCCESS; else stat = _seterr_reply(&reply_msg, errp); if (stat == RPC_SUCCESS) { results = xdrmbuf_getall(&xdrs); if (!AUTH_VALIDATE(auth, xid, &reply_msg.acpted_rply.ar_verf, &results)) { errp->re_status = stat = RPC_AUTHERROR; errp->re_why = AUTH_INVALIDRESP; } else { KASSERT(results, ("auth validated but no result")); if (ext) { if ((results->m_flags & M_NOMAP) != 0) ext->rc_mbufoffs = xdrs.x_handy; else ext->rc_mbufoffs = 0; } *resultsp = results; } } /* end successful completion */ /* * If unsuccessful AND error is an authentication error * then refresh credentials and try again, else break */ else if (stat == RPC_AUTHERROR) /* maybe our credentials need to be refreshed ... */ if (nrefreshes > 0 && AUTH_REFRESH(auth, &reply_msg)) { nrefreshes--; XDR_DESTROY(&xdrs); mtx_lock(&ct->ct_lock); goto call_again; } /* end of unsuccessful completion */ } /* end of valid reply message */ else { errp->re_status = stat = RPC_CANTDECODERES; } XDR_DESTROY(&xdrs); mtx_lock(&ct->ct_lock); out: mtx_assert(&ct->ct_lock, MA_OWNED); KASSERT(stat != RPC_SUCCESS || *resultsp, ("RPC_SUCCESS without reply")); if (mreq) m_freem(mreq); if (cr->cr_mrep) m_freem(cr->cr_mrep); ct->ct_threads--; if (ct->ct_closing) wakeup(ct); mtx_unlock(&ct->ct_lock); if (auth && stat != RPC_SUCCESS) AUTH_VALIDATE(auth, xid, NULL, NULL); free(cr, M_RPC); return (stat); } static void clnt_vc_geterr(CLIENT *cl, struct rpc_err *errp) { struct ct_data *ct = (struct ct_data *) cl->cl_private; *errp = ct->ct_error; } static bool_t clnt_vc_freeres(CLIENT *cl, xdrproc_t xdr_res, void *res_ptr) { XDR xdrs; bool_t dummy; xdrs.x_op = XDR_FREE; dummy = (*xdr_res)(&xdrs, res_ptr); return (dummy); } /*ARGSUSED*/ static void clnt_vc_abort(CLIENT *cl) { } static bool_t clnt_vc_control(CLIENT *cl, u_int request, void *info) { struct ct_data *ct = (struct ct_data *)cl->cl_private; void *infop = info; SVCXPRT *xprt; uint64_t *p; mtx_lock(&ct->ct_lock); switch (request) { case CLSET_FD_CLOSE: ct->ct_closeit = TRUE; mtx_unlock(&ct->ct_lock); return (TRUE); case CLSET_FD_NCLOSE: ct->ct_closeit = FALSE; mtx_unlock(&ct->ct_lock); return (TRUE); default: break; } /* for other requests which use info */ if (info == NULL) { mtx_unlock(&ct->ct_lock); return (FALSE); } switch (request) { case CLSET_TIMEOUT: if (time_not_ok((struct timeval *)info)) { mtx_unlock(&ct->ct_lock); return (FALSE); } ct->ct_wait = *(struct timeval *)infop; break; case CLGET_TIMEOUT: *(struct timeval *)infop = ct->ct_wait; break; case CLGET_SERVER_ADDR: (void) memcpy(info, &ct->ct_addr, (size_t)ct->ct_addr.ss_len); break; case CLGET_SVC_ADDR: /* * Slightly different semantics to userland - we use * sockaddr instead of netbuf. */ memcpy(info, &ct->ct_addr, ct->ct_addr.ss_len); break; case CLSET_SVC_ADDR: /* set to new address */ mtx_unlock(&ct->ct_lock); return (FALSE); case CLGET_XID: *(uint32_t *)info = ct->ct_xid; break; case CLSET_XID: /* This will set the xid of the NEXT call */ /* decrement by 1 as clnt_vc_call() increments once */ ct->ct_xid = *(uint32_t *)info - 1; break; case CLGET_VERS: /* * This RELIES on the information that, in the call body, * the version number field is the fifth field from the * beginning of the RPC header. MUST be changed if the * call_struct is changed */ *(uint32_t *)info = ntohl(*(uint32_t *)(void *)(ct->ct_mcallc + 4 * BYTES_PER_XDR_UNIT)); break; case CLSET_VERS: *(uint32_t *)(void *)(ct->ct_mcallc + 4 * BYTES_PER_XDR_UNIT) = htonl(*(uint32_t *)info); break; case CLGET_PROG: /* * This RELIES on the information that, in the call body, * the program number field is the fourth field from the * beginning of the RPC header. MUST be changed if the * call_struct is changed */ *(uint32_t *)info = ntohl(*(uint32_t *)(void *)(ct->ct_mcallc + 3 * BYTES_PER_XDR_UNIT)); break; case CLSET_PROG: *(uint32_t *)(void *)(ct->ct_mcallc + 3 * BYTES_PER_XDR_UNIT) = htonl(*(uint32_t *)info); break; case CLSET_WAITCHAN: ct->ct_waitchan = (const char *)info; break; case CLGET_WAITCHAN: *(const char **) info = ct->ct_waitchan; break; case CLSET_INTERRUPTIBLE: if (*(int *) info) ct->ct_waitflag = PCATCH; else ct->ct_waitflag = 0; break; case CLGET_INTERRUPTIBLE: if (ct->ct_waitflag) *(int *) info = TRUE; else *(int *) info = FALSE; break; case CLSET_BACKCHANNEL: xprt = (SVCXPRT *)info; if (ct->ct_backchannelxprt == NULL) { xprt->xp_p2 = ct; if (ct->ct_sslrefno != 0) xprt->xp_tls = RPCTLS_FLAGS_HANDSHAKE; ct->ct_backchannelxprt = xprt; printf("backch tls=0x%x xprt=%p\n", xprt->xp_tls, xprt); } break; case CLSET_TLS: p = (uint64_t *)info; ct->ct_sslsec = *p++; ct->ct_sslusec = *p++; ct->ct_sslrefno = *p; break; case CLSET_BLOCKRCV: if (*(int *) info) ct->ct_dontrcv = TRUE; else ct->ct_dontrcv = FALSE; break; default: mtx_unlock(&ct->ct_lock); return (FALSE); } mtx_unlock(&ct->ct_lock); return (TRUE); } static void clnt_vc_close(CLIENT *cl) { struct ct_data *ct = (struct ct_data *) cl->cl_private; struct ct_request *cr; mtx_lock(&ct->ct_lock); if (ct->ct_closed) { mtx_unlock(&ct->ct_lock); return; } if (ct->ct_closing) { while (ct->ct_closing) msleep(ct, &ct->ct_lock, 0, "rpcclose", 0); KASSERT(ct->ct_closed, ("client should be closed")); mtx_unlock(&ct->ct_lock); return; } if (ct->ct_socket) { ct->ct_closing = TRUE; mtx_unlock(&ct->ct_lock); SOCKBUF_LOCK(&ct->ct_socket->so_rcv); soupcall_clear(ct->ct_socket, SO_RCV); clnt_vc_upcallsdone(ct); SOCKBUF_UNLOCK(&ct->ct_socket->so_rcv); /* * Abort any pending requests and wait until everyone * has finished with clnt_vc_call. */ mtx_lock(&ct->ct_lock); TAILQ_FOREACH(cr, &ct->ct_pending, cr_link) { cr->cr_xid = 0; cr->cr_error = ESHUTDOWN; wakeup(cr); } while (ct->ct_threads) msleep(ct, &ct->ct_lock, 0, "rpcclose", 0); } ct->ct_closing = FALSE; ct->ct_closed = TRUE; mtx_unlock(&ct->ct_lock); wakeup(ct); } static void clnt_vc_destroy(CLIENT *cl) { struct ct_data *ct = (struct ct_data *) cl->cl_private; struct socket *so = NULL; SVCXPRT *xprt; enum clnt_stat stat; clnt_vc_close(cl); mtx_lock(&ct->ct_lock); xprt = ct->ct_backchannelxprt; ct->ct_backchannelxprt = NULL; if (xprt != NULL) { mtx_unlock(&ct->ct_lock); /* To avoid a LOR. */ sx_xlock(&xprt->xp_lock); mtx_lock(&ct->ct_lock); xprt->xp_p2 = NULL; sx_xunlock(&xprt->xp_lock); } if (ct->ct_socket) { if (ct->ct_closeit) { so = ct->ct_socket; } } mtx_unlock(&ct->ct_lock); mtx_destroy(&ct->ct_lock); if (so) { stat = RPC_FAILED; if (ct->ct_sslrefno != 0) stat = rpctls_cl_disconnect(ct->ct_sslsec, ct->ct_sslusec, ct->ct_sslrefno); if (stat != RPC_SUCCESS) { soshutdown(so, SHUT_WR); soclose(so); } } mem_free(ct, sizeof(struct ct_data)); if (cl->cl_netid && cl->cl_netid[0]) mem_free(cl->cl_netid, strlen(cl->cl_netid) +1); if (cl->cl_tp && cl->cl_tp[0]) mem_free(cl->cl_tp, strlen(cl->cl_tp) +1); mem_free(cl, sizeof(CLIENT)); } /* * Make sure that the time is not garbage. -1 value is disallowed. * Note this is different from time_not_ok in clnt_dg.c */ static bool_t time_not_ok(struct timeval *t) { return (t->tv_sec <= -1 || t->tv_sec > 100000000 || t->tv_usec <= -1 || t->tv_usec > 1000000); } int clnt_vc_soupcall(struct socket *so, void *arg, int waitflag) { struct ct_data *ct = (struct ct_data *) arg; struct uio uio; struct mbuf *m, *m2; struct ct_request *cr; int error, rcvflag, foundreq; uint32_t xid_plus_direction[2], header; bool_t do_read; SVCXPRT *xprt; struct cf_conn *cd; CTASSERT(sizeof(xid_plus_direction) == 2 * sizeof(uint32_t)); /* RPC-over-TLS needs to block reception during handshake upcall. */ mtx_lock(&ct->ct_lock); if (ct->ct_dontrcv) { mtx_unlock(&ct->ct_lock); return (SU_OK); } mtx_unlock(&ct->ct_lock); ct->ct_upcallrefs++; uio.uio_td = curthread; do { /* * If ct_record_resid is zero, we are waiting for a * record mark. */ if (ct->ct_record_resid == 0) { /* * Make sure there is either a whole record * mark in the buffer or there is some other * error condition */ do_read = FALSE; if (sbavail(&so->so_rcv) >= sizeof(uint32_t) || (so->so_rcv.sb_state & SBS_CANTRCVMORE) || so->so_error) do_read = TRUE; if (!do_read) break; SOCKBUF_UNLOCK(&so->so_rcv); uio.uio_resid = sizeof(uint32_t); m = NULL; rcvflag = MSG_DONTWAIT | MSG_SOCALLBCK; error = soreceive(so, NULL, &uio, &m, NULL, &rcvflag); SOCKBUF_LOCK(&so->so_rcv); if (error == EWOULDBLOCK) break; /* * If there was an error, wake up all pending * requests. */ if (error || uio.uio_resid > 0) { wakeup_all: mtx_lock(&ct->ct_lock); if (!error) { /* * We must have got EOF trying * to read from the stream. */ error = ECONNRESET; } ct->ct_error.re_status = RPC_CANTRECV; ct->ct_error.re_errno = error; TAILQ_FOREACH(cr, &ct->ct_pending, cr_link) { cr->cr_error = error; wakeup(cr); } mtx_unlock(&ct->ct_lock); break; } m_copydata(m, 0, sizeof(uint32_t), (char *)&header); header = ntohl(header); ct->ct_record = NULL; ct->ct_record_resid = header & 0x7fffffff; ct->ct_record_eor = ((header & 0x80000000) != 0); m_freem(m); } else { /* * Wait until the socket has the whole record * buffered. */ do_read = FALSE; if (sbavail(&so->so_rcv) >= ct->ct_record_resid || (so->so_rcv.sb_state & SBS_CANTRCVMORE) || so->so_error) do_read = TRUE; if (!do_read) break; /* * We have the record mark. Read as much as * the socket has buffered up to the end of * this record. */ SOCKBUF_UNLOCK(&so->so_rcv); uio.uio_resid = ct->ct_record_resid; m = NULL; rcvflag = MSG_DONTWAIT | MSG_SOCALLBCK; error = soreceive(so, NULL, &uio, &m, NULL, &rcvflag); SOCKBUF_LOCK(&so->so_rcv); if (error == EWOULDBLOCK) break; if (error || uio.uio_resid == ct->ct_record_resid) goto wakeup_all; /* * If we have part of the record already, * chain this bit onto the end. */ if (ct->ct_record) m_last(ct->ct_record)->m_next = m; else ct->ct_record = m; ct->ct_record_resid = uio.uio_resid; /* * If we have the entire record, see if we can * match it to a request. */ if (ct->ct_record_resid == 0 && ct->ct_record_eor) { /* * The XID is in the first uint32_t of * the reply and the message direction * is the second one. */ if (ct->ct_record->m_len < sizeof(xid_plus_direction) && m_length(ct->ct_record, NULL) < sizeof(xid_plus_direction)) { m_freem(ct->ct_record); break; } m_copydata(ct->ct_record, 0, sizeof(xid_plus_direction), (char *)xid_plus_direction); xid_plus_direction[0] = ntohl(xid_plus_direction[0]); xid_plus_direction[1] = ntohl(xid_plus_direction[1]); /* Check message direction. */ if (xid_plus_direction[1] == CALL) { printf("Got backchannel callback\n"); /* This is a backchannel request. */ mtx_lock(&ct->ct_lock); xprt = ct->ct_backchannelxprt; printf("backxprt=%p\n", xprt); if (xprt == NULL) { mtx_unlock(&ct->ct_lock); /* Just throw it away. */ m_freem(ct->ct_record); ct->ct_record = NULL; } else { cd = (struct cf_conn *) xprt->xp_p1; m2 = cd->mreq; /* * The requests are chained * in the m_nextpkt list. */ while (m2 != NULL && m2->m_nextpkt != NULL) /* Find end of list. */ m2 = m2->m_nextpkt; if (m2 != NULL) m2->m_nextpkt = ct->ct_record; else cd->mreq = ct->ct_record; ct->ct_record->m_nextpkt = NULL; ct->ct_record = NULL; xprt_active(xprt); mtx_unlock(&ct->ct_lock); } } else { mtx_lock(&ct->ct_lock); foundreq = 0; TAILQ_FOREACH(cr, &ct->ct_pending, cr_link) { if (cr->cr_xid == xid_plus_direction[0]) { /* * This one * matches. We leave * the reply mbuf in * cr->cr_mrep. Set * the XID to zero so * that we will ignore * any duplicated * replies. */ cr->cr_xid = 0; cr->cr_mrep = ct->ct_record; cr->cr_error = 0; foundreq = 1; wakeup(cr); break; } } mtx_unlock(&ct->ct_lock); if (!foundreq) m_freem(ct->ct_record); ct->ct_record = NULL; } } } } while (m); ct->ct_upcallrefs--; if (ct->ct_upcallrefs < 0) panic("rpcvc upcall refcnt"); if (ct->ct_upcallrefs == 0) wakeup(&ct->ct_upcallrefs); return (SU_OK); } /* * Wait for all upcalls in progress to complete. */ static void clnt_vc_upcallsdone(struct ct_data *ct) { SOCKBUF_LOCK_ASSERT(&ct->ct_socket->so_rcv); while (ct->ct_upcallrefs > 0) (void) msleep(&ct->ct_upcallrefs, SOCKBUF_MTX(&ct->ct_socket->so_rcv), 0, "rpcvcup", 0); } Index: projects/nfs-over-tls/sys/rpc/rpc_generic.c =================================================================== --- projects/nfs-over-tls/sys/rpc/rpc_generic.c (revision 360215) +++ projects/nfs-over-tls/sys/rpc/rpc_generic.c (revision 360216) @@ -1,983 +1,981 @@ /* $NetBSD: rpc_generic.c,v 1.4 2000/09/28 09:07:04 kleink Exp $ */ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1986-1991 by Sun Microsystems Inc. */ /* #pragma ident "@(#)rpc_generic.c 1.17 94/04/24 SMI" */ #include __FBSDID("$FreeBSD$"); /* * rpc_generic.c, Miscl routines for RPC. * */ #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern u_long sb_max_adj; /* not defined in socketvar.h */ /* Provide an entry point hook for the rpcsec_gss module. */ struct rpc_gss_entries rpc_gss_entries; struct handle { NCONF_HANDLE *nhandle; int nflag; /* Whether NETPATH or NETCONFIG */ int nettype; }; static const struct _rpcnettype { const char *name; const int type; } _rpctypelist[] = { { "netpath", _RPC_NETPATH }, { "visible", _RPC_VISIBLE }, { "circuit_v", _RPC_CIRCUIT_V }, { "datagram_v", _RPC_DATAGRAM_V }, { "circuit_n", _RPC_CIRCUIT_N }, { "datagram_n", _RPC_DATAGRAM_N }, { "tcp", _RPC_TCP }, { "udp", _RPC_UDP }, { 0, _RPC_NONE } }; struct netid_af { const char *netid; int af; int protocol; }; static const struct netid_af na_cvt[] = { { "udp", AF_INET, IPPROTO_UDP }, { "tcp", AF_INET, IPPROTO_TCP }, #ifdef INET6 { "udp6", AF_INET6, IPPROTO_UDP }, { "tcp6", AF_INET6, IPPROTO_TCP }, #endif { "local", AF_LOCAL, 0 } }; struct rpc_createerr rpc_createerr; /* * Find the appropriate buffer size */ u_int /*ARGSUSED*/ __rpc_get_t_size(int af, int proto, int size) { int defsize; switch (proto) { case IPPROTO_TCP: defsize = 64 * 1024; /* XXX */ break; case IPPROTO_UDP: defsize = UDPMSGSIZE; break; default: defsize = RPC_MAXDATASIZE; break; } if (size == 0) return defsize; /* Check whether the value is within the upper max limit */ return (size > sb_max_adj ? (u_int)sb_max_adj : (u_int)size); } /* * Find the appropriate address buffer size */ u_int __rpc_get_a_size(af) int af; { switch (af) { case AF_INET: return sizeof (struct sockaddr_in); #ifdef INET6 case AF_INET6: return sizeof (struct sockaddr_in6); #endif case AF_LOCAL: return sizeof (struct sockaddr_un); default: break; } return ((u_int)RPC_MAXADDRSIZE); } #if 0 /* * Used to ping the NULL procedure for clnt handle. * Returns NULL if fails, else a non-NULL pointer. */ void * rpc_nullproc(clnt) CLIENT *clnt; { struct timeval TIMEOUT = {25, 0}; if (clnt_call(clnt, NULLPROC, (xdrproc_t) xdr_void, NULL, (xdrproc_t) xdr_void, NULL, TIMEOUT) != RPC_SUCCESS) { return (NULL); } return ((void *) clnt); } #endif int __rpc_socket2sockinfo(struct socket *so, struct __rpc_sockinfo *sip) { int type, proto; struct sockaddr *sa; sa_family_t family; struct sockopt opt; int error; CURVNET_SET(so->so_vnet); error = so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa); CURVNET_RESTORE(); if (error) return 0; sip->si_alen = sa->sa_len; family = sa->sa_family; free(sa, M_SONAME); opt.sopt_dir = SOPT_GET; opt.sopt_level = SOL_SOCKET; opt.sopt_name = SO_TYPE; opt.sopt_val = &type; opt.sopt_valsize = sizeof type; opt.sopt_td = NULL; error = sogetopt(so, &opt); if (error) return 0; /* XXX */ if (family != AF_LOCAL) { if (type == SOCK_STREAM) proto = IPPROTO_TCP; else if (type == SOCK_DGRAM) proto = IPPROTO_UDP; else return 0; } else proto = 0; sip->si_af = family; sip->si_proto = proto; sip->si_socktype = type; return 1; } /* * Linear search, but the number of entries is small. */ int __rpc_nconf2sockinfo(const struct netconfig *nconf, struct __rpc_sockinfo *sip) { int i; for (i = 0; i < (sizeof na_cvt) / (sizeof (struct netid_af)); i++) if (strcmp(na_cvt[i].netid, nconf->nc_netid) == 0 || ( strcmp(nconf->nc_netid, "unix") == 0 && strcmp(na_cvt[i].netid, "local") == 0)) { sip->si_af = na_cvt[i].af; sip->si_proto = na_cvt[i].protocol; sip->si_socktype = __rpc_seman2socktype((int)nconf->nc_semantics); if (sip->si_socktype == -1) return 0; sip->si_alen = __rpc_get_a_size(sip->si_af); return 1; } return 0; } struct socket * __rpc_nconf2socket(const struct netconfig *nconf) { struct __rpc_sockinfo si; struct socket *so; int error; if (!__rpc_nconf2sockinfo(nconf, &si)) return 0; so = NULL; error = socreate(si.si_af, &so, si.si_socktype, si.si_proto, curthread->td_ucred, curthread); if (error) return NULL; else return so; } char * taddr2uaddr(const struct netconfig *nconf, const struct netbuf *nbuf) { struct __rpc_sockinfo si; if (!__rpc_nconf2sockinfo(nconf, &si)) return NULL; return __rpc_taddr2uaddr_af(si.si_af, nbuf); } struct netbuf * uaddr2taddr(const struct netconfig *nconf, const char *uaddr) { struct __rpc_sockinfo si; if (!__rpc_nconf2sockinfo(nconf, &si)) return NULL; return __rpc_uaddr2taddr_af(si.si_af, uaddr); } char * __rpc_taddr2uaddr_af(int af, const struct netbuf *nbuf) { char *ret; struct sbuf sb; struct sockaddr_in *sin; struct sockaddr_un *sun; char namebuf[INET_ADDRSTRLEN]; #ifdef INET6 struct sockaddr_in6 *sin6; char namebuf6[INET6_ADDRSTRLEN]; #endif u_int16_t port; sbuf_new(&sb, NULL, 0, SBUF_AUTOEXTEND); switch (af) { case AF_INET: if (nbuf->len < sizeof(*sin)) return NULL; sin = nbuf->buf; if (inet_ntop(af, &sin->sin_addr, namebuf, sizeof namebuf) == NULL) return NULL; port = ntohs(sin->sin_port); if (sbuf_printf(&sb, "%s.%u.%u", namebuf, ((uint32_t)port) >> 8, port & 0xff) < 0) return NULL; break; #ifdef INET6 case AF_INET6: if (nbuf->len < sizeof(*sin6)) return NULL; sin6 = nbuf->buf; if (inet_ntop(af, &sin6->sin6_addr, namebuf6, sizeof namebuf6) == NULL) return NULL; port = ntohs(sin6->sin6_port); if (sbuf_printf(&sb, "%s.%u.%u", namebuf6, ((uint32_t)port) >> 8, port & 0xff) < 0) return NULL; break; #endif case AF_LOCAL: sun = nbuf->buf; if (sbuf_printf(&sb, "%.*s", (int)(sun->sun_len - offsetof(struct sockaddr_un, sun_path)), sun->sun_path) < 0) return (NULL); break; default: return NULL; } sbuf_finish(&sb); ret = strdup(sbuf_data(&sb), M_RPC); sbuf_delete(&sb); return ret; } struct netbuf * __rpc_uaddr2taddr_af(int af, const char *uaddr) { struct netbuf *ret = NULL; char *addrstr, *p; unsigned port, portlo, porthi; struct sockaddr_in *sin; #ifdef INET6 struct sockaddr_in6 *sin6; #endif struct sockaddr_un *sun; port = 0; sin = NULL; if (uaddr == NULL) return NULL; addrstr = strdup(uaddr, M_RPC); if (addrstr == NULL) return NULL; /* * AF_LOCAL addresses are expected to be absolute * pathnames, anything else will be AF_INET or AF_INET6. */ if (*addrstr != '/') { p = strrchr(addrstr, '.'); if (p == NULL) goto out; portlo = (unsigned)strtol(p + 1, NULL, 10); *p = '\0'; p = strrchr(addrstr, '.'); if (p == NULL) goto out; porthi = (unsigned)strtol(p + 1, NULL, 10); *p = '\0'; port = (porthi << 8) | portlo; } ret = (struct netbuf *)malloc(sizeof *ret, M_RPC, M_WAITOK); switch (af) { case AF_INET: sin = (struct sockaddr_in *)malloc(sizeof *sin, M_RPC, M_WAITOK); memset(sin, 0, sizeof *sin); sin->sin_family = AF_INET; sin->sin_port = htons(port); if (inet_pton(AF_INET, addrstr, &sin->sin_addr) <= 0) { free(sin, M_RPC); free(ret, M_RPC); ret = NULL; goto out; } sin->sin_len = ret->maxlen = ret->len = sizeof *sin; ret->buf = sin; break; #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)malloc(sizeof *sin6, M_RPC, M_WAITOK); memset(sin6, 0, sizeof *sin6); sin6->sin6_family = AF_INET6; sin6->sin6_port = htons(port); if (inet_pton(AF_INET6, addrstr, &sin6->sin6_addr) <= 0) { free(sin6, M_RPC); free(ret, M_RPC); ret = NULL; goto out; } sin6->sin6_len = ret->maxlen = ret->len = sizeof *sin6; ret->buf = sin6; break; #endif case AF_LOCAL: sun = (struct sockaddr_un *)malloc(sizeof *sun, M_RPC, M_WAITOK); memset(sun, 0, sizeof *sun); sun->sun_family = AF_LOCAL; strncpy(sun->sun_path, addrstr, sizeof(sun->sun_path) - 1); ret->len = ret->maxlen = sun->sun_len = SUN_LEN(sun); ret->buf = sun; break; default: break; } out: free(addrstr, M_RPC); return ret; } int __rpc_seman2socktype(int semantics) { switch (semantics) { case NC_TPI_CLTS: return SOCK_DGRAM; case NC_TPI_COTS_ORD: return SOCK_STREAM; case NC_TPI_RAW: return SOCK_RAW; default: break; } return -1; } int __rpc_socktype2seman(int socktype) { switch (socktype) { case SOCK_DGRAM: return NC_TPI_CLTS; case SOCK_STREAM: return NC_TPI_COTS_ORD; case SOCK_RAW: return NC_TPI_RAW; default: break; } return -1; } /* * Returns the type of the network as defined in * If nettype is NULL, it defaults to NETPATH. */ static int getnettype(const char *nettype) { int i; if ((nettype == NULL) || (nettype[0] == 0)) { return (_RPC_NETPATH); /* Default */ } #if 0 nettype = strlocase(nettype); #endif for (i = 0; _rpctypelist[i].name; i++) if (strcasecmp(nettype, _rpctypelist[i].name) == 0) { return (_rpctypelist[i].type); } return (_rpctypelist[i].type); } /* * For the given nettype (tcp or udp only), return the first structure found. * This should be freed by calling freenetconfigent() */ struct netconfig * __rpc_getconfip(const char *nettype) { char *netid; static char *netid_tcp = (char *) NULL; static char *netid_udp = (char *) NULL; struct netconfig *dummy; if (!netid_udp && !netid_tcp) { struct netconfig *nconf; void *confighandle; if (!(confighandle = setnetconfig())) { log(LOG_ERR, "rpc: failed to open " NETCONFIG); return (NULL); } while ((nconf = getnetconfig(confighandle)) != NULL) { if (strcmp(nconf->nc_protofmly, NC_INET) == 0) { if (strcmp(nconf->nc_proto, NC_TCP) == 0) { netid_tcp = strdup(nconf->nc_netid, M_RPC); } else if (strcmp(nconf->nc_proto, NC_UDP) == 0) { netid_udp = strdup(nconf->nc_netid, M_RPC); } } } endnetconfig(confighandle); } if (strcmp(nettype, "udp") == 0) netid = netid_udp; else if (strcmp(nettype, "tcp") == 0) netid = netid_tcp; else { return (NULL); } if ((netid == NULL) || (netid[0] == 0)) { return (NULL); } dummy = getnetconfigent(netid); return (dummy); } /* * Returns the type of the nettype, which should then be used with * __rpc_getconf(). * * For simplicity in the kernel, we don't support the NETPATH * environment variable. We behave as userland would then NETPATH is * unset, i.e. iterate over all visible entries in netconfig. */ void * __rpc_setconf(nettype) const char *nettype; { struct handle *handle; handle = (struct handle *) malloc(sizeof (struct handle), M_RPC, M_WAITOK); switch (handle->nettype = getnettype(nettype)) { case _RPC_NETPATH: case _RPC_CIRCUIT_N: case _RPC_DATAGRAM_N: if (!(handle->nhandle = setnetconfig())) goto failed; handle->nflag = TRUE; break; case _RPC_VISIBLE: case _RPC_CIRCUIT_V: case _RPC_DATAGRAM_V: case _RPC_TCP: case _RPC_UDP: if (!(handle->nhandle = setnetconfig())) { log(LOG_ERR, "rpc: failed to open " NETCONFIG); goto failed; } handle->nflag = FALSE; break; default: goto failed; } return (handle); failed: free(handle, M_RPC); return (NULL); } /* * Returns the next netconfig struct for the given "net" type. * __rpc_setconf() should have been called previously. */ struct netconfig * __rpc_getconf(void *vhandle) { struct handle *handle; struct netconfig *nconf; handle = (struct handle *)vhandle; if (handle == NULL) { return (NULL); } for (;;) { if (handle->nflag) { nconf = getnetconfig(handle->nhandle); if (nconf && !(nconf->nc_flag & NC_VISIBLE)) continue; } else { nconf = getnetconfig(handle->nhandle); } if (nconf == NULL) break; if ((nconf->nc_semantics != NC_TPI_CLTS) && (nconf->nc_semantics != NC_TPI_COTS) && (nconf->nc_semantics != NC_TPI_COTS_ORD)) continue; switch (handle->nettype) { case _RPC_VISIBLE: if (!(nconf->nc_flag & NC_VISIBLE)) continue; /* FALLTHROUGH */ case _RPC_NETPATH: /* Be happy */ break; case _RPC_CIRCUIT_V: if (!(nconf->nc_flag & NC_VISIBLE)) continue; /* FALLTHROUGH */ case _RPC_CIRCUIT_N: if ((nconf->nc_semantics != NC_TPI_COTS) && (nconf->nc_semantics != NC_TPI_COTS_ORD)) continue; break; case _RPC_DATAGRAM_V: if (!(nconf->nc_flag & NC_VISIBLE)) continue; /* FALLTHROUGH */ case _RPC_DATAGRAM_N: if (nconf->nc_semantics != NC_TPI_CLTS) continue; break; case _RPC_TCP: if (((nconf->nc_semantics != NC_TPI_COTS) && (nconf->nc_semantics != NC_TPI_COTS_ORD)) || (strcmp(nconf->nc_protofmly, NC_INET) #ifdef INET6 && strcmp(nconf->nc_protofmly, NC_INET6)) #else ) #endif || strcmp(nconf->nc_proto, NC_TCP)) continue; break; case _RPC_UDP: if ((nconf->nc_semantics != NC_TPI_CLTS) || (strcmp(nconf->nc_protofmly, NC_INET) #ifdef INET6 && strcmp(nconf->nc_protofmly, NC_INET6)) #else ) #endif || strcmp(nconf->nc_proto, NC_UDP)) continue; break; } break; } return (nconf); } void __rpc_endconf(vhandle) void * vhandle; { struct handle *handle; handle = (struct handle *) vhandle; if (handle == NULL) { return; } endnetconfig(handle->nhandle); free(handle, M_RPC); } int __rpc_sockisbound(struct socket *so) { struct sockaddr *sa; int error, bound; CURVNET_SET(so->so_vnet); error = so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa); CURVNET_RESTORE(); if (error) return (0); switch (sa->sa_family) { case AF_INET: bound = (((struct sockaddr_in *) sa)->sin_port != 0); break; #ifdef INET6 case AF_INET6: bound = (((struct sockaddr_in6 *) sa)->sin6_port != 0); break; #endif case AF_LOCAL: /* XXX check this */ bound = (((struct sockaddr_un *) sa)->sun_path[0] != '\0'); break; default: bound = FALSE; break; } free(sa, M_SONAME); return bound; } /* * Implement XDR-style API for RPC call. */ enum clnt_stat clnt_call_private( CLIENT *cl, /* client handle */ struct rpc_callextra *ext, /* call metadata */ rpcproc_t proc, /* procedure number */ xdrproc_t xargs, /* xdr routine for args */ void *argsp, /* pointer to args */ xdrproc_t xresults, /* xdr routine for results */ void *resultsp, /* pointer to results */ struct timeval utimeout) /* seconds to wait before giving up */ { XDR xdrs; struct mbuf *mreq; struct mbuf *mrep; enum clnt_stat stat; mreq = m_getcl(M_WAITOK, MT_DATA, 0); xdrmbuf_create(&xdrs, mreq, XDR_ENCODE); if (!xargs(&xdrs, argsp)) { m_freem(mreq); return (RPC_CANTENCODEARGS); } XDR_DESTROY(&xdrs); stat = CLNT_CALL_MBUF(cl, ext, proc, mreq, &mrep, utimeout); m_freem(mreq); if (stat == RPC_SUCCESS) { xdrmbuf_create(&xdrs, mrep, XDR_DECODE); if (!xresults(&xdrs, resultsp)) { XDR_DESTROY(&xdrs); return (RPC_CANTDECODERES); } XDR_DESTROY(&xdrs); } return (stat); } /* * Bind a socket to a privileged IP port */ int bindresvport(struct socket *so, struct sockaddr *sa) { int old, error, af; bool_t freesa = FALSE; struct sockaddr_in *sin; #ifdef INET6 struct sockaddr_in6 *sin6; #endif struct sockopt opt; int proto, portrange, portlow; u_int16_t *portp; socklen_t salen; if (sa == NULL) { CURVNET_SET(so->so_vnet); error = so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa); CURVNET_RESTORE(); if (error) return (error); freesa = TRUE; af = sa->sa_family; salen = sa->sa_len; memset(sa, 0, sa->sa_len); } else { af = sa->sa_family; salen = sa->sa_len; } switch (af) { case AF_INET: proto = IPPROTO_IP; portrange = IP_PORTRANGE; portlow = IP_PORTRANGE_LOW; sin = (struct sockaddr_in *)sa; portp = &sin->sin_port; break; #ifdef INET6 case AF_INET6: proto = IPPROTO_IPV6; portrange = IPV6_PORTRANGE; portlow = IPV6_PORTRANGE_LOW; sin6 = (struct sockaddr_in6 *)sa; portp = &sin6->sin6_port; break; #endif default: return (EPFNOSUPPORT); } sa->sa_family = af; sa->sa_len = salen; if (*portp == 0) { bzero(&opt, sizeof(opt)); opt.sopt_dir = SOPT_GET; opt.sopt_level = proto; opt.sopt_name = portrange; opt.sopt_val = &old; opt.sopt_valsize = sizeof(old); error = sogetopt(so, &opt); if (error) { goto out; } opt.sopt_dir = SOPT_SET; opt.sopt_val = &portlow; error = sosetopt(so, &opt); if (error) goto out; } error = sobind(so, sa, curthread); if (*portp == 0) { if (error) { opt.sopt_dir = SOPT_SET; opt.sopt_val = &old; sosetopt(so, &opt); } } out: if (freesa) free(sa, M_SONAME); return (error); } /* * Make sure an mbuf list is made up entirely of ext_pgs mbufs. * This is needed for sosend() when KERN_TLS is being used. * (There might also be a performance improvement for certain * network interfaces that handle ext_pgs mbufs efficiently.) * It expects at least one non-ext_pgs mbuf followed by zero * or more ext_pgs mbufs. It does not handle the case where * non-ext_pgs mbuf(s) follow ext_pgs ones. * It also performs sanity checks on the resultant list. * The "mp" argument list is consumed. * The "maxextsiz" argument is the upper bound on the data * size for each mbuf (usually 16K for KERN_TLS). */ struct mbuf * _rpc_copym_into_ext_pgs(struct mbuf *mp, int maxextsiz) { struct mbuf *m, *m2, *m3, *mhead; int tlen; KASSERT((mp->m_flags & (M_EXT | M_NOMAP)) != (M_EXT | M_NOMAP), ("_rpc_copym_into_ext_pgs:" " first mbuf is an ext_pgs")); /* * Find the last non-ext_pgs mbuf and the total * length of the non-ext_pgs mbuf(s). * The first mbuf must always be a non-ext_pgs * mbuf. */ tlen = mp->m_len; m2 = mp; for (m = mp->m_next; m != NULL; m = m->m_next) { if ((m->m_flags & M_NOMAP) != 0) break; tlen += m->m_len; m2 = m; } /* * Copy the non-ext_pgs mbuf(s) into an ext_pgs * mbuf list. */ m2->m_next = NULL; mhead = mb_copym_ext_pgs(mp, tlen, maxextsiz, M_WAITOK, - true, mb_free_mext_pgs, &m2); + mb_free_mext_pgs, &m2); /* * Link the ext_pgs list onto the newly copied * list and free up the non-ext_pgs mbuf(s). */ - mhead->m_pkthdr.len = mp->m_pkthdr.len; m2->m_next = m; m_freem(mp); /* * Sanity check the resultant mbuf list. Check for and * remove any 0 length mbufs in the list, since the * KERN_TLS code does not expect any 0 length mbuf(s) * in the list. */ m3 = NULL; m2 = mhead; tlen = 0; while (m2 != NULL) { KASSERT(m2->m_len >= 0, ("_rpc_copym_into_ext_pgs:" " negative m_len")); KASSERT((m2->m_flags & (M_EXT | M_NOMAP)) == (M_EXT | M_NOMAP), ("_rpc_copym_into_ext_pgs:" " non-nomap mbuf in list")); if (m2->m_len == 0) { if (m3 != NULL) m3->m_next = m2->m_next; else m = m2->m_next; m2->m_next = NULL; m_free(m2); if (m3 != NULL) m2 = m3->m_next; else m2 = m; } else { - MBUF_EXT_PGS_ASSERT_SANITY( - m2->m_ext.ext_pgs); + MBUF_EXT_PGS_ASSERT_SANITY(&m2->m_ext_pgs); m3 = m2; tlen += m2->m_len; m2 = m2->m_next; } } KASSERT(tlen == mhead->m_pkthdr.len, ("_rpc_copym_into_ext_pgs: tlen=%d pkthdrlen=%d", tlen, mhead->m_pkthdr.len)); return (mhead); } /* * Kernel module glue */ static int krpc_modevent(module_t mod, int type, void *data) { return (0); } static moduledata_t krpc_mod = { "krpc", krpc_modevent, NULL, }; DECLARE_MODULE(krpc, krpc_mod, SI_SUB_VFS, SI_ORDER_ANY); /* So that loader and kldload(2) can find us, wherever we are.. */ MODULE_VERSION(krpc, 1); MODULE_DEPEND(krpc, xdr, 1, 1, 1); Index: projects/nfs-over-tls/sys/rpc/svc_vc.c =================================================================== --- projects/nfs-over-tls/sys/rpc/svc_vc.c (revision 360215) +++ projects/nfs-over-tls/sys/rpc/svc_vc.c (revision 360216) @@ -1,1099 +1,1099 @@ /* $NetBSD: svc_vc.c,v 1.7 2000/08/03 00:01:53 fvdl Exp $ */ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char *sccsid2 = "@(#)svc_tcp.c 1.21 87/08/11 Copyr 1984 Sun Micro"; static char *sccsid = "@(#)svc_tcp.c 2.2 88/08/01 4.0 RPCSRC"; #endif #include __FBSDID("$FreeBSD$"); /* * svc_vc.c, Server side for Connection Oriented based RPC. * * Actually implements two flavors of transporter - * a tcp rendezvouser (a listner and connection establisher) * and a record/tcp stream. */ #include "opt_kern_tls.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KERN_TLS extern u_int ktls_maxlen; #endif static bool_t svc_vc_rendezvous_recv(SVCXPRT *, struct rpc_msg *, struct sockaddr **, struct mbuf **); static enum xprt_stat svc_vc_rendezvous_stat(SVCXPRT *); static void svc_vc_rendezvous_destroy(SVCXPRT *); static bool_t svc_vc_null(void); static void svc_vc_destroy(SVCXPRT *); static enum xprt_stat svc_vc_stat(SVCXPRT *); static bool_t svc_vc_ack(SVCXPRT *, uint32_t *); static bool_t svc_vc_recv(SVCXPRT *, struct rpc_msg *, struct sockaddr **, struct mbuf **); static bool_t svc_vc_reply(SVCXPRT *, struct rpc_msg *, struct sockaddr *, struct mbuf *, uint32_t *seq); static bool_t svc_vc_control(SVCXPRT *xprt, const u_int rq, void *in); static bool_t svc_vc_rendezvous_control (SVCXPRT *xprt, const u_int rq, void *in); static void svc_vc_backchannel_destroy(SVCXPRT *); static enum xprt_stat svc_vc_backchannel_stat(SVCXPRT *); static bool_t svc_vc_backchannel_recv(SVCXPRT *, struct rpc_msg *, struct sockaddr **, struct mbuf **); static bool_t svc_vc_backchannel_reply(SVCXPRT *, struct rpc_msg *, struct sockaddr *, struct mbuf *, uint32_t *); static bool_t svc_vc_backchannel_control(SVCXPRT *xprt, const u_int rq, void *in); static SVCXPRT *svc_vc_create_conn(SVCPOOL *pool, struct socket *so, struct sockaddr *raddr); static int svc_vc_accept(struct socket *head, struct socket **sop); static int svc_vc_soupcall(struct socket *so, void *arg, int waitflag); static int svc_vc_rendezvous_soupcall(struct socket *, void *, int); static struct xp_ops svc_vc_rendezvous_ops = { .xp_recv = svc_vc_rendezvous_recv, .xp_stat = svc_vc_rendezvous_stat, .xp_reply = (bool_t (*)(SVCXPRT *, struct rpc_msg *, struct sockaddr *, struct mbuf *, uint32_t *))svc_vc_null, .xp_destroy = svc_vc_rendezvous_destroy, .xp_control = svc_vc_rendezvous_control }; static struct xp_ops svc_vc_ops = { .xp_recv = svc_vc_recv, .xp_stat = svc_vc_stat, .xp_ack = svc_vc_ack, .xp_reply = svc_vc_reply, .xp_destroy = svc_vc_destroy, .xp_control = svc_vc_control }; static struct xp_ops svc_vc_backchannel_ops = { .xp_recv = svc_vc_backchannel_recv, .xp_stat = svc_vc_backchannel_stat, .xp_reply = svc_vc_backchannel_reply, .xp_destroy = svc_vc_backchannel_destroy, .xp_control = svc_vc_backchannel_control }; /* * Usage: * xprt = svc_vc_create(sock, send_buf_size, recv_buf_size); * * Creates, registers, and returns a (rpc) tcp based transporter. * Once *xprt is initialized, it is registered as a transporter * see (svc.h, xprt_register). This routine returns * a NULL if a problem occurred. * * The filedescriptor passed in is expected to refer to a bound, but * not yet connected socket. * * Since streams do buffered io similar to stdio, the caller can specify * how big the send and receive buffers are via the second and third parms; * 0 => use the system default. */ SVCXPRT * svc_vc_create(SVCPOOL *pool, struct socket *so, size_t sendsize, size_t recvsize) { SVCXPRT *xprt; struct sockaddr* sa; int error; SOCK_LOCK(so); if (so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED)) { SOCK_UNLOCK(so); CURVNET_SET(so->so_vnet); error = so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa); CURVNET_RESTORE(); if (error) return (NULL); xprt = svc_vc_create_conn(pool, so, sa); free(sa, M_SONAME); return (xprt); } SOCK_UNLOCK(so); xprt = svc_xprt_alloc(); sx_init(&xprt->xp_lock, "xprt->xp_lock"); xprt->xp_pool = pool; xprt->xp_socket = so; xprt->xp_p1 = NULL; xprt->xp_p2 = NULL; xprt->xp_ops = &svc_vc_rendezvous_ops; CURVNET_SET(so->so_vnet); error = so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa); CURVNET_RESTORE(); if (error) { goto cleanup_svc_vc_create; } memcpy(&xprt->xp_ltaddr, sa, sa->sa_len); free(sa, M_SONAME); xprt_register(xprt); solisten(so, -1, curthread); SOLISTEN_LOCK(so); xprt->xp_upcallset = 1; solisten_upcall_set(so, svc_vc_rendezvous_soupcall, xprt); SOLISTEN_UNLOCK(so); return (xprt); cleanup_svc_vc_create: sx_destroy(&xprt->xp_lock); svc_xprt_free(xprt); return (NULL); } /* * Create a new transport for a socket optained via soaccept(). */ SVCXPRT * svc_vc_create_conn(SVCPOOL *pool, struct socket *so, struct sockaddr *raddr) { SVCXPRT *xprt; struct cf_conn *cd; struct sockaddr* sa = NULL; struct sockopt opt; int one = 1; int error; bzero(&opt, sizeof(struct sockopt)); opt.sopt_dir = SOPT_SET; opt.sopt_level = SOL_SOCKET; opt.sopt_name = SO_KEEPALIVE; opt.sopt_val = &one; opt.sopt_valsize = sizeof(one); error = sosetopt(so, &opt); if (error) { return (NULL); } if (so->so_proto->pr_protocol == IPPROTO_TCP) { bzero(&opt, sizeof(struct sockopt)); opt.sopt_dir = SOPT_SET; opt.sopt_level = IPPROTO_TCP; opt.sopt_name = TCP_NODELAY; opt.sopt_val = &one; opt.sopt_valsize = sizeof(one); error = sosetopt(so, &opt); if (error) { return (NULL); } } cd = mem_alloc(sizeof(*cd)); cd->strm_stat = XPRT_IDLE; xprt = svc_xprt_alloc(); sx_init(&xprt->xp_lock, "xprt->xp_lock"); xprt->xp_pool = pool; xprt->xp_socket = so; xprt->xp_p1 = cd; xprt->xp_p2 = NULL; xprt->xp_ops = &svc_vc_ops; /* * See http://www.connectathon.org/talks96/nfstcp.pdf - client * has a 5 minute timer, server has a 6 minute timer. */ xprt->xp_idletimeout = 6 * 60; memcpy(&xprt->xp_rtaddr, raddr, raddr->sa_len); CURVNET_SET(so->so_vnet); error = so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa); CURVNET_RESTORE(); if (error) goto cleanup_svc_vc_create; memcpy(&xprt->xp_ltaddr, sa, sa->sa_len); free(sa, M_SONAME); xprt_register(xprt); SOCKBUF_LOCK(&so->so_rcv); xprt->xp_upcallset = 1; soupcall_set(so, SO_RCV, svc_vc_soupcall, xprt); SOCKBUF_UNLOCK(&so->so_rcv); /* * Throw the transport into the active list in case it already * has some data buffered. */ sx_xlock(&xprt->xp_lock); xprt_active(xprt); sx_xunlock(&xprt->xp_lock); return (xprt); cleanup_svc_vc_create: sx_destroy(&xprt->xp_lock); svc_xprt_free(xprt); mem_free(cd, sizeof(*cd)); return (NULL); } /* * Create a new transport for a backchannel on a clnt_vc socket. */ SVCXPRT * svc_vc_create_backchannel(SVCPOOL *pool) { SVCXPRT *xprt = NULL; struct cf_conn *cd = NULL; cd = mem_alloc(sizeof(*cd)); cd->strm_stat = XPRT_IDLE; xprt = svc_xprt_alloc(); sx_init(&xprt->xp_lock, "xprt->xp_lock"); xprt->xp_pool = pool; xprt->xp_socket = NULL; xprt->xp_p1 = cd; xprt->xp_p2 = NULL; xprt->xp_ops = &svc_vc_backchannel_ops; return (xprt); } /* * This does all of the accept except the final call to soaccept. The * caller will call soaccept after dropping its locks (soaccept may * call malloc). */ int svc_vc_accept(struct socket *head, struct socket **sop) { struct socket *so; int error = 0; short nbio; /* XXXGL: shouldn't that be an assertion? */ if ((head->so_options & SO_ACCEPTCONN) == 0) { error = EINVAL; goto done; } #ifdef MAC error = mac_socket_check_accept(curthread->td_ucred, head); if (error != 0) goto done; #endif /* * XXXGL: we want non-blocking semantics. The socket could be a * socket created by kernel as well as socket shared with userland, * so we can't be sure about presense of SS_NBIO. We also shall not * toggle it on the socket, since that may surprise userland. So we * set SS_NBIO only temporarily. */ SOLISTEN_LOCK(head); nbio = head->so_state & SS_NBIO; head->so_state |= SS_NBIO; error = solisten_dequeue(head, &so, 0); head->so_state &= (nbio & ~SS_NBIO); if (error) goto done; so->so_state |= nbio; *sop = so; /* connection has been removed from the listen queue */ KNOTE_UNLOCKED(&head->so_rdsel.si_note, 0); done: return (error); } /*ARGSUSED*/ static bool_t svc_vc_rendezvous_recv(SVCXPRT *xprt, struct rpc_msg *msg, struct sockaddr **addrp, struct mbuf **mp) { struct socket *so = NULL; struct sockaddr *sa = NULL; int error; SVCXPRT *new_xprt; /* * The socket upcall calls xprt_active() which will eventually * cause the server to call us here. We attempt to accept a * connection from the socket and turn it into a new * transport. If the accept fails, we have drained all pending * connections so we call xprt_inactive(). */ sx_xlock(&xprt->xp_lock); error = svc_vc_accept(xprt->xp_socket, &so); if (error == EWOULDBLOCK) { /* * We must re-test for new connections after taking * the lock to protect us in the case where a new * connection arrives after our call to accept fails * with EWOULDBLOCK. */ SOLISTEN_LOCK(xprt->xp_socket); if (TAILQ_EMPTY(&xprt->xp_socket->sol_comp)) xprt_inactive_self(xprt); SOLISTEN_UNLOCK(xprt->xp_socket); sx_xunlock(&xprt->xp_lock); return (FALSE); } if (error) { SOLISTEN_LOCK(xprt->xp_socket); if (xprt->xp_upcallset) { xprt->xp_upcallset = 0; soupcall_clear(xprt->xp_socket, SO_RCV); } SOLISTEN_UNLOCK(xprt->xp_socket); xprt_inactive_self(xprt); sx_xunlock(&xprt->xp_lock); return (FALSE); } sx_xunlock(&xprt->xp_lock); sa = NULL; error = soaccept(so, &sa); if (error) { /* * XXX not sure if I need to call sofree or soclose here. */ if (sa) free(sa, M_SONAME); return (FALSE); } /* * svc_vc_create_conn will call xprt_register - we don't need * to do anything with the new connection except derefence it. */ new_xprt = svc_vc_create_conn(xprt->xp_pool, so, sa); if (!new_xprt) { soclose(so); } else { SVC_RELEASE(new_xprt); } free(sa, M_SONAME); return (FALSE); /* there is never an rpc msg to be processed */ } /*ARGSUSED*/ static enum xprt_stat svc_vc_rendezvous_stat(SVCXPRT *xprt) { return (XPRT_IDLE); } static void svc_vc_destroy_common(SVCXPRT *xprt) { enum clnt_stat stat; if (xprt->xp_socket) { stat = RPC_FAILED; if ((xprt->xp_tls & RPCTLS_FLAGS_HANDSHAKE) != 0) stat = rpctls_srv_disconnect(xprt->xp_sslsec, xprt->xp_sslusec, xprt->xp_sslrefno); if (stat != RPC_SUCCESS) (void)soclose(xprt->xp_socket); } if (xprt->xp_netid) (void) mem_free(xprt->xp_netid, strlen(xprt->xp_netid) + 1); svc_xprt_free(xprt); } static void svc_vc_rendezvous_destroy(SVCXPRT *xprt) { SOLISTEN_LOCK(xprt->xp_socket); if (xprt->xp_upcallset) { xprt->xp_upcallset = 0; solisten_upcall_set(xprt->xp_socket, NULL, NULL); } SOLISTEN_UNLOCK(xprt->xp_socket); svc_vc_destroy_common(xprt); } static void svc_vc_destroy(SVCXPRT *xprt) { struct cf_conn *cd = (struct cf_conn *)xprt->xp_p1; SOCKBUF_LOCK(&xprt->xp_socket->so_rcv); if (xprt->xp_upcallset) { xprt->xp_upcallset = 0; soupcall_clear(xprt->xp_socket, SO_RCV); } SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv); svc_vc_destroy_common(xprt); if (cd->mreq) m_freem(cd->mreq); if (cd->mpending) m_freem(cd->mpending); mem_free(cd, sizeof(*cd)); } static void svc_vc_backchannel_destroy(SVCXPRT *xprt) { struct cf_conn *cd = (struct cf_conn *)xprt->xp_p1; struct mbuf *m, *m2; svc_xprt_free(xprt); m = cd->mreq; while (m != NULL) { m2 = m; m = m->m_nextpkt; m_freem(m2); } mem_free(cd, sizeof(*cd)); } /*ARGSUSED*/ static bool_t svc_vc_control(SVCXPRT *xprt, const u_int rq, void *in) { return (FALSE); } static bool_t svc_vc_rendezvous_control(SVCXPRT *xprt, const u_int rq, void *in) { return (FALSE); } static bool_t svc_vc_backchannel_control(SVCXPRT *xprt, const u_int rq, void *in) { return (FALSE); } static enum xprt_stat svc_vc_stat(SVCXPRT *xprt) { struct cf_conn *cd; cd = (struct cf_conn *)(xprt->xp_p1); if (cd->strm_stat == XPRT_DIED) return (XPRT_DIED); if (cd->mreq != NULL && cd->resid == 0 && cd->eor) return (XPRT_MOREREQS); if (soreadable(xprt->xp_socket)) return (XPRT_MOREREQS); return (XPRT_IDLE); } static bool_t svc_vc_ack(SVCXPRT *xprt, uint32_t *ack) { *ack = atomic_load_acq_32(&xprt->xp_snt_cnt); *ack -= sbused(&xprt->xp_socket->so_snd); return (TRUE); } static enum xprt_stat svc_vc_backchannel_stat(SVCXPRT *xprt) { struct cf_conn *cd; cd = (struct cf_conn *)(xprt->xp_p1); if (cd->mreq != NULL) return (XPRT_MOREREQS); return (XPRT_IDLE); } /* * If we have an mbuf chain in cd->mpending, try to parse a record from it, * leaving the result in cd->mreq. If we don't have a complete record, leave * the partial result in cd->mreq and try to read more from the socket. */ static int svc_vc_process_pending(SVCXPRT *xprt) { struct cf_conn *cd = (struct cf_conn *) xprt->xp_p1; struct socket *so = xprt->xp_socket; struct mbuf *m; { struct mbuf *m1, *m2, *m3, *m4; int txxxx; m3 = cd->mpending; m4 = NULL; while (m3 != NULL && (m3->m_flags & M_NOMAP) != 0) { m4 = m3; m3 = m3->m_next; } if (m3 != NULL) { txxxx = m_length(m3, NULL); if (txxxx > 0) { m1 = mb_copym_ext_pgs(m3, txxxx, 16384, M_WAITOK, - false, mb_free_mext_pgs, &m2); + mb_free_mext_pgs, &m2); if (m4 != NULL) { m4->m_next = m1; m_freem(m3); } else { m2 = cd->mpending; cd->mpending = m1; m_freem(m2); } } } } /* * If cd->resid is non-zero, we have part of the * record already, otherwise we are expecting a record * marker. */ if (!cd->resid && cd->mpending) { /* * See if there is enough data buffered to * make up a record marker. Make sure we can * handle the case where the record marker is * split across more than one mbuf. */ size_t n = 0; uint32_t header; m = cd->mpending; while (n < sizeof(uint32_t) && m) { n += m->m_len; m = m->m_next; } if (n < sizeof(uint32_t)) { so->so_rcv.sb_lowat = sizeof(uint32_t) - n; return (FALSE); } m_copydata(cd->mpending, 0, sizeof(header), (char *)&header); header = ntohl(header); cd->eor = (header & 0x80000000) != 0; cd->resid = header & 0x7fffffff; cd->resid += sizeof(uint32_t); } /* * Start pulling off mbufs from cd->mpending * until we either have a complete record or * we run out of data. We use m_split to pull * data - it will pull as much as possible and * split the last mbuf if necessary. */ while (cd->mpending && cd->resid) { m = cd->mpending; if (cd->mpending->m_next || cd->mpending->m_len > cd->resid) { if ((cd->mpending->m_flags & M_NOMAP) != 0) cd->mpending = mb_splitatpos_ext( cd->mpending, cd->resid, M_WAITOK); else cd->mpending = m_split(cd->mpending, cd->resid, M_WAITOK); } else cd->mpending = NULL; if (cd->mreq) m_last(cd->mreq)->m_next = m; else cd->mreq = m; while (m) { cd->resid -= m->m_len; m = m->m_next; } } /* * Block receive upcalls if we have more data pending, * otherwise report our need. */ if (cd->mpending) so->so_rcv.sb_lowat = INT_MAX; else so->so_rcv.sb_lowat = imax(1, imin(cd->resid, so->so_rcv.sb_hiwat / 2)); return (TRUE); } static bool_t svc_vc_recv(SVCXPRT *xprt, struct rpc_msg *msg, struct sockaddr **addrp, struct mbuf **mp) { struct cf_conn *cd = (struct cf_conn *) xprt->xp_p1; struct uio uio; struct mbuf *m; struct socket* so = xprt->xp_socket; XDR xdrs; int error, rcvflag; uint32_t xid_plus_direction[3], junk; /* * Serialise access to the socket and our own record parsing * state. */ sx_xlock(&xprt->xp_lock); for (;;) { /* If we have no request ready, check pending queue. */ while (cd->mpending && (cd->mreq == NULL || cd->resid != 0 || !cd->eor)) { if (!svc_vc_process_pending(xprt)) break; } /* Process and return complete request in cd->mreq. */ if (cd->mreq != NULL && cd->resid == 0 && cd->eor) { /* * Now, check for a backchannel reply. * The XID is in the first uint32_t of the reply * and the message direction is the second one. */ if ((cd->mreq->m_len >= sizeof(xid_plus_direction) || m_length(cd->mreq, NULL) >= sizeof(xid_plus_direction)) && xprt->xp_p2 != NULL) { m_copydata(cd->mreq, 0, sizeof(xid_plus_direction), (char *)xid_plus_direction); xid_plus_direction[1] = ntohl(xid_plus_direction[1]); xid_plus_direction[2] = ntohl(xid_plus_direction[2]); /* Check message direction. */ if (xid_plus_direction[2] == REPLY) { clnt_bck_svccall(xprt->xp_p2, cd->mreq, xid_plus_direction[1]); cd->mreq = NULL; continue; } } xdrmbuf_create(&xdrs, cd->mreq, XDR_DECODE); cd->mreq = NULL; /* Check for next request in a pending queue. */ svc_vc_process_pending(xprt); if (cd->mreq == NULL || cd->resid != 0) { SOCKBUF_LOCK(&so->so_rcv); if (!soreadable(so)) xprt_inactive_self(xprt); SOCKBUF_UNLOCK(&so->so_rcv); } sx_xunlock(&xprt->xp_lock); if (! xdr_uint32_t(&xdrs, &junk) || ! xdr_callmsg(&xdrs, msg)) { XDR_DESTROY(&xdrs); return (FALSE); } *addrp = NULL; *mp = xdrmbuf_getall(&xdrs); if (((*mp)->m_flags & M_NOMAP) != 0) xprt->xp_mbufoffs = xdrs.x_handy; else xprt->xp_mbufoffs = 0; XDR_DESTROY(&xdrs); return (TRUE); } /* * If receiving is disabled so that a TLS handshake can be * done by the rpctlssd daemon, return FALSE here. */ if (xprt->xp_dontrcv) { sx_xunlock(&xprt->xp_lock); return (FALSE); } /* * The socket upcall calls xprt_active() which will eventually * cause the server to call us here. We attempt to * read as much as possible from the socket and put * the result in cd->mpending. If the read fails, * we have drained both cd->mpending and the socket so * we can call xprt_inactive(). */ uio.uio_resid = 1000000000; uio.uio_td = curthread; m = NULL; rcvflag = MSG_DONTWAIT; error = soreceive(so, NULL, &uio, &m, NULL, &rcvflag); if (error == EWOULDBLOCK) { /* * We must re-test for readability after * taking the lock to protect us in the case * where a new packet arrives on the socket * after our call to soreceive fails with * EWOULDBLOCK. */ SOCKBUF_LOCK(&so->so_rcv); if (!soreadable(so)) xprt_inactive_self(xprt); SOCKBUF_UNLOCK(&so->so_rcv); sx_xunlock(&xprt->xp_lock); return (FALSE); } if (error) { SOCKBUF_LOCK(&so->so_rcv); if (xprt->xp_upcallset) { xprt->xp_upcallset = 0; soupcall_clear(so, SO_RCV); } SOCKBUF_UNLOCK(&so->so_rcv); xprt_inactive_self(xprt); cd->strm_stat = XPRT_DIED; sx_xunlock(&xprt->xp_lock); return (FALSE); } if (!m) { /* * EOF - the other end has closed the socket. */ xprt_inactive_self(xprt); cd->strm_stat = XPRT_DIED; sx_xunlock(&xprt->xp_lock); return (FALSE); } if (cd->mpending) m_last(cd->mpending)->m_next = m; else cd->mpending = m; } } static bool_t svc_vc_backchannel_recv(SVCXPRT *xprt, struct rpc_msg *msg, struct sockaddr **addrp, struct mbuf **mp) { struct cf_conn *cd = (struct cf_conn *) xprt->xp_p1; struct ct_data *ct; struct mbuf *m; XDR xdrs; sx_xlock(&xprt->xp_lock); ct = (struct ct_data *)xprt->xp_p2; if (ct == NULL) { sx_xunlock(&xprt->xp_lock); return (FALSE); } mtx_lock(&ct->ct_lock); m = cd->mreq; if (m == NULL) { xprt_inactive_self(xprt); mtx_unlock(&ct->ct_lock); sx_xunlock(&xprt->xp_lock); return (FALSE); } cd->mreq = m->m_nextpkt; mtx_unlock(&ct->ct_lock); sx_xunlock(&xprt->xp_lock); printf("recv backch m=%p\n", m); { struct mbuf *m1, *m2; int txxxx; if (m != NULL) { txxxx = m_length(m, NULL); if (txxxx > 0) { m1 = mb_copym_ext_pgs(m, txxxx, 16384, M_WAITOK, - false, mb_free_mext_pgs, &m2); + mb_free_mext_pgs, &m2); m2 = m; m = m1; m_freem(m2); } } } xdrmbuf_create(&xdrs, m, XDR_DECODE); if (! xdr_callmsg(&xdrs, msg)) { printf("recv backch callmsg failed\n"); XDR_DESTROY(&xdrs); return (FALSE); } *addrp = NULL; *mp = xdrmbuf_getall(&xdrs); if (((*mp)->m_flags & M_NOMAP) != 0) xprt->xp_mbufoffs = xdrs.x_handy; else xprt->xp_mbufoffs = 0; printf("backch offs=%d\n", xprt->xp_mbufoffs); XDR_DESTROY(&xdrs); return (TRUE); } static bool_t svc_vc_reply(SVCXPRT *xprt, struct rpc_msg *msg, struct sockaddr *addr, struct mbuf *m, uint32_t *seq) { XDR xdrs; struct mbuf *mrep; bool_t stat = TRUE; int error, len, maxextsiz; /* * Leave space for record mark. */ mrep = m_gethdr(M_WAITOK, MT_DATA); mrep->m_data += sizeof(uint32_t); xdrmbuf_create(&xdrs, mrep, XDR_ENCODE); if (msg->rm_reply.rp_stat == MSG_ACCEPTED && msg->rm_reply.rp_acpt.ar_stat == SUCCESS) { if (!xdr_replymsg(&xdrs, msg)) stat = FALSE; else xdrmbuf_append(&xdrs, m); } else { stat = xdr_replymsg(&xdrs, msg); } if (stat) { m_fixhdr(mrep); /* * Prepend a record marker containing the reply length. */ M_PREPEND(mrep, sizeof(uint32_t), M_WAITOK); len = mrep->m_pkthdr.len; *mtod(mrep, uint32_t *) = htonl(0x80000000 | (len - sizeof(uint32_t))); /* For RPC-over-TLS, copy mrep to a chain of ext_pgs. */ if ((xprt->xp_tls & RPCTLS_FLAGS_HANDSHAKE) != 0) { /* * Copy the mbuf chain to a chain of * ext_pgs mbuf(s) as required by KERN_TLS. */ maxextsiz = TLS_MAX_MSG_SIZE_V10_2; #ifdef KERN_TLS maxextsiz = min(maxextsiz, ktls_maxlen); #endif mrep = _rpc_copym_into_ext_pgs(mrep, maxextsiz); } atomic_add_32(&xprt->xp_snd_cnt, len); /* * sosend consumes mreq. */ error = sosend(xprt->xp_socket, NULL, NULL, mrep, NULL, 0, curthread); if (!error) { atomic_add_rel_32(&xprt->xp_snt_cnt, len); if (seq) *seq = xprt->xp_snd_cnt; stat = TRUE; } else atomic_subtract_32(&xprt->xp_snd_cnt, len); } else { m_freem(mrep); } XDR_DESTROY(&xdrs); return (stat); } static bool_t svc_vc_backchannel_reply(SVCXPRT *xprt, struct rpc_msg *msg, struct sockaddr *addr, struct mbuf *m, uint32_t *seq) { struct ct_data *ct; XDR xdrs; struct mbuf *mrep; bool_t stat = TRUE; int error, maxextsiz; /* * Leave space for record mark. */ mrep = m_gethdr(M_WAITOK, MT_DATA); mrep->m_data += sizeof(uint32_t); xdrmbuf_create(&xdrs, mrep, XDR_ENCODE); if (msg->rm_reply.rp_stat == MSG_ACCEPTED && msg->rm_reply.rp_acpt.ar_stat == SUCCESS) { if (!xdr_replymsg(&xdrs, msg)) stat = FALSE; else xdrmbuf_append(&xdrs, m); } else { stat = xdr_replymsg(&xdrs, msg); } if (stat) { m_fixhdr(mrep); /* * Prepend a record marker containing the reply length. */ M_PREPEND(mrep, sizeof(uint32_t), M_WAITOK); *mtod(mrep, uint32_t *) = htonl(0x80000000 | (mrep->m_pkthdr.len - sizeof(uint32_t))); /* For RPC-over-TLS, copy mrep to a chain of ext_pgs. */ if ((xprt->xp_tls & RPCTLS_FLAGS_HANDSHAKE) != 0) { /* * Copy the mbuf chain to a chain of * ext_pgs mbuf(s) as required by KERN_TLS. */ maxextsiz = TLS_MAX_MSG_SIZE_V10_2; #ifdef KERN_TLS maxextsiz = min(maxextsiz, ktls_maxlen); #endif mrep = _rpc_copym_into_ext_pgs(mrep, maxextsiz); } sx_xlock(&xprt->xp_lock); ct = (struct ct_data *)xprt->xp_p2; if (ct != NULL) error = sosend(ct->ct_socket, NULL, NULL, mrep, NULL, 0, curthread); else error = EPIPE; sx_xunlock(&xprt->xp_lock); if (!error) { stat = TRUE; } } else { m_freem(mrep); } XDR_DESTROY(&xdrs); return (stat); } static bool_t svc_vc_null() { return (FALSE); } static int svc_vc_soupcall(struct socket *so, void *arg, int waitflag) { SVCXPRT *xprt = (SVCXPRT *) arg; if (soreadable(xprt->xp_socket)) xprt_active(xprt); return (SU_OK); } static int svc_vc_rendezvous_soupcall(struct socket *head, void *arg, int waitflag) { SVCXPRT *xprt = (SVCXPRT *) arg; if (!TAILQ_EMPTY(&head->sol_comp)) xprt_active(xprt); return (SU_OK); } #if 0 /* * Get the effective UID of the sending process. Used by rpcbind, keyserv * and rpc.yppasswdd on AF_LOCAL. */ int __rpc_get_local_uid(SVCXPRT *transp, uid_t *uid) { int sock, ret; gid_t egid; uid_t euid; struct sockaddr *sa; sock = transp->xp_fd; sa = (struct sockaddr *)transp->xp_rtaddr; if (sa->sa_family == AF_LOCAL) { ret = getpeereid(sock, &euid, &egid); if (ret == 0) *uid = euid; return (ret); } else return (-1); } #endif Index: projects/nfs-over-tls/sys/sys/mbuf.h =================================================================== --- projects/nfs-over-tls/sys/sys/mbuf.h (revision 360215) +++ projects/nfs-over-tls/sys/sys/mbuf.h (revision 360216) @@ -1,1524 +1,1524 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)mbuf.h 8.5 (Berkeley) 2/19/95 * $FreeBSD$ */ #ifndef _SYS_MBUF_H_ #define _SYS_MBUF_H_ /* XXX: These includes suck. Sorry! */ #include #ifdef _KERNEL #include #include #include #ifdef WITNESS #include #endif #endif #ifdef _KERNEL #include #define MBUF_PROBE1(probe, arg0) \ SDT_PROBE1(sdt, , , probe, arg0) #define MBUF_PROBE2(probe, arg0, arg1) \ SDT_PROBE2(sdt, , , probe, arg0, arg1) #define MBUF_PROBE3(probe, arg0, arg1, arg2) \ SDT_PROBE3(sdt, , , probe, arg0, arg1, arg2) #define MBUF_PROBE4(probe, arg0, arg1, arg2, arg3) \ SDT_PROBE4(sdt, , , probe, arg0, arg1, arg2, arg3) #define MBUF_PROBE5(probe, arg0, arg1, arg2, arg3, arg4) \ SDT_PROBE5(sdt, , , probe, arg0, arg1, arg2, arg3, arg4) SDT_PROBE_DECLARE(sdt, , , m__init); SDT_PROBE_DECLARE(sdt, , , m__gethdr); SDT_PROBE_DECLARE(sdt, , , m__get); SDT_PROBE_DECLARE(sdt, , , m__getcl); SDT_PROBE_DECLARE(sdt, , , m__clget); SDT_PROBE_DECLARE(sdt, , , m__cljget); SDT_PROBE_DECLARE(sdt, , , m__cljset); SDT_PROBE_DECLARE(sdt, , , m__free); SDT_PROBE_DECLARE(sdt, , , m__freem); #endif /* _KERNEL */ /* * Mbufs are of a single size, MSIZE (sys/param.h), which includes overhead. * An mbuf may add a single "mbuf cluster" of size MCLBYTES (also in * sys/param.h), which has no additional overhead and is used instead of the * internal data area; this is done when at least MINCLSIZE of data must be * stored. Additionally, it is possible to allocate a separate buffer * externally and attach it to the mbuf in a way similar to that of mbuf * clusters. * * NB: These calculation do not take actual compiler-induced alignment and * padding inside the complete struct mbuf into account. Appropriate * attention is required when changing members of struct mbuf. * * MLEN is data length in a normal mbuf. * MHLEN is data length in an mbuf with pktheader. * MINCLSIZE is a smallest amount of data that should be put into cluster. * * Compile-time assertions in uipc_mbuf.c test these values to ensure that * they are sensible. */ struct mbuf; #define MHSIZE offsetof(struct mbuf, m_dat) #define MPKTHSIZE offsetof(struct mbuf, m_pktdat) #define MLEN ((int)(MSIZE - MHSIZE)) #define MHLEN ((int)(MSIZE - MPKTHSIZE)) #define MINCLSIZE (MHLEN + 1) #define M_NODOM 255 #ifdef _KERNEL /*- * Macro for type conversion: convert mbuf pointer to data pointer of correct * type: * * mtod(m, t) -- Convert mbuf pointer to data pointer of correct type. * mtodo(m, o) -- Same as above but with offset 'o' into data. */ #define mtod(m, t) ((t)((m)->m_data)) #define mtodo(m, o) ((void *)(((m)->m_data) + (o))) /* * Argument structure passed to UMA routines during mbuf and packet * allocations. */ struct mb_args { int flags; /* Flags for mbuf being allocated */ short type; /* Type of mbuf being allocated */ }; #endif /* _KERNEL */ /* * Packet tag structure (see below for details). */ struct m_tag { SLIST_ENTRY(m_tag) m_tag_link; /* List of packet tags */ u_int16_t m_tag_id; /* Tag ID */ u_int16_t m_tag_len; /* Length of data */ u_int32_t m_tag_cookie; /* ABI/Module ID */ void (*m_tag_free)(struct m_tag *); }; /* * Static network interface owned tag. * Allocated through ifp->if_snd_tag_alloc(). */ struct m_snd_tag { struct ifnet *ifp; /* network interface tag belongs to */ volatile u_int refcount; }; /* * Record/packet header in first mbuf of chain; valid only if M_PKTHDR is set. * Size ILP32: 48 * LP64: 56 * Compile-time assertions in uipc_mbuf.c test these values to ensure that * they are correct. */ struct pkthdr { union { struct m_snd_tag *snd_tag; /* send tag, if any */ struct ifnet *rcvif; /* rcv interface */ }; SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */ int32_t len; /* total packet length */ /* Layer crossing persistent information. */ uint32_t flowid; /* packet's 4-tuple system */ uint32_t csum_flags; /* checksum and offload features */ uint16_t fibnum; /* this packet should use this fib */ uint8_t numa_domain; /* NUMA domain of recvd pkt */ uint8_t rsstype; /* hash type */ union { uint64_t rcv_tstmp; /* timestamp in ns */ struct { uint8_t l2hlen; /* layer 2 hdr len */ uint8_t l3hlen; /* layer 3 hdr len */ uint8_t l4hlen; /* layer 4 hdr len */ uint8_t l5hlen; /* layer 5 hdr len */ uint32_t spare; }; }; union { uint8_t eight[8]; uint16_t sixteen[4]; uint32_t thirtytwo[2]; uint64_t sixtyfour[1]; uintptr_t unintptr[1]; void *ptr; } PH_per; /* Layer specific non-persistent local storage for reassembly, etc. */ union { uint8_t eight[8]; uint16_t sixteen[4]; uint32_t thirtytwo[2]; uint64_t sixtyfour[1]; uintptr_t unintptr[1]; void *ptr; } PH_loc; }; #define ether_vtag PH_per.sixteen[0] #define PH_vt PH_per #define vt_nrecs sixteen[0] /* mld and v6-ND */ #define tso_segsz PH_per.sixteen[1] /* inbound after LRO */ #define lro_nsegs tso_segsz /* inbound after LRO */ #define csum_data PH_per.thirtytwo[1] /* inbound from hardware up */ #define lro_len PH_loc.sixteen[0] /* inbound during LRO (no reassembly) */ #define lro_csum PH_loc.sixteen[1] /* inbound during LRO (no reassembly) */ /* Note PH_loc is used during IP reassembly (all 8 bytes as a ptr) */ /* * TLS records for TLS 1.0-1.2 can have the following header lengths: * - 5 (AES-CBC with implicit IV) * - 21 (AES-CBC with explicit IV) * - 13 (AES-GCM with 8 byte explicit IV) */ #define MBUF_PEXT_HDR_LEN 23 /* * TLS records for TLS 1.0-1.2 can have the following maximum trailer * lengths: * - 16 (AES-GCM) * - 36 (AES-CBC with SHA1 and up to 16 bytes of padding) * - 48 (AES-CBC with SHA2-256 and up to 16 bytes of padding) * - 64 (AES-CBC with SHA2-384 and up to 16 bytes of padding) */ #define MBUF_PEXT_TRAIL_LEN 64 #if defined(__LP64__) #define MBUF_PEXT_MAX_PGS (40 / sizeof(vm_paddr_t)) #else #define MBUF_PEXT_MAX_PGS (72 / sizeof(vm_paddr_t)) #endif #define MBUF_PEXT_MAX_BYTES \ (MBUF_PEXT_MAX_PGS * PAGE_SIZE + MBUF_PEXT_HDR_LEN + MBUF_PEXT_TRAIL_LEN) #define MBUF_PEXT_FLAG_ANON 1 /* Data can be encrypted in place. */ struct mbuf_ext_pgs_data { vm_paddr_t pa[MBUF_PEXT_MAX_PGS]; /* phys addrs of pgs */ char trail[MBUF_PEXT_TRAIL_LEN]; /* TLS trailer */ char hdr[MBUF_PEXT_HDR_LEN]; /* TLS header */ }; struct ktls_session; struct socket; /* * Description of external storage mapped into mbuf; valid only if M_EXT is * set. * Size ILP32: 28 * LP64: 48 * Compile-time assertions in uipc_mbuf.c test these values to ensure that * they are correct. */ typedef void m_ext_free_t(struct mbuf *); struct m_ext { union { /* * If EXT_FLAG_EMBREF is set, then we use refcount in the * mbuf, the 'ext_count' member. Otherwise, we have a * shadow copy and we use pointer 'ext_cnt'. The original * mbuf is responsible to carry the pointer to free routine * and its arguments. They aren't copied into shadows in * mb_dupcl() to avoid dereferencing next cachelines. */ volatile u_int ext_count; volatile u_int *ext_cnt; }; uint32_t ext_size; /* size of buffer, for ext_free */ uint32_t ext_type:8, /* type of external storage */ ext_flags:24; /* external storage mbuf flags */ char *ext_buf; /* start of buffer */ /* * Fields below store the free context for the external storage. * They are valid only in the refcount carrying mbuf, the one with * EXT_FLAG_EMBREF flag, with exclusion for EXT_EXTREF type, where * the free context is copied into all mbufs that use same external * storage. */ #define m_ext_copylen offsetof(struct m_ext, ext_free) m_ext_free_t *ext_free; /* free routine if not the usual */ void *ext_arg1; /* optional argument pointer */ union { void *ext_arg2; /* optional argument pointer */ struct mbuf_ext_pgs_data ext_pgs; }; }; struct mbuf_ext_pgs { uint8_t npgs; /* Number of attached pages */ uint8_t nrdy; /* Pages with I/O pending */ uint8_t hdr_len; /* TLS header length */ uint8_t trail_len; /* TLS trailer length */ uint16_t first_pg_off; /* Offset into 1st page */ uint16_t last_pg_len; /* Length of last page */ uint8_t flags; /* Flags */ uint8_t record_type; uint8_t spare[2]; int enc_cnt; struct ktls_session *tls; /* TLS session */ struct socket *so; uint64_t seqno; struct mbuf *mbuf; STAILQ_ENTRY(mbuf_ext_pgs) stailq; #if !defined(__LP64__) uint8_t pad[8]; /* pad to size of pkthdr */ #endif struct m_ext m_ext; }; #define m_epg_hdr m_ext.ext_pgs.hdr #define m_epg_trail m_ext.ext_pgs.trail #define m_epg_pa m_ext.ext_pgs.pa /* * The core of the mbuf object along with some shortcut defines for practical * purposes. */ struct mbuf { /* * Header present at the beginning of every mbuf. * Size ILP32: 24 * LP64: 32 * Compile-time assertions in uipc_mbuf.c test these values to ensure * that they are correct. */ union { /* next buffer in chain */ struct mbuf *m_next; SLIST_ENTRY(mbuf) m_slist; STAILQ_ENTRY(mbuf) m_stailq; }; union { /* next chain in queue/record */ struct mbuf *m_nextpkt; SLIST_ENTRY(mbuf) m_slistpkt; STAILQ_ENTRY(mbuf) m_stailqpkt; }; caddr_t m_data; /* location of data */ int32_t m_len; /* amount of data in this mbuf */ uint32_t m_type:8, /* type of data in this mbuf */ m_flags:24; /* flags; see below */ #if !defined(__LP64__) uint32_t m_pad; /* pad for 64bit alignment */ #endif /* * A set of optional headers (packet header, external storage header) * and internal data storage. Historically, these arrays were sized * to MHLEN (space left after a packet header) and MLEN (space left * after only a regular mbuf header); they are now variable size in * order to support future work on variable-size mbufs. */ union { union { struct { struct pkthdr m_pkthdr; /* M_PKTHDR set */ union { struct m_ext m_ext; /* M_EXT set */ char m_pktdat[0]; }; }; struct mbuf_ext_pgs m_ext_pgs; }; char m_dat[0]; /* !M_PKTHDR, !M_EXT */ }; }; #ifdef _KERNEL static inline int mbuf_ext_pg_len(struct mbuf_ext_pgs *ext_pgs, int pidx, int pgoff) { KASSERT(pgoff == 0 || pidx == 0, ("page %d with non-zero offset %d in %p", pidx, pgoff, ext_pgs)); if (pidx == ext_pgs->npgs - 1) { return (ext_pgs->last_pg_len); } else { return (PAGE_SIZE - pgoff); } } #ifdef INVARIANT_SUPPORT void mb_ext_pgs_check(struct mbuf_ext_pgs *ext_pgs); #endif #ifdef INVARIANTS #define MBUF_EXT_PGS_ASSERT_SANITY(ext_pgs) mb_ext_pgs_check((ext_pgs)) #else #define MBUF_EXT_PGS_ASSERT_SANITY(ext_pgs) #endif #endif /* * mbuf flags of global significance and layer crossing. * Those of only protocol/layer specific significance are to be mapped * to M_PROTO[1-11] and cleared at layer handoff boundaries. * NB: Limited to the lower 24 bits. */ #define M_EXT 0x00000001 /* has associated external storage */ #define M_PKTHDR 0x00000002 /* start of record */ #define M_EOR 0x00000004 /* end of record */ #define M_RDONLY 0x00000008 /* associated data is marked read-only */ #define M_BCAST 0x00000010 /* send/received as link-level broadcast */ #define M_MCAST 0x00000020 /* send/received as link-level multicast */ #define M_PROMISC 0x00000040 /* packet was not for us */ #define M_VLANTAG 0x00000080 /* ether_vtag is valid */ #define M_NOMAP 0x00000100 /* mbuf data is unmapped */ #define M_NOFREE 0x00000200 /* do not free mbuf, embedded in cluster */ #define M_TSTMP 0x00000400 /* rcv_tstmp field is valid */ #define M_TSTMP_HPREC 0x00000800 /* rcv_tstmp is high-prec, typically hw-stamped on port (useful for IEEE 1588 and 802.1AS) */ #define M_TSTMP_LRO 0x00001000 /* Time LRO pushed in pkt is valid in (PH_loc) */ #define M_PROTO1 0x00002000 /* protocol-specific */ #define M_PROTO2 0x00004000 /* protocol-specific */ #define M_PROTO3 0x00008000 /* protocol-specific */ #define M_PROTO4 0x00010000 /* protocol-specific */ #define M_PROTO5 0x00020000 /* protocol-specific */ #define M_PROTO6 0x00040000 /* protocol-specific */ #define M_PROTO7 0x00080000 /* protocol-specific */ #define M_PROTO8 0x00100000 /* protocol-specific */ #define M_PROTO9 0x00200000 /* protocol-specific */ #define M_PROTO10 0x00400000 /* protocol-specific */ #define M_PROTO11 0x00800000 /* protocol-specific */ #define MB_DTOR_SKIP 0x1 /* don't pollute the cache by touching a freed mbuf */ /* * Flags to purge when crossing layers. */ #define M_PROTOFLAGS \ (M_PROTO1|M_PROTO2|M_PROTO3|M_PROTO4|M_PROTO5|M_PROTO6|M_PROTO7|M_PROTO8|\ M_PROTO9|M_PROTO10|M_PROTO11) /* * Flags preserved when copying m_pkthdr. */ #define M_COPYFLAGS \ (M_PKTHDR|M_EOR|M_RDONLY|M_BCAST|M_MCAST|M_PROMISC|M_VLANTAG|M_TSTMP| \ M_TSTMP_HPREC|M_TSTMP_LRO|M_PROTOFLAGS) /* * Mbuf flag description for use with printf(9) %b identifier. */ #define M_FLAG_BITS \ "\20\1M_EXT\2M_PKTHDR\3M_EOR\4M_RDONLY\5M_BCAST\6M_MCAST" \ "\7M_PROMISC\10M_VLANTAG\11M_NOMAP\12M_NOFREE\13M_TSTMP\14M_TSTMP_HPREC\15M_TSTMP_LRO" #define M_FLAG_PROTOBITS \ "\16M_PROTO1\17M_PROTO2\20M_PROTO3\21M_PROTO4" \ "\22M_PROTO5\23M_PROTO6\24M_PROTO7\25M_PROTO8\26M_PROTO9" \ "\27M_PROTO10\28M_PROTO11" #define M_FLAG_PRINTF (M_FLAG_BITS M_FLAG_PROTOBITS) /* * Network interface cards are able to hash protocol fields (such as IPv4 * addresses and TCP port numbers) classify packets into flows. These flows * can then be used to maintain ordering while delivering packets to the OS * via parallel input queues, as well as to provide a stateless affinity * model. NIC drivers can pass up the hash via m->m_pkthdr.flowid, and set * m_flag fields to indicate how the hash should be interpreted by the * network stack. * * Most NICs support RSS, which provides ordering and explicit affinity, and * use the hash m_flag bits to indicate what header fields were covered by * the hash. M_HASHTYPE_OPAQUE and M_HASHTYPE_OPAQUE_HASH can be set by non- * RSS cards or configurations that provide an opaque flow identifier, allowing * for ordering and distribution without explicit affinity. Additionally, * M_HASHTYPE_OPAQUE_HASH indicates that the flow identifier has hash * properties. * * The meaning of the IPV6_EX suffix: * "o Home address from the home address option in the IPv6 destination * options header. If the extension header is not present, use the Source * IPv6 Address. * o IPv6 address that is contained in the Routing-Header-Type-2 from the * associated extension header. If the extension header is not present, * use the Destination IPv6 Address." * Quoted from: * https://docs.microsoft.com/en-us/windows-hardware/drivers/network/rss-hashing-types#ndishashipv6ex */ #define M_HASHTYPE_HASHPROP 0x80 /* has hash properties */ #define M_HASHTYPE_HASH(t) (M_HASHTYPE_HASHPROP | (t)) /* Microsoft RSS standard hash types */ #define M_HASHTYPE_NONE 0 #define M_HASHTYPE_RSS_IPV4 M_HASHTYPE_HASH(1) /* IPv4 2-tuple */ #define M_HASHTYPE_RSS_TCP_IPV4 M_HASHTYPE_HASH(2) /* TCPv4 4-tuple */ #define M_HASHTYPE_RSS_IPV6 M_HASHTYPE_HASH(3) /* IPv6 2-tuple */ #define M_HASHTYPE_RSS_TCP_IPV6 M_HASHTYPE_HASH(4) /* TCPv6 4-tuple */ #define M_HASHTYPE_RSS_IPV6_EX M_HASHTYPE_HASH(5) /* IPv6 2-tuple + * ext hdrs */ #define M_HASHTYPE_RSS_TCP_IPV6_EX M_HASHTYPE_HASH(6) /* TCPv6 4-tuple + * ext hdrs */ #define M_HASHTYPE_RSS_UDP_IPV4 M_HASHTYPE_HASH(7) /* IPv4 UDP 4-tuple*/ #define M_HASHTYPE_RSS_UDP_IPV6 M_HASHTYPE_HASH(9) /* IPv6 UDP 4-tuple*/ #define M_HASHTYPE_RSS_UDP_IPV6_EX M_HASHTYPE_HASH(10)/* IPv6 UDP 4-tuple + * ext hdrs */ #define M_HASHTYPE_OPAQUE 63 /* ordering, not affinity */ #define M_HASHTYPE_OPAQUE_HASH M_HASHTYPE_HASH(M_HASHTYPE_OPAQUE) /* ordering+hash, not affinity*/ #define M_HASHTYPE_CLEAR(m) ((m)->m_pkthdr.rsstype = 0) #define M_HASHTYPE_GET(m) ((m)->m_pkthdr.rsstype) #define M_HASHTYPE_SET(m, v) ((m)->m_pkthdr.rsstype = (v)) #define M_HASHTYPE_TEST(m, v) (M_HASHTYPE_GET(m) == (v)) #define M_HASHTYPE_ISHASH(m) (M_HASHTYPE_GET(m) & M_HASHTYPE_HASHPROP) /* * External mbuf storage buffer types. */ #define EXT_CLUSTER 1 /* mbuf cluster */ #define EXT_SFBUF 2 /* sendfile(2)'s sf_buf */ #define EXT_JUMBOP 3 /* jumbo cluster page sized */ #define EXT_JUMBO9 4 /* jumbo cluster 9216 bytes */ #define EXT_JUMBO16 5 /* jumbo cluster 16184 bytes */ #define EXT_PACKET 6 /* mbuf+cluster from packet zone */ #define EXT_MBUF 7 /* external mbuf reference */ #define EXT_RXRING 8 /* data in NIC receive ring */ #define EXT_PGS 9 /* array of unmapped pages */ #define EXT_VENDOR1 224 /* for vendor-internal use */ #define EXT_VENDOR2 225 /* for vendor-internal use */ #define EXT_VENDOR3 226 /* for vendor-internal use */ #define EXT_VENDOR4 227 /* for vendor-internal use */ #define EXT_EXP1 244 /* for experimental use */ #define EXT_EXP2 245 /* for experimental use */ #define EXT_EXP3 246 /* for experimental use */ #define EXT_EXP4 247 /* for experimental use */ #define EXT_NET_DRV 252 /* custom ext_buf provided by net driver(s) */ #define EXT_MOD_TYPE 253 /* custom module's ext_buf type */ #define EXT_DISPOSABLE 254 /* can throw this buffer away w/page flipping */ #define EXT_EXTREF 255 /* has externally maintained ext_cnt ptr */ /* * Flags for external mbuf buffer types. * NB: limited to the lower 24 bits. */ #define EXT_FLAG_EMBREF 0x000001 /* embedded ext_count */ #define EXT_FLAG_EXTREF 0x000002 /* external ext_cnt, notyet */ #define EXT_FLAG_NOFREE 0x000010 /* don't free mbuf to pool, notyet */ #define EXT_FLAG_VENDOR1 0x010000 /* These flags are vendor */ #define EXT_FLAG_VENDOR2 0x020000 /* or submodule specific, */ #define EXT_FLAG_VENDOR3 0x040000 /* not used by mbuf code. */ #define EXT_FLAG_VENDOR4 0x080000 /* Set/read by submodule. */ #define EXT_FLAG_EXP1 0x100000 /* for experimental use */ #define EXT_FLAG_EXP2 0x200000 /* for experimental use */ #define EXT_FLAG_EXP3 0x400000 /* for experimental use */ #define EXT_FLAG_EXP4 0x800000 /* for experimental use */ /* * EXT flag description for use with printf(9) %b identifier. */ #define EXT_FLAG_BITS \ "\20\1EXT_FLAG_EMBREF\2EXT_FLAG_EXTREF\5EXT_FLAG_NOFREE" \ "\21EXT_FLAG_VENDOR1\22EXT_FLAG_VENDOR2\23EXT_FLAG_VENDOR3" \ "\24EXT_FLAG_VENDOR4\25EXT_FLAG_EXP1\26EXT_FLAG_EXP2\27EXT_FLAG_EXP3" \ "\30EXT_FLAG_EXP4" #define MBUF_EXT_PGS_ASSERT(m) \ KASSERT((((m)->m_flags & M_EXT) != 0) && \ ((m)->m_ext.ext_type == EXT_PGS), \ ("%s: m %p !M_EXT or !EXT_PGS", __func__, m)) /* * Flags indicating checksum, segmentation and other offload work to be * done, or already done, by hardware or lower layers. It is split into * separate inbound and outbound flags. * * Outbound flags that are set by upper protocol layers requesting lower * layers, or ideally the hardware, to perform these offloading tasks. * For outbound packets this field and its flags can be directly tested * against ifnet if_hwassist. */ #define CSUM_IP 0x00000001 /* IP header checksum offload */ #define CSUM_IP_UDP 0x00000002 /* UDP checksum offload */ #define CSUM_IP_TCP 0x00000004 /* TCP checksum offload */ #define CSUM_IP_SCTP 0x00000008 /* SCTP checksum offload */ #define CSUM_IP_TSO 0x00000010 /* TCP segmentation offload */ #define CSUM_IP_ISCSI 0x00000020 /* iSCSI checksum offload */ #define CSUM_IP6_UDP 0x00000200 /* UDP checksum offload */ #define CSUM_IP6_TCP 0x00000400 /* TCP checksum offload */ #define CSUM_IP6_SCTP 0x00000800 /* SCTP checksum offload */ #define CSUM_IP6_TSO 0x00001000 /* TCP segmentation offload */ #define CSUM_IP6_ISCSI 0x00002000 /* iSCSI checksum offload */ /* Inbound checksum support where the checksum was verified by hardware. */ #define CSUM_L3_CALC 0x01000000 /* calculated layer 3 csum */ #define CSUM_L3_VALID 0x02000000 /* checksum is correct */ #define CSUM_L4_CALC 0x04000000 /* calculated layer 4 csum */ #define CSUM_L4_VALID 0x08000000 /* checksum is correct */ #define CSUM_L5_CALC 0x10000000 /* calculated layer 5 csum */ #define CSUM_L5_VALID 0x20000000 /* checksum is correct */ #define CSUM_COALESCED 0x40000000 /* contains merged segments */ #define CSUM_SND_TAG 0x80000000 /* Packet header has send tag */ /* * CSUM flag description for use with printf(9) %b identifier. */ #define CSUM_BITS \ "\20\1CSUM_IP\2CSUM_IP_UDP\3CSUM_IP_TCP\4CSUM_IP_SCTP\5CSUM_IP_TSO" \ "\6CSUM_IP_ISCSI" \ "\12CSUM_IP6_UDP\13CSUM_IP6_TCP\14CSUM_IP6_SCTP\15CSUM_IP6_TSO" \ "\16CSUM_IP6_ISCSI" \ "\31CSUM_L3_CALC\32CSUM_L3_VALID\33CSUM_L4_CALC\34CSUM_L4_VALID" \ "\35CSUM_L5_CALC\36CSUM_L5_VALID\37CSUM_COALESCED\40CSUM_SND_TAG" /* CSUM flags compatibility mappings. */ #define CSUM_IP_CHECKED CSUM_L3_CALC #define CSUM_IP_VALID CSUM_L3_VALID #define CSUM_DATA_VALID CSUM_L4_VALID #define CSUM_PSEUDO_HDR CSUM_L4_CALC #define CSUM_SCTP_VALID CSUM_L4_VALID #define CSUM_DELAY_DATA (CSUM_TCP|CSUM_UDP) #define CSUM_DELAY_IP CSUM_IP /* Only v4, no v6 IP hdr csum */ #define CSUM_DELAY_DATA_IPV6 (CSUM_TCP_IPV6|CSUM_UDP_IPV6) #define CSUM_DATA_VALID_IPV6 CSUM_DATA_VALID #define CSUM_TCP CSUM_IP_TCP #define CSUM_UDP CSUM_IP_UDP #define CSUM_SCTP CSUM_IP_SCTP #define CSUM_TSO (CSUM_IP_TSO|CSUM_IP6_TSO) #define CSUM_UDP_IPV6 CSUM_IP6_UDP #define CSUM_TCP_IPV6 CSUM_IP6_TCP #define CSUM_SCTP_IPV6 CSUM_IP6_SCTP /* * mbuf types describing the content of the mbuf (including external storage). */ #define MT_NOTMBUF 0 /* USED INTERNALLY ONLY! Object is not mbuf */ #define MT_DATA 1 /* dynamic (data) allocation */ #define MT_HEADER MT_DATA /* packet header, use M_PKTHDR instead */ #define MT_VENDOR1 4 /* for vendor-internal use */ #define MT_VENDOR2 5 /* for vendor-internal use */ #define MT_VENDOR3 6 /* for vendor-internal use */ #define MT_VENDOR4 7 /* for vendor-internal use */ #define MT_SONAME 8 /* socket name */ #define MT_EXP1 9 /* for experimental use */ #define MT_EXP2 10 /* for experimental use */ #define MT_EXP3 11 /* for experimental use */ #define MT_EXP4 12 /* for experimental use */ #define MT_CONTROL 14 /* extra-data protocol message */ #define MT_EXTCONTROL 15 /* control message with externalized contents */ #define MT_OOBDATA 16 /* expedited data */ #define MT_NOINIT 255 /* Not a type but a flag to allocate a non-initialized mbuf */ /* * String names of mbuf-related UMA(9) and malloc(9) types. Exposed to * !_KERNEL so that monitoring tools can look up the zones with * libmemstat(3). */ #define MBUF_MEM_NAME "mbuf" #define MBUF_CLUSTER_MEM_NAME "mbuf_cluster" #define MBUF_PACKET_MEM_NAME "mbuf_packet" #define MBUF_JUMBOP_MEM_NAME "mbuf_jumbo_page" #define MBUF_JUMBO9_MEM_NAME "mbuf_jumbo_9k" #define MBUF_JUMBO16_MEM_NAME "mbuf_jumbo_16k" #define MBUF_TAG_MEM_NAME "mbuf_tag" #define MBUF_EXTREFCNT_MEM_NAME "mbuf_ext_refcnt" #define MBUF_EXTPGS_MEM_NAME "mbuf_extpgs" #ifdef _KERNEL #ifdef WITNESS #define MBUF_CHECKSLEEP(how) do { \ if (how == M_WAITOK) \ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, \ "Sleeping in \"%s\"", __func__); \ } while (0) #else #define MBUF_CHECKSLEEP(how) #endif /* * Network buffer allocation API * * The rest of it is defined in kern/kern_mbuf.c */ extern uma_zone_t zone_mbuf; extern uma_zone_t zone_clust; extern uma_zone_t zone_pack; extern uma_zone_t zone_jumbop; extern uma_zone_t zone_jumbo9; extern uma_zone_t zone_jumbo16; extern uma_zone_t zone_extpgs; void mb_dupcl(struct mbuf *, struct mbuf *); void mb_free_ext(struct mbuf *); void mb_free_mext_pgs(struct mbuf *); struct mbuf *mb_alloc_ext_pgs(int, m_ext_free_t); -struct mbuf *mb_alloc_ext_plus_pages(int, int, bool, m_ext_free_t); -struct mbuf *mb_copym_ext_pgs(struct mbuf *, int, int, int, bool, +struct mbuf *mb_alloc_ext_plus_pages(int, int, m_ext_free_t); +struct mbuf *mb_copym_ext_pgs(struct mbuf *, int, int, int, m_ext_free_t, struct mbuf **); struct mbuf *mb_splitatpos_ext(struct mbuf *, int, int); int mb_unmapped_compress(struct mbuf *m); struct mbuf *mb_unmapped_to_ext(struct mbuf *m); void mb_free_notready(struct mbuf *m, int count); void m_adj(struct mbuf *, int); int m_apply(struct mbuf *, int, int, int (*)(void *, void *, u_int), void *); int m_append(struct mbuf *, int, c_caddr_t); void m_cat(struct mbuf *, struct mbuf *); void m_catpkt(struct mbuf *, struct mbuf *); int m_clget(struct mbuf *m, int how); void *m_cljget(struct mbuf *m, int how, int size); struct mbuf *m_collapse(struct mbuf *, int, int); void m_copyback(struct mbuf *, int, int, c_caddr_t); void m_copydata(const struct mbuf *, int, int, caddr_t); struct mbuf *m_copym(struct mbuf *, int, int, int); struct mbuf *m_copypacket(struct mbuf *, int); void m_copy_pkthdr(struct mbuf *, struct mbuf *); struct mbuf *m_copyup(struct mbuf *, int, int); struct mbuf *m_defrag(struct mbuf *, int); void m_demote_pkthdr(struct mbuf *); void m_demote(struct mbuf *, int, int); struct mbuf *m_devget(char *, int, int, struct ifnet *, void (*)(char *, caddr_t, u_int)); void m_dispose_extcontrolm(struct mbuf *m); struct mbuf *m_dup(const struct mbuf *, int); int m_dup_pkthdr(struct mbuf *, const struct mbuf *, int); void m_extadd(struct mbuf *, char *, u_int, m_ext_free_t, void *, void *, int, int); u_int m_fixhdr(struct mbuf *); struct mbuf *m_fragment(struct mbuf *, int, int); void m_freem(struct mbuf *); struct mbuf *m_get2(int, int, short, int); struct mbuf *m_getjcl(int, short, int, int); struct mbuf *m_getm2(struct mbuf *, int, int, short, int); struct mbuf *m_getptr(struct mbuf *, int, int *); u_int m_length(struct mbuf *, struct mbuf **); int m_mbuftouio(struct uio *, const struct mbuf *, int); int m_unmappedtouio(const struct mbuf *, int, struct uio *, int); void m_move_pkthdr(struct mbuf *, struct mbuf *); int m_pkthdr_init(struct mbuf *, int); struct mbuf *m_prepend(struct mbuf *, int, int); void m_print(const struct mbuf *, int); struct mbuf *m_pulldown(struct mbuf *, int, int, int *); struct mbuf *m_pullup(struct mbuf *, int); int m_sanity(struct mbuf *, int); struct mbuf *m_split(struct mbuf *, int, int); struct mbuf *m_uiotombuf(struct uio *, int, int, int, int); struct mbuf *m_unshare(struct mbuf *, int); void m_snd_tag_init(struct m_snd_tag *, struct ifnet *); void m_snd_tag_destroy(struct m_snd_tag *); static __inline int m_gettype(int size) { int type; switch (size) { case MSIZE: type = EXT_MBUF; break; case MCLBYTES: type = EXT_CLUSTER; break; #if MJUMPAGESIZE != MCLBYTES case MJUMPAGESIZE: type = EXT_JUMBOP; break; #endif case MJUM9BYTES: type = EXT_JUMBO9; break; case MJUM16BYTES: type = EXT_JUMBO16; break; default: panic("%s: invalid cluster size %d", __func__, size); } return (type); } /* * Associated an external reference counted buffer with an mbuf. */ static __inline void m_extaddref(struct mbuf *m, char *buf, u_int size, u_int *ref_cnt, m_ext_free_t freef, void *arg1, void *arg2) { KASSERT(ref_cnt != NULL, ("%s: ref_cnt not provided", __func__)); atomic_add_int(ref_cnt, 1); m->m_flags |= M_EXT; m->m_ext.ext_buf = buf; m->m_ext.ext_cnt = ref_cnt; m->m_data = m->m_ext.ext_buf; m->m_ext.ext_size = size; m->m_ext.ext_free = freef; m->m_ext.ext_arg1 = arg1; m->m_ext.ext_arg2 = arg2; m->m_ext.ext_type = EXT_EXTREF; m->m_ext.ext_flags = 0; } static __inline uma_zone_t m_getzone(int size) { uma_zone_t zone; switch (size) { case MCLBYTES: zone = zone_clust; break; #if MJUMPAGESIZE != MCLBYTES case MJUMPAGESIZE: zone = zone_jumbop; break; #endif case MJUM9BYTES: zone = zone_jumbo9; break; case MJUM16BYTES: zone = zone_jumbo16; break; default: panic("%s: invalid cluster size %d", __func__, size); } return (zone); } /* * Initialize an mbuf with linear storage. * * Inline because the consumer text overhead will be roughly the same to * initialize or call a function with this many parameters and M_PKTHDR * should go away with constant propagation for !MGETHDR. */ static __inline int m_init(struct mbuf *m, int how, short type, int flags) { int error; m->m_next = NULL; m->m_nextpkt = NULL; m->m_data = m->m_dat; m->m_len = 0; m->m_flags = flags; m->m_type = type; if (flags & M_PKTHDR) error = m_pkthdr_init(m, how); else error = 0; MBUF_PROBE5(m__init, m, how, type, flags, error); return (error); } static __inline struct mbuf * m_get(int how, short type) { struct mbuf *m; struct mb_args args; args.flags = 0; args.type = type; m = uma_zalloc_arg(zone_mbuf, &args, how); MBUF_PROBE3(m__get, how, type, m); return (m); } static __inline struct mbuf * m_gethdr(int how, short type) { struct mbuf *m; struct mb_args args; args.flags = M_PKTHDR; args.type = type; m = uma_zalloc_arg(zone_mbuf, &args, how); MBUF_PROBE3(m__gethdr, how, type, m); return (m); } static __inline struct mbuf * m_getcl(int how, short type, int flags) { struct mbuf *m; struct mb_args args; args.flags = flags; args.type = type; m = uma_zalloc_arg(zone_pack, &args, how); MBUF_PROBE4(m__getcl, how, type, flags, m); return (m); } /* * XXX: m_cljset() is a dangerous API. One must attach only a new, * unreferenced cluster to an mbuf(9). It is not possible to assert * that, so care can be taken only by users of the API. */ static __inline void m_cljset(struct mbuf *m, void *cl, int type) { int size; switch (type) { case EXT_CLUSTER: size = MCLBYTES; break; #if MJUMPAGESIZE != MCLBYTES case EXT_JUMBOP: size = MJUMPAGESIZE; break; #endif case EXT_JUMBO9: size = MJUM9BYTES; break; case EXT_JUMBO16: size = MJUM16BYTES; break; default: panic("%s: unknown cluster type %d", __func__, type); break; } m->m_data = m->m_ext.ext_buf = cl; m->m_ext.ext_free = m->m_ext.ext_arg1 = m->m_ext.ext_arg2 = NULL; m->m_ext.ext_size = size; m->m_ext.ext_type = type; m->m_ext.ext_flags = EXT_FLAG_EMBREF; m->m_ext.ext_count = 1; m->m_flags |= M_EXT; MBUF_PROBE3(m__cljset, m, cl, type); } static __inline void m_chtype(struct mbuf *m, short new_type) { m->m_type = new_type; } static __inline void m_clrprotoflags(struct mbuf *m) { while (m) { m->m_flags &= ~M_PROTOFLAGS; m = m->m_next; } } static __inline struct mbuf * m_last(struct mbuf *m) { while (m->m_next) m = m->m_next; return (m); } static inline u_int m_extrefcnt(struct mbuf *m) { KASSERT(m->m_flags & M_EXT, ("%s: M_EXT missing", __func__)); return ((m->m_ext.ext_flags & EXT_FLAG_EMBREF) ? m->m_ext.ext_count : *m->m_ext.ext_cnt); } /* * mbuf, cluster, and external object allocation macros (for compatibility * purposes). */ #define M_MOVE_PKTHDR(to, from) m_move_pkthdr((to), (from)) #define MGET(m, how, type) ((m) = m_get((how), (type))) #define MGETHDR(m, how, type) ((m) = m_gethdr((how), (type))) #define MCLGET(m, how) m_clget((m), (how)) #define MEXTADD(m, buf, size, free, arg1, arg2, flags, type) \ m_extadd((m), (char *)(buf), (size), (free), (arg1), (arg2), \ (flags), (type)) #define m_getm(m, len, how, type) \ m_getm2((m), (len), (how), (type), M_PKTHDR) /* * Evaluate TRUE if it's safe to write to the mbuf m's data region (this can * be both the local data payload, or an external buffer area, depending on * whether M_EXT is set). */ #define M_WRITABLE(m) (((m)->m_flags & (M_RDONLY | M_NOMAP)) == 0 && \ (!(((m)->m_flags & M_EXT)) || \ (m_extrefcnt(m) == 1))) /* Check if the supplied mbuf has a packet header, or else panic. */ #define M_ASSERTPKTHDR(m) \ KASSERT((m) != NULL && (m)->m_flags & M_PKTHDR, \ ("%s: no mbuf packet header!", __func__)) /* * Ensure that the supplied mbuf is a valid, non-free mbuf. * * XXX: Broken at the moment. Need some UMA magic to make it work again. */ #define M_ASSERTVALID(m) \ KASSERT((((struct mbuf *)m)->m_flags & 0) == 0, \ ("%s: attempted use of a free mbuf!", __func__)) /* * Return the address of the start of the buffer associated with an mbuf, * handling external storage, packet-header mbufs, and regular data mbufs. */ #define M_START(m) \ (((m)->m_flags & M_NOMAP) ? NULL : \ ((m)->m_flags & M_EXT) ? (m)->m_ext.ext_buf : \ ((m)->m_flags & M_PKTHDR) ? &(m)->m_pktdat[0] : \ &(m)->m_dat[0]) /* * Return the size of the buffer associated with an mbuf, handling external * storage, packet-header mbufs, and regular data mbufs. */ #define M_SIZE(m) \ (((m)->m_flags & M_EXT) ? (m)->m_ext.ext_size : \ ((m)->m_flags & M_PKTHDR) ? MHLEN : \ MLEN) /* * Set the m_data pointer of a newly allocated mbuf to place an object of the * specified size at the end of the mbuf, longword aligned. * * NB: Historically, we had M_ALIGN(), MH_ALIGN(), and MEXT_ALIGN() as * separate macros, each asserting that it was called at the proper moment. * This required callers to themselves test the storage type and call the * right one. Rather than require callers to be aware of those layout * decisions, we centralize here. */ static __inline void m_align(struct mbuf *m, int len) { #ifdef INVARIANTS const char *msg = "%s: not a virgin mbuf"; #endif int adjust; KASSERT(m->m_data == M_START(m), (msg, __func__)); adjust = M_SIZE(m) - len; m->m_data += adjust &~ (sizeof(long)-1); } #define M_ALIGN(m, len) m_align(m, len) #define MH_ALIGN(m, len) m_align(m, len) #define MEXT_ALIGN(m, len) m_align(m, len) /* * Compute the amount of space available before the current start of data in * an mbuf. * * The M_WRITABLE() is a temporary, conservative safety measure: the burden * of checking writability of the mbuf data area rests solely with the caller. * * NB: In previous versions, M_LEADINGSPACE() would only check M_WRITABLE() * for mbufs with external storage. We now allow mbuf-embedded data to be * read-only as well. */ #define M_LEADINGSPACE(m) \ (M_WRITABLE(m) ? ((m)->m_data - M_START(m)) : 0) /* * Compute the amount of space available after the end of data in an mbuf. * * The M_WRITABLE() is a temporary, conservative safety measure: the burden * of checking writability of the mbuf data area rests solely with the caller. * * NB: In previous versions, M_TRAILINGSPACE() would only check M_WRITABLE() * for mbufs with external storage. We now allow mbuf-embedded data to be * read-only as well. */ #define M_TRAILINGSPACE(m) \ (M_WRITABLE(m) ? \ ((M_START(m) + M_SIZE(m)) - ((m)->m_data + (m)->m_len)) : 0) /* * Arrange to prepend space of size plen to mbuf m. If a new mbuf must be * allocated, how specifies whether to wait. If the allocation fails, the * original mbuf chain is freed and m is set to NULL. */ #define M_PREPEND(m, plen, how) do { \ struct mbuf **_mmp = &(m); \ struct mbuf *_mm = *_mmp; \ int _mplen = (plen); \ int __mhow = (how); \ \ MBUF_CHECKSLEEP(how); \ if (M_LEADINGSPACE(_mm) >= _mplen) { \ _mm->m_data -= _mplen; \ _mm->m_len += _mplen; \ } else \ _mm = m_prepend(_mm, _mplen, __mhow); \ if (_mm != NULL && _mm->m_flags & M_PKTHDR) \ _mm->m_pkthdr.len += _mplen; \ *_mmp = _mm; \ } while (0) /* * Change mbuf to new type. This is a relatively expensive operation and * should be avoided. */ #define MCHTYPE(m, t) m_chtype((m), (t)) /* Return the rcvif of a packet header. */ static __inline struct ifnet * m_rcvif(struct mbuf *m) { M_ASSERTPKTHDR(m); if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) return (NULL); return (m->m_pkthdr.rcvif); } /* Length to m_copy to copy all. */ #define M_COPYALL 1000000000 extern int max_datalen; /* MHLEN - max_hdr */ extern int max_hdr; /* Largest link + protocol header */ extern int max_linkhdr; /* Largest link-level header */ extern int max_protohdr; /* Largest protocol header */ extern int nmbclusters; /* Maximum number of clusters */ extern bool mb_use_ext_pgs; /* Use ext_pgs for sendfile */ /*- * Network packets may have annotations attached by affixing a list of * "packet tags" to the pkthdr structure. Packet tags are dynamically * allocated semi-opaque data structures that have a fixed header * (struct m_tag) that specifies the size of the memory block and a * pair that identifies it. The cookie is a 32-bit unique * unsigned value used to identify a module or ABI. By convention this value * is chosen as the date+time that the module is created, expressed as the * number of seconds since the epoch (e.g., using date -u +'%s'). The type * value is an ABI/module-specific value that identifies a particular * annotation and is private to the module. For compatibility with systems * like OpenBSD that define packet tags w/o an ABI/module cookie, the value * PACKET_ABI_COMPAT is used to implement m_tag_get and m_tag_find * compatibility shim functions and several tag types are defined below. * Users that do not require compatibility should use a private cookie value * so that packet tag-related definitions can be maintained privately. * * Note that the packet tag returned by m_tag_alloc has the default memory * alignment implemented by malloc. To reference private data one can use a * construct like: * * struct m_tag *mtag = m_tag_alloc(...); * struct foo *p = (struct foo *)(mtag+1); * * if the alignment of struct m_tag is sufficient for referencing members of * struct foo. Otherwise it is necessary to embed struct m_tag within the * private data structure to insure proper alignment; e.g., * * struct foo { * struct m_tag tag; * ... * }; * struct foo *p = (struct foo *) m_tag_alloc(...); * struct m_tag *mtag = &p->tag; */ /* * Persistent tags stay with an mbuf until the mbuf is reclaimed. Otherwise * tags are expected to ``vanish'' when they pass through a network * interface. For most interfaces this happens normally as the tags are * reclaimed when the mbuf is free'd. However in some special cases * reclaiming must be done manually. An example is packets that pass through * the loopback interface. Also, one must be careful to do this when * ``turning around'' packets (e.g., icmp_reflect). * * To mark a tag persistent bit-or this flag in when defining the tag id. * The tag will then be treated as described above. */ #define MTAG_PERSISTENT 0x800 #define PACKET_TAG_NONE 0 /* Nadda */ /* Packet tags for use with PACKET_ABI_COMPAT. */ #define PACKET_TAG_IPSEC_IN_DONE 1 /* IPsec applied, in */ #define PACKET_TAG_IPSEC_OUT_DONE 2 /* IPsec applied, out */ #define PACKET_TAG_IPSEC_IN_CRYPTO_DONE 3 /* NIC IPsec crypto done */ #define PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED 4 /* NIC IPsec crypto req'ed */ #define PACKET_TAG_IPSEC_IN_COULD_DO_CRYPTO 5 /* NIC notifies IPsec */ #define PACKET_TAG_IPSEC_PENDING_TDB 6 /* Reminder to do IPsec */ #define PACKET_TAG_BRIDGE 7 /* Bridge processing done */ #define PACKET_TAG_GIF 8 /* GIF processing done */ #define PACKET_TAG_GRE 9 /* GRE processing done */ #define PACKET_TAG_IN_PACKET_CHECKSUM 10 /* NIC checksumming done */ #define PACKET_TAG_ENCAP 11 /* Encap. processing */ #define PACKET_TAG_IPSEC_SOCKET 12 /* IPSEC socket ref */ #define PACKET_TAG_IPSEC_HISTORY 13 /* IPSEC history */ #define PACKET_TAG_IPV6_INPUT 14 /* IPV6 input processing */ #define PACKET_TAG_DUMMYNET 15 /* dummynet info */ #define PACKET_TAG_DIVERT 17 /* divert info */ #define PACKET_TAG_IPFORWARD 18 /* ipforward info */ #define PACKET_TAG_MACLABEL (19 | MTAG_PERSISTENT) /* MAC label */ #define PACKET_TAG_PF (21 | MTAG_PERSISTENT) /* PF/ALTQ information */ #define PACKET_TAG_RTSOCKFAM 25 /* rtsock sa family */ #define PACKET_TAG_IPOPTIONS 27 /* Saved IP options */ #define PACKET_TAG_CARP 28 /* CARP info */ #define PACKET_TAG_IPSEC_NAT_T_PORTS 29 /* two uint16_t */ #define PACKET_TAG_ND_OUTGOING 30 /* ND outgoing */ /* Specific cookies and tags. */ /* Packet tag routines. */ struct m_tag *m_tag_alloc(u_int32_t, int, int, int); void m_tag_delete(struct mbuf *, struct m_tag *); void m_tag_delete_chain(struct mbuf *, struct m_tag *); void m_tag_free_default(struct m_tag *); struct m_tag *m_tag_locate(struct mbuf *, u_int32_t, int, struct m_tag *); struct m_tag *m_tag_copy(struct m_tag *, int); int m_tag_copy_chain(struct mbuf *, const struct mbuf *, int); void m_tag_delete_nonpersistent(struct mbuf *); /* * Initialize the list of tags associated with an mbuf. */ static __inline void m_tag_init(struct mbuf *m) { SLIST_INIT(&m->m_pkthdr.tags); } /* * Set up the contents of a tag. Note that this does not fill in the free * method; the caller is expected to do that. * * XXX probably should be called m_tag_init, but that was already taken. */ static __inline void m_tag_setup(struct m_tag *t, u_int32_t cookie, int type, int len) { t->m_tag_id = type; t->m_tag_len = len; t->m_tag_cookie = cookie; } /* * Reclaim resources associated with a tag. */ static __inline void m_tag_free(struct m_tag *t) { (*t->m_tag_free)(t); } /* * Return the first tag associated with an mbuf. */ static __inline struct m_tag * m_tag_first(struct mbuf *m) { return (SLIST_FIRST(&m->m_pkthdr.tags)); } /* * Return the next tag in the list of tags associated with an mbuf. */ static __inline struct m_tag * m_tag_next(struct mbuf *m __unused, struct m_tag *t) { return (SLIST_NEXT(t, m_tag_link)); } /* * Prepend a tag to the list of tags associated with an mbuf. */ static __inline void m_tag_prepend(struct mbuf *m, struct m_tag *t) { SLIST_INSERT_HEAD(&m->m_pkthdr.tags, t, m_tag_link); } /* * Unlink a tag from the list of tags associated with an mbuf. */ static __inline void m_tag_unlink(struct mbuf *m, struct m_tag *t) { SLIST_REMOVE(&m->m_pkthdr.tags, t, m_tag, m_tag_link); } /* These are for OpenBSD compatibility. */ #define MTAG_ABI_COMPAT 0 /* compatibility ABI */ static __inline struct m_tag * m_tag_get(int type, int length, int wait) { return (m_tag_alloc(MTAG_ABI_COMPAT, type, length, wait)); } static __inline struct m_tag * m_tag_find(struct mbuf *m, int type, struct m_tag *start) { return (SLIST_EMPTY(&m->m_pkthdr.tags) ? (struct m_tag *)NULL : m_tag_locate(m, MTAG_ABI_COMPAT, type, start)); } static inline struct m_snd_tag * m_snd_tag_ref(struct m_snd_tag *mst) { refcount_acquire(&mst->refcount); return (mst); } static inline void m_snd_tag_rele(struct m_snd_tag *mst) { if (refcount_release(&mst->refcount)) m_snd_tag_destroy(mst); } static __inline struct mbuf * m_free(struct mbuf *m) { struct mbuf *n = m->m_next; MBUF_PROBE1(m__free, m); if ((m->m_flags & (M_PKTHDR|M_NOFREE)) == (M_PKTHDR|M_NOFREE)) m_tag_delete_chain(m, NULL); if (m->m_flags & M_PKTHDR && m->m_pkthdr.csum_flags & CSUM_SND_TAG) m_snd_tag_rele(m->m_pkthdr.snd_tag); if (m->m_flags & M_EXT) mb_free_ext(m); else if ((m->m_flags & M_NOFREE) == 0) uma_zfree(zone_mbuf, m); return (n); } static __inline int rt_m_getfib(struct mbuf *m) { KASSERT(m->m_flags & M_PKTHDR , ("Attempt to get FIB from non header mbuf.")); return (m->m_pkthdr.fibnum); } #define M_GETFIB(_m) rt_m_getfib(_m) #define M_SETFIB(_m, _fib) do { \ KASSERT((_m)->m_flags & M_PKTHDR, ("Attempt to set FIB on non header mbuf.")); \ ((_m)->m_pkthdr.fibnum) = (_fib); \ } while (0) /* flags passed as first argument for "m_ether_tcpip_hash()" */ #define MBUF_HASHFLAG_L2 (1 << 2) #define MBUF_HASHFLAG_L3 (1 << 3) #define MBUF_HASHFLAG_L4 (1 << 4) /* mbuf hashing helper routines */ uint32_t m_ether_tcpip_hash_init(void); uint32_t m_ether_tcpip_hash(const uint32_t, const struct mbuf *, const uint32_t); #ifdef MBUF_PROFILING void m_profile(struct mbuf *m); #define M_PROFILE(m) m_profile(m) #else #define M_PROFILE(m) #endif struct mbufq { STAILQ_HEAD(, mbuf) mq_head; int mq_len; int mq_maxlen; }; static inline void mbufq_init(struct mbufq *mq, int maxlen) { STAILQ_INIT(&mq->mq_head); mq->mq_maxlen = maxlen; mq->mq_len = 0; } static inline struct mbuf * mbufq_flush(struct mbufq *mq) { struct mbuf *m; m = STAILQ_FIRST(&mq->mq_head); STAILQ_INIT(&mq->mq_head); mq->mq_len = 0; return (m); } static inline void mbufq_drain(struct mbufq *mq) { struct mbuf *m, *n; n = mbufq_flush(mq); while ((m = n) != NULL) { n = STAILQ_NEXT(m, m_stailqpkt); m_freem(m); } } static inline struct mbuf * mbufq_first(const struct mbufq *mq) { return (STAILQ_FIRST(&mq->mq_head)); } static inline struct mbuf * mbufq_last(const struct mbufq *mq) { return (STAILQ_LAST(&mq->mq_head, mbuf, m_stailqpkt)); } static inline int mbufq_full(const struct mbufq *mq) { return (mq->mq_maxlen > 0 && mq->mq_len >= mq->mq_maxlen); } static inline int mbufq_len(const struct mbufq *mq) { return (mq->mq_len); } static inline int mbufq_enqueue(struct mbufq *mq, struct mbuf *m) { if (mbufq_full(mq)) return (ENOBUFS); STAILQ_INSERT_TAIL(&mq->mq_head, m, m_stailqpkt); mq->mq_len++; return (0); } static inline struct mbuf * mbufq_dequeue(struct mbufq *mq) { struct mbuf *m; m = STAILQ_FIRST(&mq->mq_head); if (m) { STAILQ_REMOVE_HEAD(&mq->mq_head, m_stailqpkt); m->m_nextpkt = NULL; mq->mq_len--; } return (m); } static inline void mbufq_prepend(struct mbufq *mq, struct mbuf *m) { STAILQ_INSERT_HEAD(&mq->mq_head, m, m_stailqpkt); mq->mq_len++; } /* * Note: this doesn't enforce the maximum list size for dst. */ static inline void mbufq_concat(struct mbufq *mq_dst, struct mbufq *mq_src) { mq_dst->mq_len += mq_src->mq_len; STAILQ_CONCAT(&mq_dst->mq_head, &mq_src->mq_head); mq_src->mq_len = 0; } #ifdef _SYS_TIMESPEC_H_ static inline void mbuf_tstmp2timespec(struct mbuf *m, struct timespec *ts) { KASSERT((m->m_flags & M_PKTHDR) != 0, ("mbuf %p no M_PKTHDR", m)); KASSERT((m->m_flags & (M_TSTMP|M_TSTMP_LRO)) != 0, ("mbuf %p no M_TSTMP or M_TSTMP_LRO", m)); ts->tv_sec = m->m_pkthdr.rcv_tstmp / 1000000000; ts->tv_nsec = m->m_pkthdr.rcv_tstmp % 1000000000; } #endif #ifdef DEBUGNET /* Invoked from the debugnet client code. */ void debugnet_mbuf_drain(void); void debugnet_mbuf_start(void); void debugnet_mbuf_finish(void); void debugnet_mbuf_reinit(int nmbuf, int nclust, int clsize); #endif static inline bool mbuf_has_tls_session(struct mbuf *m) { if (m->m_flags & M_NOMAP) { MBUF_EXT_PGS_ASSERT(m); if (m->m_ext_pgs.tls != NULL) { return (true); } } return (false); } #endif /* _KERNEL */ #endif /* !_SYS_MBUF_H_ */