Index: head/sys/fs/nfs/nfscl.h =================================================================== --- head/sys/fs/nfs/nfscl.h (revision 321687) +++ head/sys/fs/nfs/nfscl.h (revision 321688) @@ -1,80 +1,81 @@ /*- * Copyright (c) 2009 Rick Macklem, University of Guelph * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NFS_NFSCL_H #define _NFS_NFSCL_H /* * Extra stuff for a NFSv4 nfsnode. * MALLOC'd to the correct length for the name and file handle. * n4_data has the file handle, followed by the file name. * The macro NFS4NODENAME() returns a pointer to the start of the * name. 
*/ struct nfsv4node { u_int16_t n4_fhlen; u_int16_t n4_namelen; u_int8_t n4_data[1]; }; #define NFS4NODENAME(n) (&((n)->n4_data[(n)->n4_fhlen])) /* * Just a macro to convert the nfscl_reqstart arguments. */ #define NFSCL_REQSTART(n, p, v) \ nfscl_reqstart((n), (p), VFSTONFS((v)->v_mount), \ VTONFS(v)->n_fhp->nfh_fh, VTONFS(v)->n_fhp->nfh_len, NULL, NULL) /* * These two macros convert between a lease duration and renew interval. * For now, just make the renew interval 1/2 the lease duration. * (They should be inverse operators.) */ #define NFSCL_RENEW(l) (((l) < 2) ? 1 : ((l) / 2)) #define NFSCL_LEASE(r) ((r) * 2) /* This macro checks to see if a forced dismount is about to occur. */ -#define NFSCL_FORCEDISM(m) (((m)->mnt_kern_flag & MNTK_UNMOUNTF) != 0) +#define NFSCL_FORCEDISM(m) (((m)->mnt_kern_flag & MNTK_UNMOUNTF) != 0 || \ + (VFSTONFS(m)->nm_privflag & NFSMNTP_FORCEDISM) != 0) /* * These flag bits are used for the argument to nfscl_fillsattr() to * indicate special handling of the attributes. */ #define NFSSATTR_FULL 0x1 #define NFSSATTR_SIZE0 0x2 #define NFSSATTR_SIZENEG1 0x4 #define NFSSATTR_SIZERDEV 0x8 /* Use this macro for debug printfs. */ #define NFSCL_DEBUG(level, ...) do { \ if (nfscl_debuglevel >= (level)) \ printf(__VA_ARGS__); \ } while (0) #endif /* _NFS_NFSCL_H */ Index: head/sys/fs/nfsclient/nfs_clport.c =================================================================== --- head/sys/fs/nfsclient/nfs_clport.c (revision 321687) +++ head/sys/fs/nfsclient/nfs_clport.c (revision 321688) @@ -1,1438 +1,1490 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include /* * generally, I don't like #includes inside .h files, but it seems to * be the easiest way to handle the port. 
*/
/*
 * NOTE(review): every #include below has lost its header name in this copy
 * of the file (presumably stripped during extraction).  Restore the names
 * from the upstream source before attempting to build.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include

#ifdef KDTRACE_HOOKS
/* DTrace probe hooks and probe ids for the attribute cache. */
dtrace_nfsclient_attrcache_flush_probe_func_t
		dtrace_nfscl_attrcache_flush_done_probe;
uint32_t	nfscl_attrcache_flush_done_id;

dtrace_nfsclient_attrcache_get_hit_probe_func_t
		dtrace_nfscl_attrcache_get_hit_probe;
uint32_t	nfscl_attrcache_get_hit_id;

dtrace_nfsclient_attrcache_get_miss_probe_func_t
		dtrace_nfscl_attrcache_get_miss_probe;
uint32_t	nfscl_attrcache_get_miss_id;

dtrace_nfsclient_attrcache_load_probe_func_t
		dtrace_nfscl_attrcache_load_done_probe;
uint32_t	nfscl_attrcache_load_done_id;
#endif /* KDTRACE_HOOKS */

extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1;
extern struct vop_vector newnfs_vnodeops;
extern struct vop_vector newnfs_fifoops;
extern uma_zone_t newnfsnode_zone;
extern struct buf_ops buf_ops_newnfs;
extern int ncl_pbuf_freecnt;
extern short nfsv4_cbport;
extern int nfscl_enablecallb;
extern int nfs_numnfscbd;
extern int nfscl_inited;
struct mtx ncl_iod_mutex;
NFSDLOCKMUTEX;

extern void (*ncl_call_invalcaches)(struct vnode *);

SYSCTL_DECL(_vfs_nfs);
/* Upper bound on fileid-corruption warnings printed by nfscl_warn_fileid(). */
static int ncl_fileid_maxwarnings = 10;
SYSCTL_INT(_vfs_nfs, OID_AUTO, fileid_maxwarnings, CTLFLAG_RWTUN,
    &ncl_fileid_maxwarnings, 0,
    "Limit fileid corruption warnings; 0 is off; -1 is unlimited");
/* Number of fileid-corruption warnings printed so far. */
static volatile int ncl_fileid_nwarnings;

static void nfscl_warn_fileid(struct nfsmount *, struct nfsvattr *,
    struct nfsvattr *);

/*
 * Comparison function for vfs_hash functions.
 * "arg" is a struct nfsfh *.  Returns 0 when the vnode's file handle has
 * the same length as arg's and NFSBCMP() reports no difference between the
 * handle bytes; returns 1 otherwise.
 */
int
newnfs_vncmpf(struct vnode *vp, void *arg)
{
	struct nfsfh *nfhp = (struct nfsfh *)arg;
	struct nfsnode *np = VTONFS(vp);

	if (np->n_fhp->nfh_len != nfhp->nfh_len ||
	    NFSBCMP(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len))
		return (1);
	return (0);
}

/*
 * Look up a vnode/nfsnode by file handle.
 * Callers must check for mount points!!
 * In all cases, a pointer to a
 * nfsnode structure is returned.
* This variant takes a "struct nfsfh *" as second argument and uses
 * that structure up, either by hanging off the nfsnode or FREEing it.
 *
 * On success 0 is returned and *npp points at the nfsnode; on failure an
 * errno value is returned.  *npp is set to NULL on entry.
 * For NFSv4 mounts the directory file handle (from dvp) and the component
 * name (from cnp) are recorded in np->n_v4.
 */
int
nfscl_nget(struct mount *mntp, struct vnode *dvp, struct nfsfh *nfhp,
    struct componentname *cnp, struct thread *td, struct nfsnode **npp,
    void *stuff, int lkflags)
{
	struct nfsnode *np, *dnp;
	struct vnode *vp, *nvp;
	struct nfsv4node *newd, *oldd;
	int error;
	u_int hash;
	struct nfsmount *nmp;

	nmp = VFSTONFS(mntp);
	dnp = VTONFS(dvp);
	*npp = NULL;

	/* The vfs hash key is an FNV-1 hash of the file handle bytes. */
	hash = fnv_32_buf(nfhp->nfh_fh, nfhp->nfh_len, FNV1_32_INIT);

	error = vfs_hash_get(mntp, hash, lkflags,
	    td, &nvp, newnfs_vncmpf, nfhp);
	if (error == 0 && nvp != NULL) {
		/*
		 * I believe there is a slight chance that vgonel() could
		 * get called on this vnode between when NFSVOPLOCK() drops
		 * the VI_LOCK() and vget() acquires it again, so that it
		 * hasn't yet had v_usecount incremented. If this were to
		 * happen, the VI_DOOMED flag would be set, so check for
		 * that here. Since we now have the v_usecount incremented,
		 * we should be ok until we vrele() it, if the VI_DOOMED
		 * flag isn't set now.
		 */
		VI_LOCK(nvp);
		if ((nvp->v_iflag & VI_DOOMED)) {
			VI_UNLOCK(nvp);
			vrele(nvp);
			error = ENOENT;
		} else {
			VI_UNLOCK(nvp);
		}
	}
	if (error) {
		FREE((caddr_t)nfhp, M_NFSFH);
		return (error);
	}
	if (nvp != NULL) {
		/* Cache hit: reuse the existing nfsnode. */
		np = VTONFS(nvp);
		/*
		 * For NFSv4, check to see if it is the same name and
		 * replace the name, if it is different.
		 */
		oldd = newd = NULL;
		if ((nmp->nm_flag & NFSMNT_NFSV4) && np->n_v4 != NULL &&
		    nvp->v_type == VREG &&
		    (np->n_v4->n4_namelen != cnp->cn_namelen ||
		     NFSBCMP(cnp->cn_nameptr, NFS4NODENAME(np->n_v4),
		     cnp->cn_namelen) ||
		     dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen ||
		     NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
		     dnp->n_fhp->nfh_len))) {
		    /*
		     * Allocate before taking the node lock.  The doubled '+'
		     * in the size expression parses as a unary plus, so the
		     * size is header + fh length + name length - 1 (n4_data
		     * is declared with one element).
		     */
		    MALLOC(newd, struct nfsv4node *,
			sizeof (struct nfsv4node) + dnp->n_fhp->nfh_len +
			+ cnp->cn_namelen - 1, M_NFSV4NODE, M_WAITOK);
		    NFSLOCKNODE(np);
		    /* Re-check the same condition now that the lock is held. */
		    if (newd != NULL && np->n_v4 != NULL && nvp->v_type == VREG
			&& (np->n_v4->n4_namelen != cnp->cn_namelen ||
			 NFSBCMP(cnp->cn_nameptr, NFS4NODENAME(np->n_v4),
			 cnp->cn_namelen) ||
			 dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen ||
			 NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
			 dnp->n_fhp->nfh_len))) {
			oldd = np->n_v4;
			np->n_v4 = newd;
			newd = NULL;
			np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len;
			np->n_v4->n4_namelen = cnp->cn_namelen;
			NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
			    dnp->n_fhp->nfh_len);
			NFSBCOPY(cnp->cn_nameptr, NFS4NODENAME(np->n_v4),
			    cnp->cn_namelen);
		    }
		    NFSUNLOCKNODE(np);
		}
		/* Free whichever of the new/old name records is unused. */
		if (newd != NULL)
			FREE((caddr_t)newd, M_NFSV4NODE);
		if (oldd != NULL)
			FREE((caddr_t)oldd, M_NFSV4NODE);
		*npp = np;
		FREE((caddr_t)nfhp, M_NFSFH);
		return (0);
	}
	/* Cache miss: build a new nfsnode and vnode. */
	np = uma_zalloc(newnfsnode_zone, M_WAITOK | M_ZERO);

	error = getnewvnode(nfs_vnode_tag, mntp, &newnfs_vnodeops, &nvp);
	if (error) {
		uma_zfree(newnfsnode_zone, np);
		FREE((caddr_t)nfhp, M_NFSFH);
		return (error);
	}
	vp = nvp;
	KASSERT(vp->v_bufobj.bo_bsize != 0, ("nfscl_nget: bo_bsize == 0"));
	vp->v_bufobj.bo_ops = &buf_ops_newnfs;
	vp->v_data = np;
	np->n_vnode = vp;
	/*
	 * Initialize the mutex even if the vnode is going to be a loser.
	 * This simplifies the logic in reclaim, which can then unconditionally
	 * destroy the mutex (in the case of the loser, or if hash_insert
	 * happened to return an error no special casing is needed).
	 */
	mtx_init(&np->n_mtx, "NEWNFSnode lock", NULL, MTX_DEF | MTX_DUPOK);

	/*
	 * Are we getting the root? If so, make sure the vnode flags
	 * are correct
	 */
	if ((nfhp->nfh_len == nmp->nm_fhsize) &&
	    !bcmp(nfhp->nfh_fh, nmp->nm_fh, nfhp->nfh_len)) {
		if (vp->v_type == VNON)
			vp->v_type = VDIR;
		vp->v_vflag |= VV_ROOT;
	}

	/* The nfsnode now owns nfhp. */
	np->n_fhp = nfhp;
	/*
	 * For NFSv4, we have to attach the directory file handle and
	 * file name, so that Open Ops can be done later.
	 */
	if (nmp->nm_flag & NFSMNT_NFSV4) {
		MALLOC(np->n_v4, struct nfsv4node *, sizeof (struct nfsv4node)
		    + dnp->n_fhp->nfh_len + cnp->cn_namelen - 1, M_NFSV4NODE,
		    M_WAITOK);
		np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len;
		np->n_v4->n4_namelen = cnp->cn_namelen;
		NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
		    dnp->n_fhp->nfh_len);
		NFSBCOPY(cnp->cn_nameptr, NFS4NODENAME(np->n_v4),
		    cnp->cn_namelen);
	} else {
		np->n_v4 = NULL;
	}

	/*
	 * NFS supports recursive and shared locking.
	 */
	lockmgr(vp->v_vnlock, LK_EXCLUSIVE | LK_NOWITNESS, NULL);
	VN_LOCK_AREC(vp);
	VN_LOCK_ASHARE(vp);
	error = insmntque(vp, mntp);
	if (error != 0) {
		/* Undo everything allocated above for the new node. */
		*npp = NULL;
		mtx_destroy(&np->n_mtx);
		FREE((caddr_t)nfhp, M_NFSFH);
		if (np->n_v4 != NULL)
			FREE((caddr_t)np->n_v4, M_NFSV4NODE);
		uma_zfree(newnfsnode_zone, np);
		return (error);
	}
	error = vfs_hash_insert(vp, hash, lkflags,
	    td, &nvp, newnfs_vncmpf, nfhp);
	if (error)
		return (error);
	if (nvp != NULL) {
		/* Another vnode for this handle won the race; return it. */
		*npp = VTONFS(nvp);
		/* vfs_hash_insert() vput()'s the losing vnode */
		return (0);
	}
	*npp = np;

	return (0);
}

/*
 * Another variant of nfs_nget(). This one is only used by reopen. It
 * takes almost the same args as nfs_nget(), but only succeeds if an entry
 * exists in the cache. (Since files should already be "open" with a
 * vnode ref cnt on the node when reopen calls this, it should always
 * succeed.)
 * Also, don't get a vnode lock, since it may already be locked by some
 * other process that is handling it. This is ok, since all other threads
 * on the client are blocked by the nfsc_lock being exclusively held by the
 * caller of this function.
*/
int
nfscl_ngetreopen(struct mount *mntp, u_int8_t *fhp, int fhsize,
    struct thread *td, struct nfsnode **npp)
{
	struct vnode *nvp;
	u_int hash;
	struct nfsfh *nfhp;
	int error;

	*npp = NULL;
	/* For forced dismounts, just return error. */
	if (NFSCL_FORCEDISM(mntp))
		return (EINTR);
	/* Build a temporary nfsfh for the hash comparison function. */
	MALLOC(nfhp, struct nfsfh *, sizeof (struct nfsfh) + fhsize,
	    M_NFSFH, M_WAITOK);
	bcopy(fhp, &nfhp->nfh_fh[0], fhsize);
	nfhp->nfh_len = fhsize;

	hash = fnv_32_buf(fhp, fhsize, FNV1_32_INIT);

	/*
	 * First, try to get the vnode locked, but don't block for the lock.
	 */
	error = vfs_hash_get(mntp, hash, (LK_EXCLUSIVE | LK_NOWAIT), td, &nvp,
	    newnfs_vncmpf, nfhp);
	if (error == 0 && nvp != NULL) {
		/* Got it locked; the caller wants it unlocked. */
		NFSVOPUNLOCK(nvp, 0);
	} else if (error == EBUSY) {
		/*
		 * It is safe so long as a vflush() with
		 * FORCECLOSE has not been done. Since the Renew thread is
		 * stopped and the MNTK_UNMOUNTF flag is set before doing
		 * a vflush() with FORCECLOSE, we should be ok here.
		 */
		if (NFSCL_FORCEDISM(mntp))
			error = EINTR;
		else {
			/* Take a reference without acquiring the vnode lock. */
			vfs_hash_ref(mntp, hash, td, &nvp, newnfs_vncmpf, nfhp);
			if (nvp == NULL) {
				error = ENOENT;
			} else if ((nvp->v_iflag & VI_DOOMED) != 0) {
				error = ENOENT;
				vrele(nvp);
			} else {
				error = 0;
			}
		}
	}
	FREE(nfhp, M_NFSFH);
	if (error)
		return (error);
	if (nvp != NULL) {
		*npp = VTONFS(nvp);
		return (0);
	}
	/* No error was reported, but no vnode was found either. */
	return (EINVAL);
}

/*
 * Print a rate-limited warning that the server returned attributes for a
 * different fileid than the one cached in oldnap.  Nothing is printed once
 * ncl_fileid_nwarnings has reached a non-negative ncl_fileid_maxwarnings;
 * a negative limit disables the rate limiting.
 */
static void
nfscl_warn_fileid(struct nfsmount *nmp, struct nfsvattr *oldnap,
    struct nfsvattr *newnap)
{
	int off;

	if (ncl_fileid_maxwarnings >= 0 &&
	    ncl_fileid_nwarnings >= ncl_fileid_maxwarnings)
		return;
	off = 0;
	if (ncl_fileid_maxwarnings >= 0) {
		/* "off" marks the warning that reaches the limit. */
		if (++ncl_fileid_nwarnings >= ncl_fileid_maxwarnings)
			off = 1;
	}

	printf("newnfs: server '%s' error: fileid changed. "
	    "fsid %jx:%jx: expected fileid %#jx, got %#jx. "
	    "(BROKEN NFS SERVER OR MIDDLEWARE)\n",
	    nmp->nm_com.nmcom_hostname,
	    (uintmax_t)nmp->nm_fsid[0],
	    (uintmax_t)nmp->nm_fsid[1],
	    (uintmax_t)oldnap->na_fileid,
	    (uintmax_t)newnap->na_fileid);

	if (off)
		printf("newnfs: Logged %d times about fileid corruption; "
		    "going quiet to avoid spamming logs excessively. (Limit "
		    "is: %d).\n", ncl_fileid_nwarnings,
		    ncl_fileid_maxwarnings);
}

/*
 * Load the attribute cache (that lives in the nfsnode entry) with
 * the attributes of the second argument and
 * Iff vaper not NULL
 *    copy the attributes to *vaper
 * Similar to nfs_loadattrcache(), except the attributes are passed in
 * instead of being parsed out of the mbuf list.
 *
 * When writeattr is non-zero only filerev, size, mtime, ctime, fsid and
 * mode are copied from nap; otherwise the whole nfsvattr is copied after
 * the fileid check.  Returns 0, or EIDRM when the fileid check fails.
 */
int
nfscl_loadattrcache(struct vnode **vpp, struct nfsvattr *nap, void *nvaper,
    void *stuff, int writeattr, int dontshrink)
{
	struct vnode *vp = *vpp;
	struct vattr *vap, *nvap = &nap->na_vattr, *vaper = nvaper;
	struct nfsnode *np;
	struct nfsmount *nmp;
	struct timespec mtime_save;
	u_quad_t nsize;
	int setnsize, error, force_fid_err;

	error = 0;
	setnsize = 0;
	nsize = 0;

	/*
	 * If v_type == VNON it is a new node, so fill in the v_type,
	 * n_mtime fields. Check to see if it represents a special
	 * device, and if so, check for a possible alias. Once the
	 * correct vnode has been obtained, fill in the rest of the
	 * information.
	 */
	np = VTONFS(vp);
	NFSLOCKNODE(np);
	if (vp->v_type != nvap->va_type) {
		vp->v_type = nvap->va_type;
		if (vp->v_type == VFIFO)
			vp->v_op = &newnfs_fifoops;
		np->n_mtime = nvap->va_mtime;
	}
	nmp = VFSTONFS(vp->v_mount);
	vap = &np->n_vattr.na_vattr;
	/* Remember the cached mtime for the went-backwards check below. */
	mtime_save = vap->va_mtime;
	if (writeattr) {
		np->n_vattr.na_filerev = nap->na_filerev;
		np->n_vattr.na_size = nap->na_size;
		np->n_vattr.na_mtime = nap->na_mtime;
		np->n_vattr.na_ctime = nap->na_ctime;
		np->n_vattr.na_fsid = nap->na_fsid;
		np->n_vattr.na_mode = nap->na_mode;
	} else {
		force_fid_err = 0;
		KFAIL_POINT_ERROR(DEBUG_FP, nfscl_force_fileid_warning,
		    force_fid_err);
		/*
		 * BROKEN NFS SERVER OR MIDDLEWARE
		 *
		 * Certain NFS servers (certain old proprietary filers ca.
		 * 2006) or broken middleboxes (e.g. WAN accelerator products)
		 * will respond to GETATTR requests with results for a
		 * different fileid.
		 *
		 * The WAN accelerator we've observed not only serves stale
		 * cache results for a given file, it also occasionally serves
		 * results for wholly different files.  This causes surprising
		 * problems; for example the cached size attribute of a file
		 * may truncate down and then back up, resulting in zero
		 * regions in file contents read by applications.  We observed
		 * this reliably with Clang and .c files during parallel build.
		 * A pcap revealed packet fragmentation and GETATTR RPC
		 * responses with wholly wrong fileids.
		 */
		if ((np->n_vattr.na_fileid != 0 &&
		     np->n_vattr.na_fileid != nap->na_fileid) ||
		    force_fid_err) {
			nfscl_warn_fileid(nmp, &np->n_vattr, nap);
			error = EIDRM;
			goto out;
		}
		NFSBCOPY((caddr_t)nap, (caddr_t)&np->n_vattr,
		    sizeof (struct nfsvattr));
	}

	/*
	 * For NFSv4, if the node's fsid is not equal to the mount point's
	 * fsid, return the low order 32bits of the node's fsid. This
	 * allows getcwd(3) to work. There is a chance that the fsid might
	 * be the same as a local fs, but since this is in an NFS mount
	 * point, I don't think that will cause any problems?
	 */
	if (NFSHASNFSV4(nmp) && NFSHASHASSETFSID(nmp) &&
	    (nmp->nm_fsid[0] != np->n_vattr.na_filesid[0] ||
	     nmp->nm_fsid[1] != np->n_vattr.na_filesid[1])) {
		/*
		 * va_fsid needs to be set to some value derived from
		 * np->n_vattr.na_filesid that is not equal
		 * vp->v_mount->mnt_stat.f_fsid[0], so that it changes
		 * from the value used for the top level server volume
		 * in the mounted subtree.
		 */
		vn_fsid(vp, vap);
		if ((uint32_t)vap->va_fsid == np->n_vattr.na_filesid[0])
			vap->va_fsid = hash32_buf(
			    np->n_vattr.na_filesid, 2 * sizeof(uint64_t), 0);
	} else
		vn_fsid(vp, vap);
	np->n_attrstamp = time_second;
	if (vap->va_size != np->n_size) {
		if (vap->va_type == VREG) {
			if (dontshrink && vap->va_size < np->n_size) {
				/*
				 * We've been told not to shrink the file;
				 * zero np->n_attrstamp to indicate that
				 * the attributes are stale.
				 */
				vap->va_size = np->n_size;
				np->n_attrstamp = 0;
				KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
				vnode_pager_setsize(vp, np->n_size);
			} else if (np->n_flag & NMODIFIED) {
				/*
				 * We've modified the file: Use the larger
				 * of our size, and the server's size.
				 */
				if (vap->va_size < np->n_size) {
					vap->va_size = np->n_size;
				} else {
					np->n_size = vap->va_size;
					np->n_flag |= NSIZECHANGED;
				}
				vnode_pager_setsize(vp, np->n_size);
			} else if (vap->va_size < np->n_size) {
				/*
				 * When shrinking the size, the call to
				 * vnode_pager_setsize() cannot be done
				 * with the mutex held, so delay it until
				 * after the mtx_unlock call.
				 */
				nsize = np->n_size = vap->va_size;
				np->n_flag |= NSIZECHANGED;
				setnsize = 1;
			} else {
				np->n_size = vap->va_size;
				np->n_flag |= NSIZECHANGED;
				vnode_pager_setsize(vp, np->n_size);
			}
		} else {
			np->n_size = vap->va_size;
		}
	}
	/*
	 * The following checks are added to prevent a race between (say)
	 * a READDIR+ and a WRITE.
	 * READDIR+, WRITE requests sent out.
	 * READDIR+ resp, WRITE resp received on client.
	 * However, the WRITE resp was handled before the READDIR+ resp
	 * causing the post op attrs from the write to be loaded first
	 * and the attrs from the READDIR+ to be loaded later. If this
	 * happens, we have stale attrs loaded into the attrcache.
	 * We detect this by checking for the mtime moving back. We
	 * invalidate the attrcache when this happens.
	 */
	if (timespeccmp(&mtime_save, &vap->va_mtime, >)) {
		/* mtime went backwards */
		np->n_attrstamp = 0;
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
	}
	if (vaper != NULL) {
		NFSBCOPY((caddr_t)vap, (caddr_t)vaper, sizeof(*vap));
		/* Report locally recorded access/modify times if flagged. */
		if (np->n_flag & NCHG) {
			if (np->n_flag & NACC)
				vaper->va_atime = np->n_atim;
			if (np->n_flag & NUPD)
				vaper->va_mtime = np->n_mtim;
		}
	}

out:
#ifdef KDTRACE_HOOKS
	if (np->n_attrstamp != 0)
		KDTRACE_NFS_ATTRCACHE_LOAD_DONE(vp, vap, error);
#endif
	NFSUNLOCKNODE(np);
	/* Deferred shrink, now that the node lock has been dropped. */
	if (setnsize)
		vnode_pager_setsize(vp, nsize);
	return (error);
}

/*
 * Fill in the client id name. For these bytes:
 * 1 - they must be unique
 * 2 - they should be persistent across client reboots
 * 1 is more critical than 2
 * Use the mount point's unique id plus either the uuid or, if that
 * isn't set, random junk.
 * Exactly idlen bytes are written at cp.
 */
void
nfscl_fillclid(u_int64_t clval, char *uuid, u_int8_t *cp, u_int16_t idlen)
{
	int uuidlen;

	/*
	 * First, put in the 64bit mount point identifier.
	 */
	if (idlen >= sizeof (u_int64_t)) {
		NFSBCOPY((caddr_t)&clval, cp, sizeof (u_int64_t));
		cp += sizeof (u_int64_t);
		idlen -= sizeof (u_int64_t);
	}

	/*
	 * If uuid is non-zero length, use it.
	 */
	uuidlen = strlen(uuid);
	if (uuidlen > 0 && idlen >= uuidlen) {
		NFSBCOPY(uuid, cp, uuidlen);
		cp += uuidlen;
		idlen -= uuidlen;
	}

	/*
	 * This only normally happens if the uuid isn't set.
	 */
	while (idlen > 0) {
		*cp++ = (u_int8_t)(arc4random() % 256);
		idlen--;
	}
}

/*
 * Fill in a lock owner name. For now, pid + the process's creation time.
 * A NULL id yields an all-zero name.  With F_POSIX, id is a struct proc *;
 * with F_FLOCK the bytes of the id pointer value itself are used and the
 * rest of the name is zeroed.
 */
void
nfscl_filllockowner(void *id, u_int8_t *cp, int flags)
{
	union {
		u_int32_t	lval;
		u_int8_t	cval[4];
	} tl;
	struct proc *p;

	if (id == NULL) {
		/* Return the single open_owner of all 0 bytes. */
		bzero(cp, NFSV4CL_LOCKNAMELEN);
		return;
	}
	if ((flags & F_POSIX) != 0) {
		p = (struct proc *)id;
		/* Three 32-bit values, copied byte by byte via the union. */
		tl.lval = p->p_pid;
		*cp++ = tl.cval[0];
		*cp++ = tl.cval[1];
		*cp++ = tl.cval[2];
		*cp++ = tl.cval[3];
		tl.lval = p->p_stats->p_start.tv_sec;
		*cp++ = tl.cval[0];
		*cp++ = tl.cval[1];
		*cp++ = tl.cval[2];
		*cp++ = tl.cval[3];
		tl.lval = p->p_stats->p_start.tv_usec;
		*cp++ = tl.cval[0];
		*cp++ = tl.cval[1];
		*cp++ = tl.cval[2];
		*cp = tl.cval[3];
	} else if ((flags & F_FLOCK) != 0) {
		bcopy(&id, cp, sizeof(id));
		bzero(&cp[sizeof(id)], NFSV4CL_LOCKNAMELEN - sizeof(id));
	} else {
		printf("nfscl_filllockowner: not F_POSIX or F_FLOCK\n");
		bzero(cp, NFSV4CL_LOCKNAMELEN);
	}
}

/*
 * Find the parent process for the thread passed in as an argument.
 * If none exists, return NULL, otherwise return a thread for the parent.
 * (Can be any of the threads, since it is only used for td->td_proc.)
 */
NFSPROC_T *
nfscl_getparent(struct thread *td)
{
	struct proc *p;
	struct thread *ptd;

	if (td == NULL)
		return (NULL);
	p = td->td_proc;
	/* pid 0 is treated as having no parent. */
	if (p->p_pid == 0)
		return (NULL);
	p = p->p_pptr;
	if (p == NULL)
		return (NULL);
	ptd = TAILQ_FIRST(&p->p_threads);
	return (ptd);
}

/*
 * Start up the renew kernel thread.
 * Entry point handed to kproc_create(); arg is the struct nfsclclient *.
 */
static void
start_nfscl(void *arg)
{
	struct nfsclclient *clp;
	struct thread *td;

	clp = (struct nfsclclient *)arg;
	td = TAILQ_FIRST(&clp->nfsc_renewthread->p_threads);
	nfscl_renewthread(clp, td);
	kproc_exit(0);
}

/*
 * Create the "nfscl" kernel process running start_nfscl() for clp; the
 * new process is stored in clp->nfsc_renewthread.
 */
void
nfscl_start_renewthread(struct nfsclclient *clp)
{

	kproc_create(start_nfscl, (void *)clp, &clp->nfsc_renewthread, 0, 0,
	    "nfscl");
}

/*
 * Handle wcc_data.
 * For NFSv4, it assumes that nfsv4_wccattr() was used to set up the getattr
 * as the first Op after PutFH.
 * (For NFSv4, the postop attributes are after the Op, so they can't be
 * parsed here. A separate call to nfscl_postop_attr() is required.)
*/ int nfscl_wcc_data(struct nfsrv_descript *nd, struct vnode *vp, struct nfsvattr *nap, int *flagp, int *wccflagp, void *stuff) { u_int32_t *tl; struct nfsnode *np = VTONFS(vp); struct nfsvattr nfsva; int error = 0; if (wccflagp != NULL) *wccflagp = 0; if (nd->nd_flag & ND_NFSV3) { *flagp = 0; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) { NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED); if (wccflagp != NULL) { mtx_lock(&np->n_mtx); *wccflagp = (np->n_mtime.tv_sec == fxdr_unsigned(u_int32_t, *(tl + 2)) && np->n_mtime.tv_nsec == fxdr_unsigned(u_int32_t, *(tl + 3))); mtx_unlock(&np->n_mtx); } } error = nfscl_postop_attr(nd, nap, flagp, stuff); if (wccflagp != NULL && *flagp == 0) *wccflagp = 0; } else if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == (ND_NFSV4 | ND_V4WCCATTR)) { error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); if (error) return (error); /* * Get rid of Op# and status for next op. */ NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*++tl) nd->nd_flag |= ND_NOMOREDATA; if (wccflagp != NULL && nfsva.na_vattr.va_mtime.tv_sec != 0) { mtx_lock(&np->n_mtx); *wccflagp = (np->n_mtime.tv_sec == nfsva.na_vattr.va_mtime.tv_sec && np->n_mtime.tv_nsec == nfsva.na_vattr.va_mtime.tv_sec); mtx_unlock(&np->n_mtx); } } nfsmout: return (error); } /* * Get postop attributes. */ int nfscl_postop_attr(struct nfsrv_descript *nd, struct nfsvattr *nap, int *retp, void *stuff) { u_int32_t *tl; int error = 0; *retp = 0; if (nd->nd_flag & ND_NOMOREDATA) return (error); if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); *retp = fxdr_unsigned(int, *tl); } else if (nd->nd_flag & ND_NFSV4) { /* * For NFSv4, the postop attr are at the end, so no point * in looking if nd_repstat != 0. 
*/ if (!nd->nd_repstat) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) /* should never happen since nd_repstat != 0 */ nd->nd_flag |= ND_NOMOREDATA; else *retp = 1; } } else if (!nd->nd_repstat) { /* For NFSv2, the attributes are here iff nd_repstat == 0 */ *retp = 1; } if (*retp) { error = nfsm_loadattr(nd, nap); if (error) *retp = 0; } nfsmout: return (error); } /* * Fill in the setable attributes. The full argument indicates whether * to fill in them all or just mode and time. */ void nfscl_fillsattr(struct nfsrv_descript *nd, struct vattr *vap, struct vnode *vp, int flags, u_int32_t rdev) { u_int32_t *tl; struct nfsv2_sattr *sp; nfsattrbit_t attrbits; switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) { case ND_NFSV2: NFSM_BUILD(sp, struct nfsv2_sattr *, NFSX_V2SATTR); if (vap->va_mode == (mode_t)VNOVAL) sp->sa_mode = newnfs_xdrneg1; else sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); if (vap->va_uid == (uid_t)VNOVAL) sp->sa_uid = newnfs_xdrneg1; else sp->sa_uid = txdr_unsigned(vap->va_uid); if (vap->va_gid == (gid_t)VNOVAL) sp->sa_gid = newnfs_xdrneg1; else sp->sa_gid = txdr_unsigned(vap->va_gid); if (flags & NFSSATTR_SIZE0) sp->sa_size = 0; else if (flags & NFSSATTR_SIZENEG1) sp->sa_size = newnfs_xdrneg1; else if (flags & NFSSATTR_SIZERDEV) sp->sa_size = txdr_unsigned(rdev); else sp->sa_size = txdr_unsigned(vap->va_size); txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); break; case ND_NFSV3: if (vap->va_mode != (mode_t)VNOVAL) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl = txdr_unsigned(vap->va_mode); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } if ((flags & NFSSATTR_FULL) && vap->va_uid != (uid_t)VNOVAL) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl = txdr_unsigned(vap->va_uid); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } if ((flags & NFSSATTR_FULL) && 
vap->va_gid != (gid_t)VNOVAL) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl = txdr_unsigned(vap->va_gid); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } if ((flags & NFSSATTR_FULL) && vap->va_size != VNOVAL) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = newnfs_true; txdr_hyper(vap->va_size, tl); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } if (vap->va_atime.tv_sec != VNOVAL) { if ((vap->va_vaflags & VA_UTIMES_NULL) == 0) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); txdr_nfsv3time(&vap->va_atime, tl); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); } } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); } if (vap->va_mtime.tv_sec != VNOVAL) { if ((vap->va_vaflags & VA_UTIMES_NULL) == 0) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); txdr_nfsv3time(&vap->va_mtime, tl); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); } } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); } break; case ND_NFSV4: NFSZERO_ATTRBIT(&attrbits); if (vap->va_mode != (mode_t)VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_MODE); if ((flags & NFSSATTR_FULL) && vap->va_uid != (uid_t)VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER); if ((flags & NFSSATTR_FULL) && vap->va_gid != (gid_t)VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNERGROUP); if ((flags & NFSSATTR_FULL) && vap->va_size != VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); if (vap->va_atime.tv_sec != VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET); if (vap->va_mtime.tv_sec != VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFYSET); (void) nfsv4_fillattr(nd, vp->v_mount, vp, NULL, vap, NULL, 0, &attrbits, 
NULL, NULL, 0, 0, 0, 0, (uint64_t)0); break; } } /* * nfscl_request() - mostly a wrapper for newnfs_request(). */ int nfscl_request(struct nfsrv_descript *nd, struct vnode *vp, NFSPROC_T *p, struct ucred *cred, void *stuff) { int ret, vers; struct nfsmount *nmp; nmp = VFSTONFS(vp->v_mount); if (nd->nd_flag & ND_NFSV4) vers = NFS_VER4; else if (nd->nd_flag & ND_NFSV3) vers = NFS_VER3; else vers = NFS_VER2; ret = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred, NFS_PROG, vers, NULL, 1, NULL, NULL); return (ret); } /* * fill in this bsden's variant of statfs using nfsstatfs. */ void nfscl_loadsbinfo(struct nfsmount *nmp, struct nfsstatfs *sfp, void *statfs) { struct statfs *sbp = (struct statfs *)statfs; if (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) { sbp->f_bsize = NFS_FABLKSIZE; sbp->f_blocks = sfp->sf_tbytes / NFS_FABLKSIZE; sbp->f_bfree = sfp->sf_fbytes / NFS_FABLKSIZE; /* * Although sf_abytes is uint64_t and f_bavail is int64_t, * the value after dividing by NFS_FABLKSIZE is small * enough that it will fit in 63bits, so it is ok to * assign it to f_bavail without fear that it will become * negative. */ sbp->f_bavail = sfp->sf_abytes / NFS_FABLKSIZE; sbp->f_files = sfp->sf_tfiles; /* Since f_ffree is int64_t, clip it to 63bits. */ if (sfp->sf_ffiles > INT64_MAX) sbp->f_ffree = INT64_MAX; else sbp->f_ffree = sfp->sf_ffiles; } else if ((nmp->nm_flag & NFSMNT_NFSV4) == 0) { /* * The type casts to (int32_t) ensure that this code is * compatible with the old NFS client, in that it will * propagate bit31 to the high order bits. This may or may * not be correct for NFSv2, but since it is a legacy * environment, I'd rather retain backwards compatibility. */ sbp->f_bsize = (int32_t)sfp->sf_bsize; sbp->f_blocks = (int32_t)sfp->sf_blocks; sbp->f_bfree = (int32_t)sfp->sf_bfree; sbp->f_bavail = (int32_t)sfp->sf_bavail; sbp->f_files = 0; sbp->f_ffree = 0; } } /* * Use the fsinfo stuff to update the mount point. 
*/
/*
 * The write, read and readdir transfer sizes in *nmp are each adjusted in
 * the same three steps: adopt the server's preferred size (rounded up to a
 * block multiple) when it is smaller than the current setting or none is
 * set, cap at the server's maximum (rounded down), then enforce a floor of
 * one block.  nm_maxfilesize is lowered to the server's limit, f_iosize is
 * recomputed and NFSSTA_GOTFSINFO is set in nm_state.
 */
void
nfscl_loadfsinfo(struct nfsmount *nmp, struct nfsfsinfo *fsp)
{

	/* Write size. */
	if ((nmp->nm_wsize == 0 || fsp->fs_wtpref < nmp->nm_wsize) &&
	    fsp->fs_wtpref >= NFS_FABLKSIZE)
		nmp->nm_wsize = (fsp->fs_wtpref + NFS_FABLKSIZE - 1) &
		    ~(NFS_FABLKSIZE - 1);
	if (fsp->fs_wtmax < nmp->nm_wsize && fsp->fs_wtmax > 0) {
		nmp->nm_wsize = fsp->fs_wtmax & ~(NFS_FABLKSIZE - 1);
		/* Maximum below one block: use it unrounded for now. */
		if (nmp->nm_wsize == 0)
			nmp->nm_wsize = fsp->fs_wtmax;
	}
	if (nmp->nm_wsize < NFS_FABLKSIZE)
		nmp->nm_wsize = NFS_FABLKSIZE;
	/* Read size. */
	if ((nmp->nm_rsize == 0 || fsp->fs_rtpref < nmp->nm_rsize) &&
	    fsp->fs_rtpref >= NFS_FABLKSIZE)
		nmp->nm_rsize = (fsp->fs_rtpref + NFS_FABLKSIZE - 1) &
		    ~(NFS_FABLKSIZE - 1);
	if (fsp->fs_rtmax < nmp->nm_rsize && fsp->fs_rtmax > 0) {
		nmp->nm_rsize = fsp->fs_rtmax & ~(NFS_FABLKSIZE - 1);
		if (nmp->nm_rsize == 0)
			nmp->nm_rsize = fsp->fs_rtmax;
	}
	if (nmp->nm_rsize < NFS_FABLKSIZE)
		nmp->nm_rsize = NFS_FABLKSIZE;
	/* Readdir size; note it is capped by the read maximum, fs_rtmax. */
	if ((nmp->nm_readdirsize == 0 || fsp->fs_dtpref < nmp->nm_readdirsize)
	    && fsp->fs_dtpref >= NFS_DIRBLKSIZ)
		nmp->nm_readdirsize = (fsp->fs_dtpref + NFS_DIRBLKSIZ - 1) &
		    ~(NFS_DIRBLKSIZ - 1);
	if (fsp->fs_rtmax < nmp->nm_readdirsize && fsp->fs_rtmax > 0) {
		nmp->nm_readdirsize = fsp->fs_rtmax & ~(NFS_DIRBLKSIZ - 1);
		if (nmp->nm_readdirsize == 0)
			nmp->nm_readdirsize = fsp->fs_rtmax;
	}
	if (nmp->nm_readdirsize < NFS_DIRBLKSIZ)
		nmp->nm_readdirsize = NFS_DIRBLKSIZ;
	if (fsp->fs_maxfilesize > 0 &&
	    fsp->fs_maxfilesize < nmp->nm_maxfilesize)
		nmp->nm_maxfilesize = fsp->fs_maxfilesize;
	nmp->nm_mountp->mnt_stat.f_iosize = newnfs_iosize(nmp);
	nmp->nm_state |= NFSSTA_GOTFSINFO;
}

/*
 * Look up the source address that should be used to communicate with
 * @nmp and store it in @paddr.  *@isinet6p is set to 1 for an IPv6
 * address and 0 for an IPv4 one.
 *
 * Returns a pointer to the stored address on success, or NULL when the
 * lookup fails, the selected address is a loopback address, or the
 * server's address family is not handled.
*/ u_int8_t * nfscl_getmyip(struct nfsmount *nmp, struct in6_addr *paddr, int *isinet6p) { #if defined(INET6) || defined(INET) int error, fibnum; fibnum = curthread->td_proc->p_fibnum; #endif #ifdef INET if (nmp->nm_nam->sa_family == AF_INET) { struct sockaddr_in *sin; struct nhop4_extended nh_ext; sin = (struct sockaddr_in *)nmp->nm_nam; CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred)); error = fib4_lookup_nh_ext(fibnum, sin->sin_addr, 0, 0, &nh_ext); CURVNET_RESTORE(); if (error != 0) return (NULL); if ((ntohl(nh_ext.nh_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { /* Ignore loopback addresses */ return (NULL); } *isinet6p = 0; *((struct in_addr *)paddr) = nh_ext.nh_src; return (u_int8_t *)paddr; } #endif #ifdef INET6 if (nmp->nm_nam->sa_family == AF_INET6) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)nmp->nm_nam; CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred)); error = in6_selectsrc_addr(fibnum, &sin6->sin6_addr, sin6->sin6_scope_id, NULL, paddr, NULL); CURVNET_RESTORE(); if (error != 0) return (NULL); if (IN6_IS_ADDR_LOOPBACK(paddr)) return (NULL); /* Scope is embedded in */ *isinet6p = 1; return (u_int8_t *)paddr; } #endif return (NULL); } /* * Copy NFS uid, gids from the cred structure. */ void newnfs_copyincred(struct ucred *cr, struct nfscred *nfscr) { int i; KASSERT(cr->cr_ngroups >= 0, ("newnfs_copyincred: negative cr_ngroups")); nfscr->nfsc_uid = cr->cr_uid; nfscr->nfsc_ngroups = MIN(cr->cr_ngroups, NFS_MAXGRPS + 1); for (i = 0; i < nfscr->nfsc_ngroups; i++) nfscr->nfsc_groups[i] = cr->cr_groups[i]; } /* * Do any client specific initialization. */ void nfscl_init(void) { static int inited = 0; if (inited) return; inited = 1; nfscl_inited = 1; ncl_pbuf_freecnt = nswbuf / 2 + 1; } /* * Check each of the attributes to be set, to ensure they aren't already * the correct value. Disable setting ones already correct. 
*/
int
nfscl_checksattr(struct vattr *vap, struct nfsvattr *nvap)
{

	/*
	 * For each attribute that is requested (not VNOVAL) and already
	 * matches the current value in nvap, reset it to VNOVAL so that
	 * it is not set again.
	 */
	if (vap->va_mode != (mode_t)VNOVAL && vap->va_mode == nvap->na_mode)
		vap->va_mode = (mode_t)VNOVAL;
	if (vap->va_uid != (uid_t)VNOVAL && vap->va_uid == nvap->na_uid)
		vap->va_uid = (uid_t)VNOVAL;
	if (vap->va_gid != (gid_t)VNOVAL && vap->va_gid == nvap->na_gid)
		vap->va_gid = (gid_t)VNOVAL;
	if (vap->va_size != VNOVAL && vap->va_size == nvap->na_size)
		vap->va_size = VNOVAL;

	/*
	 * The VAP handed to us is normally only partially initialized.
	 * The NFSv3 spec allows a server to keep the verifier in the file
	 * attributes and therefore requires a SETATTR RPC.  FreeBSD servers
	 * keep the verifier in atime, but not every server can be assumed
	 * to do so, hence the SETATTR always carries both atime and mtime.
	 * VA_UTIMES_NULL is set in this case so that the server's time is
	 * used.  This works around a bug in some Solaris servers, where
	 * setting the time TOCLIENT makes the Setattr RPC return NFS_OK
	 * without setting va_mode.
	 */
	if (vap->va_mtime.tv_sec == VNOVAL) {
		vfs_timestamp(&vap->va_mtime);
		vap->va_vaflags |= VA_UTIMES_NULL;
	}
	if (vap->va_atime.tv_sec == VNOVAL)
		vap->va_atime = vap->va_mtime;

	/* Always reports 1. */
	return (1);
}

/*
 * Map nfsv4 errors to errno.h errors.
 * The uid and gid arguments are only used for NFSERR_BADOWNER and that
 * error should only be returned for the Open, Create and Setattr Ops.
 * As such, most calls can just pass in 0 for those arguments.
*/ APPLESTATIC int nfscl_maperr(struct thread *td, int error, uid_t uid, gid_t gid) { struct proc *p; if (error < 10000 || error >= NFSERR_STALEWRITEVERF) return (error); if (td != NULL) p = td->td_proc; else p = NULL; switch (error) { case NFSERR_BADOWNER: tprintf(p, LOG_INFO, "No name and/or group mapping for uid,gid:(%d,%d)\n", uid, gid); return (EPERM); case NFSERR_BADNAME: case NFSERR_BADCHAR: printf("nfsv4 char/name not handled by server\n"); return (ENOENT); case NFSERR_STALECLIENTID: case NFSERR_STALESTATEID: case NFSERR_EXPIRED: case NFSERR_BADSTATEID: case NFSERR_BADSESSION: printf("nfsv4 recover err returned %d\n", error); return (EIO); case NFSERR_BADHANDLE: case NFSERR_SERVERFAULT: case NFSERR_BADTYPE: case NFSERR_FHEXPIRED: case NFSERR_RESOURCE: case NFSERR_MOVED: case NFSERR_NOFILEHANDLE: case NFSERR_MINORVERMISMATCH: case NFSERR_OLDSTATEID: case NFSERR_BADSEQID: case NFSERR_LEASEMOVED: case NFSERR_RECLAIMBAD: case NFSERR_BADXDR: case NFSERR_OPILLEGAL: printf("nfsv4 client/server protocol prob err=%d\n", error); return (EIO); default: tprintf(p, LOG_INFO, "nfsv4 err=%d\n", error); return (EIO); }; } /* * Check to see if the process for this owner exists. Return 1 if it doesn't * and 0 otherwise. */ int nfscl_procdoesntexist(u_int8_t *own) { union { u_int32_t lval; u_int8_t cval[4]; } tl; struct proc *p; pid_t pid; int i, ret = 0; /* For the single open_owner of all 0 bytes, just return 0. 
*/ for (i = 0; i < NFSV4CL_LOCKNAMELEN; i++) if (own[i] != 0) break; if (i == NFSV4CL_LOCKNAMELEN) return (0); tl.cval[0] = *own++; tl.cval[1] = *own++; tl.cval[2] = *own++; tl.cval[3] = *own++; pid = tl.lval; p = pfind_locked(pid); if (p == NULL) return (1); if (p->p_stats == NULL) { PROC_UNLOCK(p); return (0); } tl.cval[0] = *own++; tl.cval[1] = *own++; tl.cval[2] = *own++; tl.cval[3] = *own++; if (tl.lval != p->p_stats->p_start.tv_sec) { ret = 1; } else { tl.cval[0] = *own++; tl.cval[1] = *own++; tl.cval[2] = *own++; tl.cval[3] = *own; if (tl.lval != p->p_stats->p_start.tv_usec) ret = 1; } PROC_UNLOCK(p); return (ret); } /* * - nfs pseudo system call for the client */ /* * MPSAFE */ static int nfssvc_nfscl(struct thread *td, struct nfssvc_args *uap) { struct file *fp; struct nfscbd_args nfscbdarg; struct nfsd_nfscbd_args nfscbdarg2; struct nameidata nd; struct nfscl_dumpmntopts dumpmntopts; cap_rights_t rights; char *buf; int error; + struct mount *mp; + struct nfsmount *nmp; if (uap->flag & NFSSVC_CBADDSOCK) { error = copyin(uap->argp, (caddr_t)&nfscbdarg, sizeof(nfscbdarg)); if (error) return (error); /* * Since we don't know what rights might be required, * pretend that we need them all. It is better to be too * careful than too reckless. 
*/ error = fget(td, nfscbdarg.sock, cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp); if (error) return (error); if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, td); return (EPERM); } error = nfscbd_addsock(fp); fdrop(fp, td); if (!error && nfscl_enablecallb == 0) { nfsv4_cbport = nfscbdarg.port; nfscl_enablecallb = 1; } } else if (uap->flag & NFSSVC_NFSCBD) { if (uap->argp == NULL) return (EINVAL); error = copyin(uap->argp, (caddr_t)&nfscbdarg2, sizeof(nfscbdarg2)); if (error) return (error); error = nfscbd_nfsd(td, &nfscbdarg2); } else if (uap->flag & NFSSVC_DUMPMNTOPTS) { error = copyin(uap->argp, &dumpmntopts, sizeof(dumpmntopts)); if (error == 0 && (dumpmntopts.ndmnt_blen < 256 || dumpmntopts.ndmnt_blen > 1024)) error = EINVAL; if (error == 0) error = nfsrv_lookupfilename(&nd, dumpmntopts.ndmnt_fname, td); if (error == 0 && strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) { vput(nd.ni_vp); error = EINVAL; } if (error == 0) { buf = malloc(dumpmntopts.ndmnt_blen, M_TEMP, M_WAITOK); nfscl_retopts(VFSTONFS(nd.ni_vp->v_mount), buf, dumpmntopts.ndmnt_blen); vput(nd.ni_vp); error = copyout(buf, dumpmntopts.ndmnt_buf, dumpmntopts.ndmnt_blen); free(buf, M_TEMP); } + } else if (uap->flag & NFSSVC_FORCEDISM) { + buf = malloc(MNAMELEN + 1, M_TEMP, M_WAITOK); + error = copyinstr(uap->argp, buf, MNAMELEN + 1, NULL); + if (error == 0) { + nmp = NULL; + mtx_lock(&mountlist_mtx); + TAILQ_FOREACH(mp, &mountlist, mnt_list) { + if (strcmp(mp->mnt_stat.f_mntonname, buf) == + 0 && strcmp(mp->mnt_stat.f_fstypename, + "nfs") == 0 && mp->mnt_data != NULL) { + nmp = VFSTONFS(mp); + mtx_lock(&nmp->nm_mtx); + if ((nmp->nm_privflag & + NFSMNTP_FORCEDISM) == 0) { + nmp->nm_privflag |= + (NFSMNTP_FORCEDISM | + NFSMNTP_CANCELRPCS); + mtx_unlock(&nmp->nm_mtx); + } else { + nmp = NULL; + mtx_unlock(&nmp->nm_mtx); + } + break; + } + } + mtx_unlock(&mountlist_mtx); + + if (nmp != NULL) { + /* + * Call newnfs_nmcancelreqs() to cause + * any RPCs in progress on the mount point to + * fail. 
+ * This will cause any process waiting for an + * RPC to complete while holding a vnode lock + * on the mounted-on vnode (such as "df" or + * a non-forced "umount") to fail. + * This will unlock the mounted-on vnode so + * a forced dismount can succeed. + * Then clear NFSMNTP_CANCELRPCS and wakeup(), + * so that nfs_unmount() can complete. + */ + newnfs_nmcancelreqs(nmp); + mtx_lock(&nmp->nm_mtx); + nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; + wakeup(nmp); + mtx_unlock(&nmp->nm_mtx); + } else + error = EINVAL; + } + free(buf, M_TEMP); } else { error = EINVAL; } return (error); } extern int (*nfsd_call_nfscl)(struct thread *, struct nfssvc_args *); /* * Called once to initialize data structures... */ static int nfscl_modevent(module_t mod, int type, void *data) { int error = 0; static int loaded = 0; switch (type) { case MOD_LOAD: if (loaded) return (0); newnfs_portinit(); mtx_init(&ncl_iod_mutex, "ncl_iod_mutex", NULL, MTX_DEF); nfscl_init(); NFSD_LOCK(); nfsrvd_cbinit(0); NFSD_UNLOCK(); ncl_call_invalcaches = ncl_invalcaches; nfsd_call_nfscl = nfssvc_nfscl; loaded = 1; break; case MOD_UNLOAD: if (nfs_numnfscbd != 0) { error = EBUSY; break; } /* * XXX: Unloading of nfscl module is unsupported. */ #if 0 ncl_call_invalcaches = NULL; nfsd_call_nfscl = NULL; /* and get rid of the mutexes */ mtx_destroy(&ncl_iod_mutex); loaded = 0; break; #else /* FALLTHROUGH */ #endif default: error = EOPNOTSUPP; break; } return error; } static moduledata_t nfscl_mod = { "nfscl", nfscl_modevent, NULL, }; DECLARE_MODULE(nfscl, nfscl_mod, SI_SUB_VFS, SI_ORDER_FIRST); /* So that loader and kldload(2) can find us, wherever we are.. 
*/ MODULE_VERSION(nfscl, 1); MODULE_DEPEND(nfscl, nfscommon, 1, 1, 1); MODULE_DEPEND(nfscl, krpc, 1, 1, 1); MODULE_DEPEND(nfscl, nfssvc, 1, 1, 1); MODULE_DEPEND(nfscl, nfslock, 1, 1, 1); Index: head/sys/fs/nfsclient/nfs_clvfsops.c =================================================================== --- head/sys/fs/nfsclient/nfs_clvfsops.c (revision 321687) +++ head/sys/fs/nfsclient/nfs_clvfsops.c (revision 321688) @@ -1,2025 +1,2043 @@ /*- * Copyright (c) 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from nfs_vfsops.c 8.12 (Berkeley) 5/20/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_bootp.h" #include "opt_nfsroot.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(nfscl, "NFSv4 client"); extern int nfscl_ticks; extern struct timeval nfsboottime; extern int nfsrv_useacl; extern int nfscl_debuglevel; extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON]; extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON]; extern struct mtx ncl_iod_mutex; NFSCLSTATEMUTEX; MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "NFS request header"); MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "NFS mount struct"); SYSCTL_DECL(_vfs_nfs); static int nfs_ip_paranoia = 1; SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW, &nfs_ip_paranoia, 0, ""); static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY; SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY, downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, ""); /* how long between console messages "nfs server foo not responding" */ static int nfs_tprintf_delay = NFS_TPRINTF_DELAY; SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY, downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, ""); #ifdef NFS_DEBUG int nfs_debug; SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, 
&nfs_debug, 0, "Toggle debug flag"); #endif static int nfs_mountroot(struct mount *); static void nfs_sec_name(char *, int *); static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp, const char *, struct ucred *, struct thread *); static int mountnfs(struct nfs_args *, struct mount *, struct sockaddr *, char *, u_char *, int, u_char *, int, u_char *, int, struct vnode **, struct ucred *, struct thread *, int, int, int); static void nfs_getnlminfo(struct vnode *, uint8_t *, size_t *, struct sockaddr_storage *, int *, off_t *, struct timeval *); static vfs_mount_t nfs_mount; static vfs_cmount_t nfs_cmount; static vfs_unmount_t nfs_unmount; static vfs_root_t nfs_root; static vfs_statfs_t nfs_statfs; static vfs_sync_t nfs_sync; static vfs_sysctl_t nfs_sysctl; static vfs_purge_t nfs_purge; /* * nfs vfs operations. */ static struct vfsops nfs_vfsops = { .vfs_init = ncl_init, .vfs_mount = nfs_mount, .vfs_cmount = nfs_cmount, .vfs_root = nfs_root, .vfs_statfs = nfs_statfs, .vfs_sync = nfs_sync, .vfs_uninit = ncl_uninit, .vfs_unmount = nfs_unmount, .vfs_sysctl = nfs_sysctl, .vfs_purge = nfs_purge, }; VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY); /* So that loader and kldload(2) can find us, wherever we are.. */ MODULE_VERSION(nfs, 1); MODULE_DEPEND(nfs, nfscommon, 1, 1, 1); MODULE_DEPEND(nfs, krpc, 1, 1, 1); MODULE_DEPEND(nfs, nfssvc, 1, 1, 1); MODULE_DEPEND(nfs, nfslock, 1, 1, 1); /* * This structure is now defined in sys/nfs/nfs_diskless.c so that it * can be shared by both NFS clients. It is declared here so that it * will be defined for kernels built without NFS_ROOT, although it * isn't used in that case. 
*/
#if !defined(NFS_ROOT)
struct nfs_diskless	nfs_diskless = { { { 0 } } };
struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
int			nfs_diskless_valid = 0;
#endif

SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
    &nfs_diskless_valid, 0,
    "Has the diskless struct been filled correctly");

SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");

SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
    "%Ssockaddr_in", "Diskless root nfs address");


void		newnfsargs_ntoh(struct nfs_args *);
static int	nfs_mountdiskless(char *,
		    struct sockaddr_in *, struct nfs_args *,
		    struct thread *, struct vnode **, struct mount *);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);

/*
 * Clamp the mount's rsize, readdirsize and wsize to the limit that applies
 * to the NFS version and socket type in use, then compute the size used
 * for I/O buffers.  That size is stored in the mount's mnt_stat.f_iosize
 * and also returned.
 */
int
newnfs_iosize(struct nfsmount *nmp)
{
	int iosize, maxio;

	/* First, set the upper limit for iosize */
	if (nmp->nm_flag & NFSMNT_NFSV4) {
		maxio = NFS_MAXBSIZE;
	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
		if (nmp->nm_sotype == SOCK_DGRAM)
			maxio = NFS_MAXDGRAMDATA;
		else
			maxio = NFS_MAXBSIZE;
	} else {
		maxio = NFS_V2MAXDATA;
	}
	/* A size of 0 means "not set" and is replaced by the limit. */
	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
		nmp->nm_rsize = maxio;
	if (nmp->nm_rsize > NFS_MAXBSIZE)
		nmp->nm_rsize = NFS_MAXBSIZE;
	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
		nmp->nm_readdirsize = maxio;
	/* The readdir size is never allowed to exceed the read size. */
	if (nmp->nm_readdirsize > nmp->nm_rsize)
		nmp->nm_readdirsize = nmp->nm_rsize;
	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
		nmp->nm_wsize = maxio;
	if (nmp->nm_wsize > NFS_MAXBSIZE)
		nmp->nm_wsize = NFS_MAXBSIZE;

	/*
	 * Calculate the size used for io buffers.  Use the larger
	 * of the two sizes to minimise nfs requests but make sure
	 * that it is at least one VM page to avoid wasting buffer
	 * space.  It must also be at least NFS_DIRBLKSIZ, since
	 * that is the buffer size used for directories.
	 */
	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
	iosize = imax(iosize, PAGE_SIZE);
	iosize = imax(iosize, NFS_DIRBLKSIZ);
	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
	return (iosize);
}

/*
 * Copy the fields of an old style onfs_args into a struct nfs_args and
 * stamp the result with NFS_ARGSVERSION.  Fields of nfs_args that are not
 * assigned here are left as they were.
 */
static void
nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
{

	args->version = NFS_ARGSVERSION;
	args->addr = oargs->addr;
	args->addrlen = oargs->addrlen;
	args->sotype = oargs->sotype;
	args->proto = oargs->proto;
	args->fh = oargs->fh;
	args->fhsize = oargs->fhsize;
	args->flags = oargs->flags;
	args->wsize = oargs->wsize;
	args->rsize = oargs->rsize;
	args->readdirsize = oargs->readdirsize;
	args->timeo = oargs->timeo;
	args->retrans = oargs->retrans;
	args->readahead = oargs->readahead;
	args->hostname = oargs->hostname;
}

/*
 * Fill in nfsv3_diskless from the contents of nfs_diskless and then set
 * nfs_diskless_valid to 3.
 */
static void
nfs_convert_diskless(void)
{

	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
	    sizeof(struct ifaliasreq));
	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
	    sizeof(struct sockaddr_in));
	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
	/* The file handle size depends on whether NFSv3 was requested. */
	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
		nfsv3_diskless.root_fhsize = NFSX_MYFH;
		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
	} else {
		nfsv3_diskless.root_fhsize = NFSX_V2FH;
		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
	}
	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
	    sizeof(struct sockaddr_in));
	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
	nfsv3_diskless.root_time = nfs_diskless.root_time;
	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
	    MAXHOSTNAMELEN);
	nfs_diskless_valid = 3;
}

/*
 * nfs statfs call
 */
static int
nfs_statfs(struct mount *mp, struct statfs *sbp)
{
	struct vnode *vp;
	struct thread *td;
	struct nfsmount *nmp = VFSTONFS(mp);
	struct nfsvattr nfsva;
	struct nfsfsinfo fs;
	struct nfsstatfs sb;
	int error = 0, attrflag, gotfsinfo = 0, ret;
	struct nfsnode *np;

	td = curthread;

	/* MBF_NOWAIT: fail with an error instead of sleeping. */
	error = vfs_busy(mp, MBF_NOWAIT);
	if (error)
		return (error);
	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
	if (error) {
		vfs_unbusy(mp);
		return (error);
	}
	vp = NFSTOV(np);
	mtx_lock(&nmp->nm_mtx);
	/* For NFSv3, do the fsinfo RPC first if it has not been done yet. */
	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
		mtx_unlock(&nmp->nm_mtx);
		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
		    &attrflag, NULL);
		if (!error)
			gotfsinfo = 1;
	} else
		mtx_unlock(&nmp->nm_mtx);
	if (!error)
		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
		    &attrflag, NULL);
	if (error != 0)
		NFSCL_DEBUG(2, "statfs=%d\n", error);
	/* No attributes came back with the reply, so fetch them. */
	if (attrflag == 0) {
		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
		    td->td_ucred, td, &nfsva, NULL, NULL);
		if (ret) {
			/*
			 * Just set default values to get things going.
			 */
			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
			nfsva.na_vattr.va_type = VDIR;
			nfsva.na_vattr.va_mode = 0777;
			nfsva.na_vattr.va_nlink = 100;
			nfsva.na_vattr.va_uid = (uid_t)0;
			nfsva.na_vattr.va_gid = (gid_t)0;
			nfsva.na_vattr.va_fileid = 2;
			nfsva.na_vattr.va_gen = 1;
			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
			nfsva.na_vattr.va_size = 512 * 1024;
		}
	}
	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
	if (!error) {
		mtx_lock(&nmp->nm_mtx);
		if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
			nfscl_loadfsinfo(nmp, &fs);
		nfscl_loadsbinfo(nmp, &sb, sbp);
		sbp->f_iosize = newnfs_iosize(nmp);
		mtx_unlock(&nmp->nm_mtx);
		/* Copy the names only when sbp is not the mount's own copy. */
		if (sbp != &mp->mnt_stat) {
			bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname,
			    MNAMELEN);
			bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname,
			    MNAMELEN);
		}
		strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name,
		    MFSNAMELEN);
	} else if (NFS_ISV4(vp)) {
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	}
	vput(vp);
	vfs_unbusy(mp);
	return (error);
}

/*
 * nfs version 3 fsinfo rpc call
 */
int
ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
    struct thread *td)
{
	struct nfsfsinfo fs;
	struct nfsvattr nfsva;
	int error, attrflag;

	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
	if (!error) {
		if (attrflag)
			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
			    1);
		/* nfscl_loadfsinfo() is called with the mount mutex held. */
		mtx_lock(&nmp->nm_mtx);
		nfscl_loadfsinfo(nmp, &fs);
		mtx_unlock(&nmp->nm_mtx);
	}
	return (error);
}

/*
 * Mount a remote root fs via. nfs. This depends on the info in the
 * nfs_diskless structure that has been filled in properly by some primary
 * bootstrap.
 * It goes something like this:
 * - do enough of "ifconfig" by calling ifioctl() so that the system
 *   can talk to the server
 * - If nfs_diskless.mygateway is filled in, use that address as
 *   a default gateway.
 * - build the rootfs mount point and call mountnfs() to do the rest.
 *
 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
 * structure, as well as other global NFS client variables here, as
 * nfs_mountroot() will be called once in the boot before any other NFS
 * client activity occurs.
 */
static int
nfs_mountroot(struct mount *mp)
{
	struct thread *td = curthread;
	struct nfsv3_diskless *nd = &nfsv3_diskless;
	struct socket *so;
	struct vnode *vp;
	struct ifreq ir;
	int error;
	u_long l;
	char buf[128];
	char *cp;

#if defined(BOOTP_NFSROOT) && defined(BOOTP)
	bootpc_init();		/* use bootp to get nfs_diskless filled in */
#elif defined(NFS_ROOT)
	nfs_setup_diskless();
#endif

	/* 0: no diskless info at all; 1: old format that needs converting. */
	if (nfs_diskless_valid == 0)
		return (-1);
	if (nfs_diskless_valid == 1)
		nfs_convert_diskless();

	/*
	 * Do enough of ifconfig(8) so that the critical net interface can
	 * talk to the server.
	 */
	error = socreate(nd->myif.ifra_addr.sa_family, &so,
	    nd->root_args.sotype, 0, td->td_ucred, td);
	if (error)
		panic("nfs_mountroot: socreate(%04x): %d",
		    nd->myif.ifra_addr.sa_family, error);

#if 0 /* XXX Bad idea */
	/*
	 * We might not have been told the right interface, so we pass
	 * over the first ten interfaces of the same kind, until we get
	 * one of them configured.
	 */

	for (i = strlen(nd->myif.ifra_name) - 1;
		nd->myif.ifra_name[i] >= '0' &&
		nd->myif.ifra_name[i] <= '9';
		nd->myif.ifra_name[i] ++) {
		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
		if(!error)
			break;
	}
#endif
	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
	if (error)
		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
	/* Optionally set the interface MTU from the kernel environment. */
	if ((cp = kern_getenv("boot.netif.mtu")) != NULL) {
		ir.ifr_mtu = strtol(cp, NULL, 10);
		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
		freeenv(cp);
		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
		if (error)
			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
	}
	soclose(so);

	/*
	 * If the gateway field is filled in, set it as the default route.
	 * Note that pxeboot will set a default route of 0 if the route
	 * is not set by the DHCP server.  Check also for a value of 0
	 * to avoid panicking inappropriately in that situation.
	 */
	if (nd->mygateway.sin_len != 0 &&
	    nd->mygateway.sin_addr.s_addr != 0) {
		struct sockaddr_in mask, sin;

		/* Destination and mask are both all zeroes. */
		bzero((caddr_t)&mask, sizeof(mask));
		sin = mask;
		sin.sin_family = AF_INET;
		sin.sin_len = sizeof(sin);
                /* XXX MRT use table 0 for this sort of thing */
		CURVNET_SET(TD_TO_VNET(td));
		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
		    (struct sockaddr *)&nd->mygateway,
		    (struct sockaddr *)&mask,
		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
		CURVNET_RESTORE();
		if (error)
			panic("nfs_mountroot: RTM_ADD: %d", error);
	}

	/*
	 * Create the rootfs mount point.
	 */
	nd->root_args.fh = nd->root_fh;
	nd->root_args.fhsize = nd->root_fhsize;
	/* Build "a.b.c.d:path" from the server address and root path. */
	l = ntohl(nd->root_saddr.sin_addr.s_addr);
	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
		(l >> 24) & 0xff, (l >> 16) & 0xff,
		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
	printf("NFS ROOT: %s\n", buf);
	nd->root_args.hostname = buf;
	if ((error = nfs_mountdiskless(buf,
	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
		return (error);
	}

	/*
	 * This is not really an nfs issue, but it is much easier to
	 * set hostname here and then let the "/etc/rc.xxx" files
	 * mount the right /var based upon its preset value.
	 */
	mtx_lock(&prison0.pr_mtx);
	strlcpy(prison0.pr_hostname, nd->my_hostnam,
	    sizeof(prison0.pr_hostname));
	mtx_unlock(&prison0.pr_mtx);
	inittodr(ntohl(nd->root_time));
	return (0);
}

/*
 * Internal version of mount system call for diskless setup.
 */
static int
nfs_mountdiskless(char *path,
    struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
    struct vnode **vpp, struct mount *mp)
{
	struct sockaddr *nam;
	int dirlen, error;
	char *dirpath;

	/*
	 * Find the directory path in "path", which also has the server's
	 * name/ip address in it.
	 */
	dirpath = strchr(path, ':');
	if (dirpath != NULL)
		dirlen = strlen(++dirpath);
	else
		dirlen = 0;
	/* mountnfs() is handed a private copy of the server address. */
	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
		return (error);
	}
	return (0);
}

/*
 * Translate a security flavor name ("krb5", "krb5i" or "krb5p") into the
 * matching NFSMNT_* bits, which are or'd into *flagsp.  Any other name
 * leaves *flagsp untouched.
 */
static void
nfs_sec_name(char *sec, int *flagsp)
{
	if (!strcmp(sec, "krb5"))
		*flagsp |= NFSMNT_KERB;
	else if (!strcmp(sec, "krb5i"))
		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
	else if (!strcmp(sec, "krb5p"))
		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
}

/*
 * Apply the settings in argp to the mount point mp and its nfsmount nmp:
 * the read-only flag, timeouts and retry counts, I/O sizes, attribute
 * cache times, readahead and the write commit size.  If the transport
 * settings changed on a mount that already has a client handle, the
 * socket is disconnected (and reconnected for SOCK_DGRAM).  When hostname
 * is not NULL, the part before the first ':' is saved in nm_hostname.
 */
static void
nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
    const char *hostname, struct ucred *cred, struct thread *td)
{
	int adjsock;
	char *p;

	/*
	 * Set read-only flag if requested; otherwise, clear it if this is
	 * an update.  If this is not an update, then either the read-only
	 * flag is already clear, or this is a root mount and it was set
	 * intentionally at some previous point.
	 */
	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
		MNT_ILOCK(mp);
		mp->mnt_flag |= MNT_RDONLY;
		MNT_IUNLOCK(mp);
	} else if (mp->mnt_flag & MNT_UPDATE) {
		MNT_ILOCK(mp);
		mp->mnt_flag &= ~MNT_RDONLY;
		MNT_IUNLOCK(mp);
	}

	/*
	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
	 * no sense in that context.  Also, set up appropriate retransmit
	 * and soft timeout behavior.
	 */
	if (argp->sotype == SOCK_STREAM) {
		nmp->nm_flag &= ~NFSMNT_NOCONN;
		nmp->nm_timeo = NFS_MAXTIMEO;
		if ((argp->flags & NFSMNT_NFSV4) != 0)
			nmp->nm_retry = INT_MAX;
		else
			nmp->nm_retry = NFS_RETRANS_TCP;
	}

	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
		argp->flags &= ~NFSMNT_RDIRPLUS;
		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
	}

	/* Clear ONEOPENOWN for NFSv2, 3 and 4.0. */
	if (nmp->nm_minorvers == 0) {
		argp->flags &= ~NFSMNT_ONEOPENOWN;
		nmp->nm_flag &= ~NFSMNT_ONEOPENOWN;
	}

	/* Re-bind if rsrvd port requested and wasn't on one */
	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
		  && (argp->flags & NFSMNT_RESVPORT);
	/* Also re-bind if we're switching to/from a connected UDP socket */
	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
		    (argp->flags & NFSMNT_NOCONN));

	/* Update flags atomically.  Don't change the lock bits. */
	nmp->nm_flag = argp->flags | nmp->nm_flag;

	/* The timeo argument is in tenths of a second; convert and clamp. */
	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
		if (nmp->nm_timeo < NFS_MINTIMEO)
			nmp->nm_timeo = NFS_MINTIMEO;
		else if (nmp->nm_timeo > NFS_MAXTIMEO)
			nmp->nm_timeo = NFS_MAXTIMEO;
	}

	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
		nmp->nm_retry = argp->retrans;
		if (nmp->nm_retry > NFS_MAXREXMIT)
			nmp->nm_retry = NFS_MAXREXMIT;
	}

	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
		nmp->nm_wsize = argp->wsize;
		/*
		 * Clip at the power of 2 below the size. There is an
		 * issue (not isolated) that causes intermittent page
		 * faults if this is not done.
		 */
		if (nmp->nm_wsize > NFS_FABLKSIZE)
			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
		else
			nmp->nm_wsize = NFS_FABLKSIZE;
	}

	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
		nmp->nm_rsize = argp->rsize;
		/*
		 * Clip at the power of 2 below the size. There is an
		 * issue (not isolated) that causes intermittent page
		 * faults if this is not done.
		 */
		if (nmp->nm_rsize > NFS_FABLKSIZE)
			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
		else
			nmp->nm_rsize = NFS_FABLKSIZE;
	}

	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
		nmp->nm_readdirsize = argp->readdirsize;
	}

	/* Attribute cache times fall back to the defaults when not given. */
	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
		nmp->nm_acregmin = argp->acregmin;
	else
		nmp->nm_acregmin = NFS_MINATTRTIMO;
	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
		nmp->nm_acregmax = argp->acregmax;
	else
		nmp->nm_acregmax = NFS_MAXATTRTIMO;
	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
		nmp->nm_acdirmin = argp->acdirmin;
	else
		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
		nmp->nm_acdirmax = argp->acdirmax;
	else
		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
	/* Keep each minimum no larger than the corresponding maximum. */
	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
		nmp->nm_acdirmin = nmp->nm_acdirmax;
	if (nmp->nm_acregmin > nmp->nm_acregmax)
		nmp->nm_acregmin = nmp->nm_acregmax;

	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
		if (argp->readahead <= NFS_MAXRAHEAD)
			nmp->nm_readahead = argp->readahead;
		else
			nmp->nm_readahead = NFS_MAXRAHEAD;
	}
	/* The write commit size is never set below the write size. */
	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
		if (argp->wcommitsize < nmp->nm_wsize)
			nmp->nm_wcommitsize = nmp->nm_wsize;
		else
			nmp->nm_wcommitsize = argp->wcommitsize;
	}

	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
		    (nmp->nm_soproto != argp->proto));

	if (nmp->nm_client != NULL && adjsock) {
		int haslock = 0, error = 0;

		/* The send lock is only taken for stream sockets. */
		if (nmp->nm_sotype == SOCK_STREAM) {
			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
			if (!error)
				haslock = 1;
		}
		if (!error) {
		    newnfs_disconnect(&nmp->nm_sockreq);
		    if (haslock)
			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
		    nmp->nm_sotype = argp->sotype;
		    nmp->nm_soproto = argp->proto;
		    /* Datagram sockets are reconnected here, retrying. */
		    if (nmp->nm_sotype == SOCK_DGRAM)
			while (newnfs_connect(nmp, &nmp->nm_sockreq,
			    cred, td, 0)) {
				printf("newnfs_args: retrying connect\n");
				(void) nfs_catnap(PSOCK, 0, "nfscon");
			}
		}
	} else {
		nmp->nm_sotype = argp->sotype;
		nmp->nm_soproto = argp->proto;
	}

	if (hostname != NULL) {
		strlcpy(nmp->nm_hostname, hostname,
		    sizeof(nmp->nm_hostname));
		/* Keep only the part in front of the first ':'. */
		p = strchr(nmp->nm_hostname, ':');
		if (p != NULL)
			*p = '\0';
	}
}

/* NULL terminated list of mount option names. */
static const char *nfs_opts[] = { "from", "nfs_args",
    "noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
    "noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
    "async", "noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
    "readdirsize", "soft", "hard", "mntudp", "tcp", "udp", "wsize", "rsize",
    "retrans", "actimeo", "acregmin", "acregmax", "acdirmin", "acdirmax",
    "resvport", "readahead", "hostname", "timeo", "timeout", "addr", "fh",
    "nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname", "dirpath",
    "minorversion", "nametimeo", "negnametimeo", "nocto", "noncontigwr",
    "pnfs", "wcommitsize", "oneopenown",
    NULL };

/*
 * Parse the "from" mountarg, passed by the generic mount(8) program
 * or the mountroot code.  This is used when rerooting into NFS.
 *
 * Note that the "hostname" is actually a "hostname:/share/path" string.
 *
 * On success 0 is returned, *hostnamep is a newly allocated (M_NEWNFSMNT)
 * copy of the normalized name, *sinp is a newly allocated (M_SONAME) IPv4
 * socket address, the path has been copied to dirpath and its length
 * stored in *dirlenp.  On failure an errno value is returned.
 */
static int
nfs_mount_parse_from(struct vfsoptlist *opts, char **hostnamep,
    struct sockaddr_in **sinp, char *dirpath, size_t dirpathsize, int *dirlenp)
{
	char *nam, *delimp, *hostp, *spec;
	int error, have_bracket = 0, offset, rv, speclen;
	struct sockaddr_in *sin;
	size_t len;

	error = vfs_getopt(opts, "from", (void **)&spec, &speclen);
	if (error != 0)
		return (error);
	nam = malloc(MNAMELEN + 1, M_TEMP, M_WAITOK);

	/*
	 * This part comes from sbin/mount_nfs/mount_nfs.c:getnfsargs().
	 */
	if (*spec == '[' && (delimp = strchr(spec + 1, ']')) != NULL &&
	    *(delimp + 1) == ':') {
		hostp = spec + 1;
		spec = delimp + 2;
		have_bracket = 1;
	} else if ((delimp = strrchr(spec, ':')) != NULL) {
		hostp = spec;
		spec = delimp + 1;
	} else if ((delimp = strrchr(spec, '@')) != NULL) {
		printf("%s: path@server syntax is deprecated, "
		    "use server:path\n", __func__);
		hostp = delimp + 1;
	} else {
		printf("%s: no <host>:<dirpath> nfs-name\n", __func__);
		free(nam, M_TEMP);
		return (EINVAL);
	}
	/* Split the option string in place at the delimiter. */
	*delimp = '\0';

	/*
	 * If there has been a trailing slash at mounttime it seems
	 * that some mountd implementations fail to remove the mount
	 * entries from their mountlist while unmounting.
	 */
	for (speclen = strlen(spec);
	    speclen > 1 && spec[speclen - 1] == '/';
	    speclen--)
		spec[speclen - 1] = '\0';
	if (strlen(hostp) + strlen(spec) + 1 > MNAMELEN) {
		printf("%s: %s:%s: name too long", __func__, hostp, spec);
		free(nam, M_TEMP);
		return (EINVAL);
	}
	/* Make both '@' and ':' notations equal */
	if (*hostp != '\0') {
		len = strlen(hostp);
		offset = 0;
		if (have_bracket)
			nam[offset++] = '[';
		memmove(nam + offset, hostp, len);
		if (have_bracket)
			nam[len + offset++] = ']';
		nam[len + offset++] = ':';
		memmove(nam + len + offset, spec, speclen);
		nam[len + speclen + offset] = '\0';
	} else
		nam[0] = '\0';

	/*
	 * XXX: IPv6
	 */
	sin = malloc(sizeof(*sin), M_SONAME, M_WAITOK);
	rv = inet_pton(AF_INET, hostp, &sin->sin_addr);
	if (rv != 1) {
		printf("%s: cannot parse '%s', inet_pton() returned %d\n",
		    __func__, hostp, rv);
		free(nam, M_TEMP);
		free(sin, M_SONAME);
		return (EINVAL);
	}

	sin->sin_len = sizeof(*sin);
	sin->sin_family = AF_INET;
	/*
	 * XXX: hardcoded port number.
	 */
	sin->sin_port = htons(2049);

	*hostnamep = strdup(nam, M_NEWNFSMNT);
	*sinp = sin;
	strlcpy(dirpath, spec, dirpathsize);
	*dirlenp = strlen(dirpath);

	free(nam, M_TEMP);
	return (0);
}

/*
 * VFS Operations.
* * mount system call * It seems a bit dumb to copyinstr() the host and path here and then * bcopy() them in mountnfs(), but I wanted to detect errors before * doing the getsockaddr() call because getsockaddr() allocates an mbuf and * an error after that means that I have to release the mbuf. */ /* ARGSUSED */ static int nfs_mount(struct mount *mp) { struct nfs_args args = { .version = NFS_ARGSVERSION, .addr = NULL, .addrlen = sizeof (struct sockaddr_in), .sotype = SOCK_STREAM, .proto = 0, .fh = NULL, .fhsize = 0, .flags = NFSMNT_RESVPORT, .wsize = NFS_WSIZE, .rsize = NFS_RSIZE, .readdirsize = NFS_READDIRSIZE, .timeo = 10, .retrans = NFS_RETRANS, .readahead = NFS_DEFRAHEAD, .wcommitsize = 0, /* was: NQ_DEFLEASE */ .hostname = NULL, .acregmin = NFS_MINATTRTIMO, .acregmax = NFS_MAXATTRTIMO, .acdirmin = NFS_MINDIRATTRTIMO, .acdirmax = NFS_MAXDIRATTRTIMO, }; int error = 0, ret, len; struct sockaddr *nam = NULL; struct vnode *vp; struct thread *td; char *hst; u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100]; char *cp, *opt, *name, *secname; int nametimeo = NFS_DEFAULT_NAMETIMEO; int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO; int minvers = 0; int dirlen, has_nfs_args_opt, has_nfs_from_opt, krbnamelen, srvkrbnamelen; size_t hstlen; has_nfs_args_opt = 0; has_nfs_from_opt = 0; hst = malloc(MNAMELEN, M_TEMP, M_WAITOK); if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) { error = EINVAL; goto out; } td = curthread; if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS && nfs_diskless_valid != 0) { error = nfs_mountroot(mp); goto out; } nfscl_init(); /* * The old mount_nfs program passed the struct nfs_args * from userspace to kernel. The new mount_nfs program * passes string options via nmount() from userspace to kernel * and we populate the struct nfs_args in the kernel. 
*/ if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) { error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args, sizeof(args)); if (error != 0) goto out; if (args.version != NFS_ARGSVERSION) { error = EPROGMISMATCH; goto out; } has_nfs_args_opt = 1; } /* Handle the new style options. */ if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) { args.acdirmin = args.acdirmax = args.acregmin = args.acregmax = 0; args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX | NFSMNT_ACREGMIN | NFSMNT_ACREGMAX; } if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0) args.flags |= NFSMNT_NOCONN; if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0) args.flags &= ~NFSMNT_NOCONN; if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0) args.flags |= NFSMNT_NOLOCKD; if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0) args.flags &= ~NFSMNT_NOLOCKD; if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0) args.flags |= NFSMNT_INT; if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0) args.flags |= NFSMNT_RDIRPLUS; if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0) args.flags |= NFSMNT_RESVPORT; if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0) args.flags &= ~NFSMNT_RESVPORT; if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0) args.flags |= NFSMNT_SOFT; if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0) args.flags &= ~NFSMNT_SOFT; if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0) args.sotype = SOCK_DGRAM; if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0) args.sotype = SOCK_DGRAM; if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0) args.sotype = SOCK_STREAM; if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0) args.flags |= NFSMNT_NFSV3; if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) { args.flags |= NFSMNT_NFSV4; args.sotype = SOCK_STREAM; } if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0) args.flags |= NFSMNT_ALLGSSNAME; if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 
0) args.flags |= NFSMNT_NOCTO; if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0) args.flags |= NFSMNT_NONCONTIGWR; if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0) args.flags |= NFSMNT_PNFS; if (vfs_getopt(mp->mnt_optnew, "oneopenown", NULL, NULL) == 0) args.flags |= NFSMNT_ONEOPENOWN; if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal readdirsize"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.readdirsize); if (ret != 1 || args.readdirsize <= 0) { vfs_mount_error(mp, "illegal readdirsize: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_READDIRSIZE; } if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal readahead"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.readahead); if (ret != 1 || args.readahead <= 0) { vfs_mount_error(mp, "illegal readahead: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_READAHEAD; } if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal wsize"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.wsize); if (ret != 1 || args.wsize <= 0) { vfs_mount_error(mp, "illegal wsize: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_WSIZE; } if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal rsize"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.rsize); if (ret != 1 || args.rsize <= 0) { vfs_mount_error(mp, "illegal wsize: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_RSIZE; } if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal retrans"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.retrans); if (ret != 1 || args.retrans <= 0) { vfs_mount_error(mp, "illegal retrans: %s", opt); error = EINVAL; 
goto out; } args.flags |= NFSMNT_RETRANS; } if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acregmin); if (ret != 1 || args.acregmin < 0) { vfs_mount_error(mp, "illegal actimeo: %s", opt); error = EINVAL; goto out; } args.acdirmin = args.acdirmax = args.acregmax = args.acregmin; args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX | NFSMNT_ACREGMIN | NFSMNT_ACREGMAX; } if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acregmin); if (ret != 1 || args.acregmin < 0) { vfs_mount_error(mp, "illegal acregmin: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_ACREGMIN; } if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acregmax); if (ret != 1 || args.acregmax < 0) { vfs_mount_error(mp, "illegal acregmax: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_ACREGMAX; } if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acdirmin); if (ret != 1 || args.acdirmin < 0) { vfs_mount_error(mp, "illegal acdirmin: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_ACDIRMIN; } if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acdirmax); if (ret != 1 || args.acdirmax < 0) { vfs_mount_error(mp, "illegal acdirmax: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_ACDIRMAX; } if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.wcommitsize); if (ret != 1 || args.wcommitsize < 0) { vfs_mount_error(mp, "illegal wcommitsize: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_WCOMMITSIZE; } if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.timeo); if (ret != 1 || args.timeo <= 0) { vfs_mount_error(mp, "illegal timeo: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_TIMEO; } if 
(vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.timeo); if (ret != 1 || args.timeo <= 0) { vfs_mount_error(mp, "illegal timeout: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_TIMEO; } if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &nametimeo); if (ret != 1 || nametimeo < 0) { vfs_mount_error(mp, "illegal nametimeo: %s", opt); error = EINVAL; goto out; } } if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &negnametimeo); if (ret != 1 || negnametimeo < 0) { vfs_mount_error(mp, "illegal negnametimeo: %s", opt); error = EINVAL; goto out; } } if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &minvers); if (ret != 1 || minvers < 0 || minvers > 1 || (args.flags & NFSMNT_NFSV4) == 0) { vfs_mount_error(mp, "illegal minorversion: %s", opt); error = EINVAL; goto out; } } if (vfs_getopt(mp->mnt_optnew, "sec", (void **) &secname, NULL) == 0) nfs_sec_name(secname, &args.flags); if (mp->mnt_flag & MNT_UPDATE) { struct nfsmount *nmp = VFSTONFS(mp); if (nmp == NULL) { error = EIO; goto out; } /* * If a change from TCP->UDP is done and there are thread(s) * that have I/O RPC(s) in progress with a transfer size * greater than NFS_MAXDGRAMDATA, those thread(s) will be * hung, retrying the RPC(s) forever. Usually these threads * will be seen doing an uninterruptible sleep on wait channel * "nfsreq". */ if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM) tprintf(td->td_proc, LOG_WARNING, "Warning: mount -u that changes TCP->UDP can result in hung threads\n"); /* * When doing an update, we can't change version, * security, switch lockd strategies, change cookie * translation or switch oneopenown. 
*/ args.flags = (args.flags & ~(NFSMNT_NFSV3 | NFSMNT_NFSV4 | NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY | NFSMNT_ONEOPENOWN | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) | (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4 | NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY | NFSMNT_ONEOPENOWN | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)); nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td); goto out; } /* * Make the nfs_ip_paranoia sysctl serve as the default connection * or no-connection mode for those protocols that support * no-connection mode (the flag will be cleared later for protocols * that do not support no-connection mode). This will allow a client * to receive replies from a different IP then the request was * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid), * not 0. */ if (nfs_ip_paranoia == 0) args.flags |= NFSMNT_NOCONN; if (has_nfs_args_opt != 0) { /* * In the 'nfs_args' case, the pointers in the args * structure are in userland - we copy them in here. */ if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) { vfs_mount_error(mp, "Bad file handle"); error = EINVAL; goto out; } error = copyin((caddr_t)args.fh, (caddr_t)nfh, args.fhsize); if (error != 0) goto out; error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen); if (error != 0) goto out; bzero(&hst[hstlen], MNAMELEN - hstlen); args.hostname = hst; /* getsockaddr() call must be after above copyin() calls */ error = getsockaddr(&nam, (caddr_t)args.addr, args.addrlen); if (error != 0) goto out; } else if (nfs_mount_parse_from(mp->mnt_optnew, &args.hostname, (struct sockaddr_in **)&nam, dirpath, sizeof(dirpath), &dirlen) == 0) { has_nfs_from_opt = 1; bcopy(args.hostname, hst, MNAMELEN); hst[MNAMELEN - 1] = '\0'; /* * This only works with NFSv4 for now. 
*/ args.fhsize = 0; args.flags |= NFSMNT_NFSV4; args.sotype = SOCK_STREAM; } else { if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh, &args.fhsize) == 0) { if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) { vfs_mount_error(mp, "Bad file handle"); error = EINVAL; goto out; } bcopy(args.fh, nfh, args.fhsize); } else { args.fhsize = 0; } (void) vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname, &len); if (args.hostname == NULL) { vfs_mount_error(mp, "Invalid hostname"); error = EINVAL; goto out; } if (len >= MNAMELEN) { vfs_mount_error(mp, "Hostname too long"); error = EINVAL; goto out; } bcopy(args.hostname, hst, len); hst[len] = '\0'; } if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0) strlcpy(srvkrbname, name, sizeof (srvkrbname)); else { snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst); cp = strchr(srvkrbname, ':'); if (cp != NULL) *cp = '\0'; } srvkrbnamelen = strlen(srvkrbname); if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0) strlcpy(krbname, name, sizeof (krbname)); else krbname[0] = '\0'; krbnamelen = strlen(krbname); if (has_nfs_from_opt == 0) { if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0) strlcpy(dirpath, name, sizeof (dirpath)); else dirpath[0] = '\0'; dirlen = strlen(dirpath); } if (has_nfs_args_opt == 0 && has_nfs_from_opt == 0) { if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr, &args.addrlen) == 0) { if (args.addrlen > SOCK_MAXADDRLEN) { error = ENAMETOOLONG; goto out; } nam = malloc(args.addrlen, M_SONAME, M_WAITOK); bcopy(args.addr, nam, args.addrlen); nam->sa_len = args.addrlen; } else { vfs_mount_error(mp, "No server address"); error = EINVAL; goto out; } } args.fh = nfh; error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath, dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td, nametimeo, negnametimeo, minvers); out: if (!error) { MNT_ILOCK(mp); mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF | MNTK_USES_BCACHE; if 
((VFSTONFS(mp)->nm_flag & NFSMNT_NFSV4) != 0) mp->mnt_kern_flag |= MNTK_NULL_NOCACHE; MNT_IUNLOCK(mp); } free(hst, M_TEMP); return (error); } /* * VFS Operations. * * mount system call * It seems a bit dumb to copyinstr() the host and path here and then * bcopy() them in mountnfs(), but I wanted to detect errors before * doing the getsockaddr() call because getsockaddr() allocates an mbuf and * an error after that means that I have to release the mbuf. */ /* ARGSUSED */ static int nfs_cmount(struct mntarg *ma, void *data, uint64_t flags) { int error; struct nfs_args args; error = copyin(data, &args, sizeof (struct nfs_args)); if (error) return error; ma = mount_arg(ma, "nfs_args", &args, sizeof args); error = kernel_mount(ma, flags); return (error); } /* * Common code for mount and mountroot */ static int mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen, u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp, struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo, int minvers) { struct nfsmount *nmp; struct nfsnode *np; int error, trycnt, ret; struct nfsvattr nfsva; struct nfsclclient *clp; struct nfsclds *dsp, *tdsp; uint32_t lease; static u_int64_t clval = 0; NFSCL_DEBUG(3, "in mnt\n"); clp = NULL; if (mp->mnt_flag & MNT_UPDATE) { nmp = VFSTONFS(mp); printf("%s: MNT_UPDATE is no longer handled here\n", __func__); FREE(nam, M_SONAME); return (0); } else { MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) + krbnamelen + dirlen + srvkrbnamelen + 2, M_NEWNFSMNT, M_WAITOK | M_ZERO); TAILQ_INIT(&nmp->nm_bufq); TAILQ_INIT(&nmp->nm_sess); if (clval == 0) clval = (u_int64_t)nfsboottime.tv_sec; nmp->nm_clval = clval++; nmp->nm_krbnamelen = krbnamelen; nmp->nm_dirpathlen = dirlen; nmp->nm_srvkrbnamelen = srvkrbnamelen; if (td->td_ucred->cr_uid != (uid_t)0) { /* * nm_uid is used to get KerberosV credentials for * the nfsv4 state handling operations if there 
is * no host based principal set. Use the uid of * this user if not root, since they are doing the * mount. I don't think setting this for root will * work, since root normally does not have user * credentials in a credentials cache. */ nmp->nm_uid = td->td_ucred->cr_uid; } else { /* * Just set to -1, so it won't be used. */ nmp->nm_uid = (uid_t)-1; } /* Copy and null terminate all the names */ if (nmp->nm_krbnamelen > 0) { bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen); nmp->nm_name[nmp->nm_krbnamelen] = '\0'; } if (nmp->nm_dirpathlen > 0) { bcopy(dirpath, NFSMNT_DIRPATH(nmp), nmp->nm_dirpathlen); nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen + 1] = '\0'; } if (nmp->nm_srvkrbnamelen > 0) { bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp), nmp->nm_srvkrbnamelen); nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen + nmp->nm_srvkrbnamelen + 2] = '\0'; } nmp->nm_sockreq.nr_cred = crhold(cred); mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF); mp->mnt_data = nmp; nmp->nm_getinfo = nfs_getnlminfo; nmp->nm_vinvalbuf = ncl_vinvalbuf; } vfs_getnewfsid(mp); nmp->nm_mountp = mp; mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK); /* * Since nfs_decode_args() might optionally set them, these * need to be set to defaults before the call, so that the * optional settings aren't overwritten. */ nmp->nm_nametimeo = nametimeo; nmp->nm_negnametimeo = negnametimeo; nmp->nm_timeo = NFS_TIMEO; nmp->nm_retry = NFS_RETRANS; nmp->nm_readahead = NFS_DEFRAHEAD; /* This is empirical approximation of sqrt(hibufspace) * 256. */ nmp->nm_wcommitsize = NFS_MAXBSIZE / 256; while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace) nmp->nm_wcommitsize *= 2; nmp->nm_wcommitsize *= 256; if ((argp->flags & NFSMNT_NFSV4) != 0) nmp->nm_minorvers = minvers; else nmp->nm_minorvers = 0; nfs_decode_args(mp, nmp, argp, hst, cred, td); /* * V2 can only handle 32 bit filesizes. 
A 4GB-1 limit may be too * high, depending on whether we end up with negative offsets in * the client or server somewhere. 2GB-1 may be safer. * * For V3, ncl_fsinfo will adjust this as necessary. Assume maximum * that we can handle until we find out otherwise. */ if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) nmp->nm_maxfilesize = 0xffffffffLL; else nmp->nm_maxfilesize = OFF_MAX; if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) { nmp->nm_wsize = NFS_WSIZE; nmp->nm_rsize = NFS_RSIZE; nmp->nm_readdirsize = NFS_READDIRSIZE; } nmp->nm_numgrps = NFS_MAXGRPS; nmp->nm_tprintf_delay = nfs_tprintf_delay; if (nmp->nm_tprintf_delay < 0) nmp->nm_tprintf_delay = 0; nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay; if (nmp->nm_tprintf_initial_delay < 0) nmp->nm_tprintf_initial_delay = 0; nmp->nm_fhsize = argp->fhsize; if (nmp->nm_fhsize > 0) bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize); bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN); nmp->nm_nam = nam; /* Set up the sockets and per-host congestion */ nmp->nm_sotype = argp->sotype; nmp->nm_soproto = argp->proto; nmp->nm_sockreq.nr_prog = NFS_PROG; if ((argp->flags & NFSMNT_NFSV4)) nmp->nm_sockreq.nr_vers = NFS_VER4; else if ((argp->flags & NFSMNT_NFSV3)) nmp->nm_sockreq.nr_vers = NFS_VER3; else nmp->nm_sockreq.nr_vers = NFS_VER2; if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0))) goto bad; /* For NFSv4.1, get the clientid now. */ if (nmp->nm_minorvers > 0) { NFSCL_DEBUG(3, "at getcl\n"); error = nfscl_getcl(mp, cred, td, 0, &clp); NFSCL_DEBUG(3, "aft getcl=%d\n", error); if (error != 0) goto bad; } if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) && nmp->nm_dirpathlen > 0) { NFSCL_DEBUG(3, "in dirp\n"); /* * If the fhsize on the mount point == 0 for V4, the mount * path needs to be looked up. 
*/ trycnt = 3; do { error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp), cred, td); NFSCL_DEBUG(3, "aft dirp=%d\n", error); if (error) (void) nfs_catnap(PZERO, error, "nfsgetdirp"); } while (error && --trycnt > 0); if (error) { error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); goto bad; } } /* * A reference count is needed on the nfsnode representing the * remote root. If this object is not persistent, then backward * traversals of the mount point (i.e. "..") will not work if * the nfsnode gets flushed out of the cache. Ufs does not have * this problem, because one can identify root inodes by their * number == UFS_ROOTINO (2). */ if (nmp->nm_fhsize > 0) { /* * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set * non-zero for the root vnode. f_iosize will be set correctly * by nfs_statfs() before any I/O occurs. */ mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ; error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE); if (error) goto bad; *vpp = NFSTOV(np); /* * Get file attributes and transfer parameters for the * mountpoint. This has the side effect of filling in * (*vpp)->v_type with the correct value. */ ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1, cred, td, &nfsva, NULL, &lease); if (ret) { /* * Just set default values to get things going. 
*/ NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr)); nfsva.na_vattr.va_type = VDIR; nfsva.na_vattr.va_mode = 0777; nfsva.na_vattr.va_nlink = 100; nfsva.na_vattr.va_uid = (uid_t)0; nfsva.na_vattr.va_gid = (gid_t)0; nfsva.na_vattr.va_fileid = 2; nfsva.na_vattr.va_gen = 1; nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE; nfsva.na_vattr.va_size = 512 * 1024; lease = 60; } (void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1); if (nmp->nm_minorvers > 0) { NFSCL_DEBUG(3, "lease=%d\n", (int)lease); NFSLOCKCLSTATE(); clp->nfsc_renew = NFSCL_RENEW(lease); clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew; clp->nfsc_clientidrev++; if (clp->nfsc_clientidrev == 0) clp->nfsc_clientidrev++; NFSUNLOCKCLSTATE(); /* * Mount will succeed, so the renew thread can be * started now. */ nfscl_start_renewthread(clp); nfscl_clientrelease(clp); } if (argp->flags & NFSMNT_NFSV3) ncl_fsinfo(nmp, *vpp, cred, td); /* Mark if the mount point supports NFSv4 ACLs. */ if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 && ret == 0 && NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) { MNT_ILOCK(mp); mp->mnt_flag |= MNT_NFS4ACLS; MNT_IUNLOCK(mp); } /* * Lose the lock but keep the ref. 
*/ NFSVOPUNLOCK(*vpp, 0); return (0); } error = EIO; bad: if (clp != NULL) nfscl_clientrelease(clp); newnfs_disconnect(&nmp->nm_sockreq); crfree(nmp->nm_sockreq.nr_cred); if (nmp->nm_sockreq.nr_auth != NULL) AUTH_DESTROY(nmp->nm_sockreq.nr_auth); mtx_destroy(&nmp->nm_sockreq.nr_mtx); mtx_destroy(&nmp->nm_mtx); if (nmp->nm_clp != NULL) { NFSLOCKCLSTATE(); LIST_REMOVE(nmp->nm_clp, nfsc_list); NFSUNLOCKCLSTATE(); free(nmp->nm_clp, M_NFSCLCLIENT); } TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) { if (dsp != TAILQ_FIRST(&nmp->nm_sess) && dsp->nfsclds_sockp != NULL) newnfs_disconnect(dsp->nfsclds_sockp); nfscl_freenfsclds(dsp); } FREE(nmp, M_NEWNFSMNT); FREE(nam, M_SONAME); return (error); } /* * unmount system call */ static int nfs_unmount(struct mount *mp, int mntflags) { struct thread *td; struct nfsmount *nmp; int error, flags = 0, i, trycnt = 0; struct nfsclds *dsp, *tdsp; td = curthread; if (mntflags & MNT_FORCE) flags |= FORCECLOSE; nmp = VFSTONFS(mp); /* * Goes something like this.. * - Call vflush() to clear out vnodes for this filesystem * - Close the socket * - Free up the data structures */ /* In the forced case, cancel any outstanding requests. */ if (mntflags & MNT_FORCE) { error = newnfs_nmcancelreqs(nmp); if (error) goto out; /* For a forced close, get rid of the renew thread now */ nfscl_umount(nmp, td); } /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */ do { error = vflush(mp, 1, flags, td); if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30) (void) nfs_catnap(PSOCK, error, "newndm"); } while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30); if (error) goto out; /* * We are now committed to the unmount. */ if ((mntflags & MNT_FORCE) == 0) nfscl_umount(nmp, td); + else { + mtx_lock(&nmp->nm_mtx); + nmp->nm_privflag |= NFSMNTP_FORCEDISM; + mtx_unlock(&nmp->nm_mtx); + } /* Make sure no nfsiods are assigned to this mount. 
*/ mtx_lock(&ncl_iod_mutex); for (i = 0; i < NFS_MAXASYNCDAEMON; i++) if (ncl_iodmount[i] == nmp) { ncl_iodwant[i] = NFSIOD_AVAILABLE; ncl_iodmount[i] = NULL; } mtx_unlock(&ncl_iod_mutex); + + /* + * We can now set mnt_data to NULL and wait for + * nfssvc(NFSSVC_FORCEDISM) to complete. + */ + mtx_lock(&mountlist_mtx); + mtx_lock(&nmp->nm_mtx); + mp->mnt_data = NULL; + mtx_unlock(&mountlist_mtx); + while ((nmp->nm_privflag & NFSMNTP_CANCELRPCS) != 0) + msleep(nmp, &nmp->nm_mtx, PVFS, "nfsfdism", 0); + mtx_unlock(&nmp->nm_mtx); + newnfs_disconnect(&nmp->nm_sockreq); crfree(nmp->nm_sockreq.nr_cred); FREE(nmp->nm_nam, M_SONAME); if (nmp->nm_sockreq.nr_auth != NULL) AUTH_DESTROY(nmp->nm_sockreq.nr_auth); mtx_destroy(&nmp->nm_sockreq.nr_mtx); mtx_destroy(&nmp->nm_mtx); TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) { if (dsp != TAILQ_FIRST(&nmp->nm_sess) && dsp->nfsclds_sockp != NULL) newnfs_disconnect(dsp->nfsclds_sockp); nfscl_freenfsclds(dsp); } FREE(nmp, M_NEWNFSMNT); out: return (error); } /* * Return root of a filesystem */ static int nfs_root(struct mount *mp, int flags, struct vnode **vpp) { struct vnode *vp; struct nfsmount *nmp; struct nfsnode *np; int error; nmp = VFSTONFS(mp); error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags); if (error) return error; vp = NFSTOV(np); /* * Get transfer parameters and attributes for root vnode once. 
*/ mtx_lock(&nmp->nm_mtx); if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) { mtx_unlock(&nmp->nm_mtx); ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread); } else mtx_unlock(&nmp->nm_mtx); if (vp->v_type == VNON) vp->v_type = VDIR; vp->v_vflag |= VV_ROOT; *vpp = vp; return (0); } /* * Flush out the buffer cache */ /* ARGSUSED */ static int nfs_sync(struct mount *mp, int waitfor) { struct vnode *vp, *mvp; struct thread *td; int error, allerror = 0; td = curthread; MNT_ILOCK(mp); /* * If a forced dismount is in progress, return from here so that * the umount(2) syscall doesn't get stuck in VFS_SYNC() before * calling VFS_UNMOUNT(). */ if (NFSCL_FORCEDISM(mp)) { MNT_IUNLOCK(mp); return (EBADF); } MNT_IUNLOCK(mp); /* * Force stale buffer cache information to be flushed. */ loop: MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { /* XXX Racy bv_cnt check. */ if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 || waitfor == MNT_LAZY) { VI_UNLOCK(vp); continue; } if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto loop; } error = VOP_FSYNC(vp, waitfor, td); if (error) allerror = error; NFSVOPUNLOCK(vp, 0); vrele(vp); } return (allerror); } static int nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req) { struct nfsmount *nmp = VFSTONFS(mp); struct vfsquery vq; int error; bzero(&vq, sizeof(vq)); switch (op) { #if 0 case VFS_CTL_NOLOCKS: val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 
1 : 0; if (req->oldptr != NULL) { error = SYSCTL_OUT(req, &val, sizeof(val)); if (error) return (error); } if (req->newptr != NULL) { error = SYSCTL_IN(req, &val, sizeof(val)); if (error) return (error); if (val) nmp->nm_flag |= NFSMNT_NOLOCKS; else nmp->nm_flag &= ~NFSMNT_NOLOCKS; } break; #endif case VFS_CTL_QUERY: mtx_lock(&nmp->nm_mtx); if (nmp->nm_state & NFSSTA_TIMEO) vq.vq_flags |= VQ_NOTRESP; mtx_unlock(&nmp->nm_mtx); #if 0 if (!(nmp->nm_flag & NFSMNT_NOLOCKS) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) vq.vq_flags |= VQ_NOTRESPLOCK; #endif error = SYSCTL_OUT(req, &vq, sizeof(vq)); break; case VFS_CTL_TIMEO: if (req->oldptr != NULL) { error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay, sizeof(nmp->nm_tprintf_initial_delay)); if (error) return (error); } if (req->newptr != NULL) { error = vfs_suser(mp, req->td); if (error) return (error); error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay, sizeof(nmp->nm_tprintf_initial_delay)); if (error) return (error); if (nmp->nm_tprintf_initial_delay < 0) nmp->nm_tprintf_initial_delay = 0; } break; default: return (ENOTSUP); } return (0); } /* * Purge any RPCs in progress, so that they will all return errors. * This allows dounmount() to continue as far as VFS_UNMOUNT() for a * forced dismount. */ static void nfs_purge(struct mount *mp) { struct nfsmount *nmp = VFSTONFS(mp); newnfs_nmcancelreqs(nmp); } /* * Extract the information needed by the nlm from the nfs vnode. 
*/ static void nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp, struct sockaddr_storage *sp, int *is_v3p, off_t *sizep, struct timeval *timeop) { struct nfsmount *nmp; struct nfsnode *np = VTONFS(vp); nmp = VFSTONFS(vp->v_mount); if (fhlenp != NULL) *fhlenp = (size_t)np->n_fhp->nfh_len; if (fhp != NULL) bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len); if (sp != NULL) bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp))); if (is_v3p != NULL) *is_v3p = NFS_ISV3(vp); if (sizep != NULL) *sizep = np->n_size; if (timeop != NULL) { timeop->tv_sec = nmp->nm_timeo / NFS_HZ; timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ); } } /* * This function prints out an option name, based on the conditional * argument. */ static __inline void nfscl_printopt(struct nfsmount *nmp, int testval, char *opt, char **buf, size_t *blen) { int len; if (testval != 0 && *blen > strlen(opt)) { len = snprintf(*buf, *blen, "%s", opt); if (len != strlen(opt)) printf("EEK!!\n"); *buf += len; *blen -= len; } } /* * This function printf out an options integer value. */ static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval, char *opt, char **buf, size_t *blen) { int len; if (*blen > strlen(opt) + 1) { /* Could result in truncated output string. */ len = snprintf(*buf, *blen, "%s=%d", opt, optval); if (len < *blen) { *buf += len; *blen -= len; } } } /* * Load the option flags and values into the buffer. 
*/ void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen) { char *buf; size_t blen; buf = buffer; blen = buflen; nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf, &blen); if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) { nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_ONEOPENOWN) != 0 && nmp->nm_minorvers > 0, ",oneopenown", &buf, &blen); } nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0, "nfsv2", &buf, &blen); nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen); nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0, ",noncontigwr", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) == 0, ",lockd", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) == NFSMNT_NOLOCKD, ",nolockd", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen); 
nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen); } Index: head/sys/fs/nfsclient/nfsmount.h =================================================================== --- head/sys/fs/nfsclient/nfsmount.h (revision 321687) +++ head/sys/fs/nfsclient/nfsmount.h (revision 321688) @@ -1,141 +1,146 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef _NFSCLIENT_NFSMOUNT_H_
#define _NFSCLIENT_NFSMOUNT_H_

/* NOTE(review): the header name after this #include is missing; it appears to have been lost in extraction. */
#include

/*
 * Mount structure.
 * One allocated on every NFS mount.
 * Holds NFS specific information for mount.
 *
 * Several members are reached through the nm_xxx accessor macros defined
 * after the structure, which expand to fields of nm_com and nm_sockreq.
 */
struct nfsmount {
	struct nfsmount_common nm_com;	/* Common fields for nlm */
+	uint32_t nm_privflag;		/* Private flags (NFSMNTP_xxx bits) */
	int nm_numgrps;			/* Max. size of groupslist */
	u_char nm_fh[NFSX_FHMAX];	/* File handle of root dir */
	int nm_fhsize;			/* Size of root file handle */
	struct nfssockreq nm_sockreq;	/* Socket Info */
	int nm_timeouts;		/* Request timeouts */
	int nm_rsize;			/* Max size of read rpc */
	int nm_wsize;			/* Max size of write rpc */
	int nm_readdirsize;		/* Size of a readdir rpc */
	int nm_readahead;		/* Num. of blocks to readahead */
	int nm_wcommitsize;		/* Max size of commit for write */
	int nm_acdirmin;		/* Directory attr cache min lifetime */
	int nm_acdirmax;		/* Directory attr cache max lifetime */
	int nm_acregmin;		/* Reg file attr cache min lifetime */
	int nm_acregmax;		/* Reg file attr cache max lifetime */
	u_char nm_verf[NFSX_VERF];	/* write verifier */
	TAILQ_HEAD(, buf) nm_bufq;	/* async io buffer queue */
	short nm_bufqlen;		/* number of buffers in queue */
	short nm_bufqwant;		/* process wants to add to the queue */
	int nm_bufqiods;		/* number of iods processing queue */
	u_int64_t nm_maxfilesize;	/* maximum file size */
	int nm_tprintf_initial_delay;	/* initial delay */
	int nm_tprintf_delay;		/* interval for messages */
	int nm_nametimeo;		/* timeout for +ve entries (sec) */
	int nm_negnametimeo;		/* timeout for -ve entries (sec) */

	/* Newnfs additions */
	TAILQ_HEAD(, nfsclds) nm_sess;	/* Session(s) for NFSv4.1. */
	struct nfsclclient *nm_clp;
	uid_t nm_uid;			/* Uid for SetClientID etc. */
	u_int64_t nm_clval;		/* identifies which clientid */
	u_int64_t nm_fsid[2];		/* NFSv4 fsid */
	int nm_minorvers;		/* Minor version # for NFSv4 */
	u_int16_t nm_krbnamelen;	/* Krb5 host principal, if any */
	u_int16_t nm_dirpathlen;	/* and mount dirpath, for V4 */
	u_int16_t nm_srvkrbnamelen;	/* and the server's target name */
	u_char nm_name[1];		/* malloc'd actual len of krbname + dirpath */
};

/* Shorthand accessors for members of nm_sockreq and nm_com (nm_krbname aliases nm_name). */
#define nm_nam nm_sockreq.nr_nam
#define nm_sotype nm_sockreq.nr_sotype
#define nm_so nm_sockreq.nr_so
#define nm_soflags nm_sockreq.nr_soflags
#define nm_soproto nm_sockreq.nr_soproto
#define nm_client nm_sockreq.nr_client
#define nm_krbname nm_name
#define nm_mtx nm_com.nmcom_mtx
#define nm_flag nm_com.nmcom_flag
#define nm_state nm_com.nmcom_state
#define nm_mountp nm_com.nmcom_mountp
#define nm_timeo nm_com.nmcom_timeo
#define nm_retry nm_com.nmcom_retry
#define nm_hostname nm_com.nmcom_hostname
#define nm_getinfo nm_com.nmcom_getinfo
#define nm_vinvalbuf nm_com.nmcom_vinvalbuf
+
+/*
 * Private flags, kept in nm_privflag.  NFSMNTP_FORCEDISM is the bit
 * tested by the NFSCL_FORCEDISM() macro in nfscl.h.
 */
+#define NFSMNTP_FORCEDISM 0x00000001
+#define NFSMNTP_CANCELRPCS 0x00000002

/*
 * Pointers into nm_name[]: the dirpath starts nm_krbnamelen + 1 bytes in
 * and the server krbname a further nm_dirpathlen + 1 bytes after that.
 * The "+ 1" steps presumably skip each string's NUL terminator -- confirm
 * against the code that fills nm_name.
 */
#define NFSMNT_DIRPATH(m) (&((m)->nm_name[(m)->nm_krbnamelen + 1]))
#define NFSMNT_SRVKRBNAME(m) \
	(&((m)->nm_name[(m)->nm_krbnamelen + (m)->nm_dirpathlen + 2]))

#if defined(_KERNEL)
/*
 * Convert mount ptr to nfsmount ptr.
 */
#define VFSTONFS(mp) ((struct nfsmount *)((mp)->mnt_data))

/*
 * Get a pointer to the MDS session, which is always the first element
 * in the list.
 * This macro can only be safely used when the NFSLOCKMNT() lock is held.
 * The inline function can be used when the lock isn't held.
 * NOTE(review): there is no check for an empty nm_sess list; the macro
 * takes a member address off whatever TAILQ_FIRST() returns -- confirm
 * callers guarantee at least one session.
 */
#define NFSMNT_MDSSESSION(m) (&(TAILQ_FIRST(&((m)->nm_sess))->nfsclds_sess))

/*
 * Return the MDS session pointer for nmp, holding nm_mtx only around the
 * NFSMNT_MDSSESSION() list access.  The mutex is dropped before returning,
 * so the caller uses the returned pointer without it.
 */
static __inline struct nfsclsession *
nfsmnt_mdssession(struct nfsmount *nmp)
{
	struct nfsclsession *tsep;

	mtx_lock(&nmp->nm_mtx);
	tsep = NFSMNT_MDSSESSION(nmp);
	mtx_unlock(&nmp->nm_mtx);
	return (tsep);
}

/* Fallback values, used only when not already defined. */
#ifndef NFS_DEFAULT_NAMETIMEO
#define NFS_DEFAULT_NAMETIMEO 60
#endif
#ifndef NFS_DEFAULT_NEGNAMETIMEO
#define NFS_DEFAULT_NEGNAMETIMEO 60
#endif
#endif /* _KERNEL */
#endif /* _NFSCLIENT_NFSMOUNT_H_ */
Index: head/sys/nfs/nfs_nfssvc.c
===================================================================
--- head/sys/nfs/nfs_nfssvc.c (revision 321687)
+++ head/sys/nfs/nfs_nfssvc.c (revision 321688)
@@ -1,157 +1,157 @@
/*-
 * Copyright (c) 1989, 1993
 * The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 3.
Neither the name of the University nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * NOTE(review): every "#include <...>" below has lost its header name,
 * apparently during extraction; only the quoted include survived.
 */
#include
__FBSDID("$FreeBSD$");

#include "opt_nfs.h"

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

static int nfssvc_offset = SYS_nfssvc;
static struct sysent nfssvc_prev_sysent;
MAKE_SYSENT(nfssvc);

/*
 * This tiny module simply handles the nfssvc() system call. The other
 * nfs modules that use the system call register themselves by setting
 * the nfsd_call_xxx function pointers non-NULL.
 */
int (*nfsd_call_nfsserver)(struct thread *, struct nfssvc_args *) = NULL;
int (*nfsd_call_nfscommon)(struct thread *, struct nfssvc_args *) = NULL;
int (*nfsd_call_nfscl)(struct thread *, struct nfssvc_args *) = NULL;
int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *) = NULL;

/*
 * Nfs server pseudo system call for the nfsd's
 *
 * Dispatches on the bits set in uap->flag to the first registered
 * nfsd_call_xxx handler whose flag group matches, in the order tested
 * below.  Returns the priv_check() error if the caller lacks
 * PRIV_NFS_DAEMON, EINVAL if no non-NULL handler claims any of the bits,
 * otherwise the handler's result with EINTR and ERESTART mapped to 0.
 */
int
sys_nfssvc(struct thread *td, struct nfssvc_args *uap)
{
	int error;

	KASSERT(!mtx_owned(&Giant), ("nfssvc(): called with Giant"));

	AUDIT_ARG_CMD(uap->flag);

	/*
	 * Allow anyone to get the stats: the privilege check is skipped
	 * only when no bit other than NFSSVC_GETSTATS is set.
	 */
	if ((uap->flag & ~NFSSVC_GETSTATS) != 0) {
		error = priv_check(td, PRIV_NFS_DAEMON);
		if (error != 0)
			return (error);
	}
	/* Result if none of the branches below runs a handler. */
	error = EINVAL;
	if ((uap->flag & (NFSSVC_ADDSOCK | NFSSVC_OLDNFSD | NFSSVC_NFSD)) &&
	    nfsd_call_nfsserver != NULL)
		error = (*nfsd_call_nfsserver)(td, uap);
	else if ((uap->flag & (NFSSVC_CBADDSOCK | NFSSVC_NFSCBD |
-	    NFSSVC_DUMPMNTOPTS)) && nfsd_call_nfscl != NULL)
+	    NFSSVC_DUMPMNTOPTS | NFSSVC_FORCEDISM)) && nfsd_call_nfscl != NULL)
		error = (*nfsd_call_nfscl)(td, uap);
	else if ((uap->flag & (NFSSVC_IDNAME | NFSSVC_GETSTATS |
	    NFSSVC_GSSDADDPORT | NFSSVC_GSSDADDFIRST | NFSSVC_GSSDDELETEALL |
	    NFSSVC_NFSUSERDPORT | NFSSVC_NFSUSERDDELPORT)) &&
	    nfsd_call_nfscommon != NULL)
		error = (*nfsd_call_nfscommon)(td, uap);
	else if ((uap->flag & (NFSSVC_NFSDNFSD | NFSSVC_NFSDADDSOCK |
	    NFSSVC_PUBLICFH | NFSSVC_V4ROOTEXPORT | NFSSVC_NOPUBLICFH |
	    NFSSVC_STABLERESTART | NFSSVC_ADMINREVOKE |
	    NFSSVC_DUMPCLIENTS | NFSSVC_DUMPLOCKS | NFSSVC_BACKUPSTABLE |
	    NFSSVC_SUSPENDNFSD | NFSSVC_RESUMENFSD)) &&
	    nfsd_call_nfsd != NULL)
		error = (*nfsd_call_nfsd)(td, uap);
	/* An interrupted or restartable result is reported as success. */
	if (error == EINTR || error == ERESTART)
		error = 0;
	return (error);
}

/*
 * Called once to initialize data structures...
 *
 * Module event handler: MOD_LOAD registers the nfssvc syscall and records
 * that in "registered"; MOD_UNLOAD fails with EBUSY while any
 * nfsd_call_xxx pointer is still non-NULL, otherwise deregisters the
 * syscall if it was registered; any other event yields EOPNOTSUPP.
 */
static int
nfssvc_modevent(module_t mod, int type, void *data)
{
	static int registered;
	int error = 0;

	switch (type) {
	case MOD_LOAD:
		error = syscall_register(&nfssvc_offset, &nfssvc_sysent,
		    &nfssvc_prev_sysent, SY_THR_STATIC_KLD);
		if (error)
			break;
		registered = 1;
		break;

	case MOD_UNLOAD:
		if (nfsd_call_nfsserver != NULL || nfsd_call_nfscommon != NULL
		    || nfsd_call_nfscl != NULL || nfsd_call_nfsd != NULL) {
			error = EBUSY;
			break;
		}
		if (registered)
			syscall_deregister(&nfssvc_offset, &nfssvc_prev_sysent);
		registered = 0;
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
	return error;
}

static moduledata_t nfssvc_mod = {
	"nfssvc",
	nfssvc_modevent,
	NULL,
};
DECLARE_MODULE(nfssvc, nfssvc_mod, SI_SUB_VFS, SI_ORDER_ANY);

/* So that loader and kldload(2) can find us, wherever we are..
*/
MODULE_VERSION(nfssvc, 1);
Index: head/sys/nfs/nfssvc.h
===================================================================
--- head/sys/nfs/nfssvc.h (revision 321687)
+++ head/sys/nfs/nfssvc.h (revision 321688)
@@ -1,81 +1,82 @@
/*-
 * Copyright (c) 1989, 1993, 1995
 * The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef _NFS_NFSSVC_H_
#define _NFS_NFSSVC_H_

/*
 * Flags for nfssvc() system call.
 * Each value is a single distinct bit; sys_nfssvc() tests them with
 * bitwise masks against the flag argument.
 */
#define NFSSVC_OLDNFSD 0x004
#define NFSSVC_ADDSOCK 0x008
#define NFSSVC_NFSD 0x010

/*
 * and ones for nfsv4.
 */
#define NFSSVC_NOPUBLICFH 0x00000020
#define NFSSVC_STABLERESTART 0x00000040
#define NFSSVC_NFSDNFSD 0x00000080
#define NFSSVC_NFSDADDSOCK 0x00000100
#define NFSSVC_IDNAME 0x00000200
#define NFSSVC_GSSDDELETEALL 0x00000400
#define NFSSVC_GSSDADDPORT 0x00000800
#define NFSSVC_NFSUSERDPORT 0x00001000
#define NFSSVC_NFSUSERDDELPORT 0x00002000
#define NFSSVC_V4ROOTEXPORT 0x00004000
#define NFSSVC_ADMINREVOKE 0x00008000
#define NFSSVC_DUMPCLIENTS 0x00010000
#define NFSSVC_DUMPLOCKS 0x00020000
#define NFSSVC_GSSDADDFIRST 0x00040000
#define NFSSVC_PUBLICFH 0x00080000
#define NFSSVC_NFSCBD 0x00100000
#define NFSSVC_CBADDSOCK 0x00200000
#define NFSSVC_GETSTATS 0x00400000
#define NFSSVC_BACKUPSTABLE 0x00800000
#define NFSSVC_ZEROCLTSTATS 0x01000000 /* modifier for GETSTATS */
#define NFSSVC_ZEROSRVSTATS 0x02000000 /* modifier for GETSTATS */
#define NFSSVC_SUSPENDNFSD 0x04000000
#define NFSSVC_RESUMENFSD 0x08000000
#define NFSSVC_DUMPMNTOPTS 0x10000000
#define NFSSVC_NEWSTRUCT 0x20000000
+#define NFSSVC_FORCEDISM 0x40000000 /* routed to nfsd_call_nfscl by sys_nfssvc() */

/* Argument structure for NFSSVC_DUMPMNTOPTS. */
struct nfscl_dumpmntopts {
	char *ndmnt_fname;	/* File Name */
	size_t ndmnt_blen;	/* Size of buffer */
	void *ndmnt_buf;	/* and the buffer */
};

#endif /* _NFS_NFSSVC_H_ */
Index: head/sys/sys/param.h
===================================================================
--- head/sys/sys/param.h (revision 321687)
+++ head/sys/sys/param.h (revision 321688)
@@ -1,362 +1,362 @@
/*-
 * Copyright (c) 1982, 1986, 1989, 1993
 * The Regents of the University of California. All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
*
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)param.h 8.3 (Berkeley) 4/4/95
 * $FreeBSD$
 */

#ifndef _SYS_PARAM_H_
#define _SYS_PARAM_H_

/*
 * NOTE(review): throughout this header the name after each bare
 * "#include" is missing, apparently lost in extraction.
 */
#include

#define BSD 199506 /* System version (year & month). */
#define BSD4_3 1
#define BSD4_4 1

/*
 * __FreeBSD_version numbers are documented in the Porter's Handbook.
 * If you bump the version for any reason, you should update the documentation
 * there.
 * Currently this lives here in the doc/ repository:
 *
 * head/en_US.ISO8859-1/books/porters-handbook/versions/chapter.xml
 *
 * scheme is: <major><two digit minor>Rxx
 * 'R' is in the range 0 to 4 if this is a release branch or
 * X.0-CURRENT before releng/X.0 is created, otherwise 'R' is
 * in the range 5 to 9.
 */
#undef __FreeBSD_version
-#define __FreeBSD_version 1200039 /* Master, propagated to newvers */
+#define __FreeBSD_version 1200040 /* Master, propagated to newvers */

/*
 * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
 * which by definition is always true on FreeBSD. This macro is also defined
 * on other systems that use the kernel of FreeBSD, such as GNU/kFreeBSD.
 *
 * It is tempting to use this macro in userland code when we want to enable
 * kernel-specific routines, and in fact it's fine to do this in code that
 * is part of FreeBSD itself. However, be aware that as presence of this
 * macro is still not widespread (e.g. older FreeBSD versions, 3rd party
 * compilers, etc), it is STRONGLY DISCOURAGED to check for this macro in
 * external applications without also checking for __FreeBSD__ as an
 * alternative.
 */
#undef __FreeBSD_kernel__
#define __FreeBSD_kernel__

#if defined(_KERNEL) || defined(IN_RTLD)
/* Version thresholds; P_OSREL_MAJOR() divides a version by 100000. */
#define P_OSREL_SIGWAIT 700000
#define P_OSREL_SIGSEGV 700004
#define P_OSREL_MAP_ANON 800104
#define P_OSREL_MAP_FSTRICT 1100036
#define P_OSREL_SHUTDOWN_ENOTCONN 1100077
#define P_OSREL_MAP_GUARD 1200035

#define P_OSREL_MAJOR(x) ((x) / 100000)
#endif

#ifndef LOCORE
#include
#endif

/*
 * Machine-independent constants (some used in following include files).
 * Redefined constants are from POSIX 1003.1 limits file.
 *
 * MAXCOMLEN should be >= sizeof(ac_comm) (see <acct.h>)
 */
#include

#define MAXCOMLEN 19 /* max command name remembered */
#define MAXINTERP PATH_MAX /* max interpreter file name length */
#define MAXLOGNAME 33 /* max login name length (incl. NUL) */
#define MAXUPRC CHILD_MAX /* max simultaneous processes */
#define NCARGS ARG_MAX /* max bytes for an exec function */
#define NGROUPS (NGROUPS_MAX+1) /* max number groups */
#define NOFILE OPEN_MAX /* max open files per process */
#define NOGROUP 65535 /* marker for empty group set member */
#define MAXHOSTNAMELEN 256 /* max hostname size */
#define SPECNAMELEN 63 /* max length of devicename */

/* More types and definitions used throughout the kernel. */
#ifdef _KERNEL
#include
#include
#ifndef LOCORE
#include
#include
#endif

#ifndef FALSE
#define FALSE 0
#endif
#ifndef TRUE
#define TRUE 1
#endif
#endif

#ifndef _KERNEL
/* Signals. */
#include
#endif

/* Machine type dependent parameters. */
#include
#ifndef _KERNEL
#include
#endif

#ifndef DEV_BSHIFT
#define DEV_BSHIFT 9 /* log2(DEV_BSIZE) */
#endif
/*
 * NOTE(review): the next definition is damaged -- text between a "<<" and
 * a later ">>" appears to have been dropped in extraction, which also
 * leaves the following #endif without a visible matching #if.
 */
#define DEV_BSIZE (1<>PAGE_SHIFT)
#endif

/*
 * btodb() is messy and perhaps slow because `bytes' may be an off_t. We
 * want to shift an unsigned type to avoid sign extension and we don't
 * want to widen `bytes' unnecessarily. Assume that the result fits in
 * a daddr_t.
 */
#ifndef btodb
#define btodb(bytes) /* calculates (bytes / DEV_BSIZE) */ \
	(sizeof (bytes) > sizeof(long) \
	? (daddr_t)((unsigned long long)(bytes) >> DEV_BSHIFT) \
	: (daddr_t)((unsigned long)(bytes) >> DEV_BSHIFT))
#endif

#ifndef dbtob
#define dbtob(db) /* calculates (db * DEV_BSIZE) */ \
	((off_t)(db) << DEV_BSHIFT)
#endif

#define PRIMASK 0x0ff
#define PCATCH 0x100 /* OR'd with pri for tsleep to check signals */
#define PDROP 0x200 /* OR'd with pri to stop re-entry of interlock mutex */

#define NZERO 0 /* default "nice" */

#define NBBY 8 /* number of bits in a byte */
#define NBPW sizeof(int) /* number of bytes per word (integer) */

#define CMASK 022 /* default file mask: S_IWGRP|S_IWOTH */

#define NODEV (dev_t)(-1) /* non-existent device */

/*
 * File system parameters and macros.
 *
 * MAXBSIZE - Filesystems are made out of blocks of at most MAXBSIZE bytes
 * per block. MAXBSIZE may be made larger without affecting
 * any existing filesystems as long as it does not exceed MAXPHYS,
 * and may be made smaller at the risk of not being able to use
 * filesystems which require a block size exceeding MAXBSIZE.
 *
 * MAXBCACHEBUF - Maximum size of a buffer in the buffer cache. This must
 * be >= MAXBSIZE and can be set differently for different
 * architectures by defining it in <machine/param.h>.
 * Making this larger allows NFS to do larger reads/writes.
 *
 * BKVASIZE - Nominal buffer space per buffer, in bytes. BKVASIZE is the
 * minimum KVM memory reservation the kernel is willing to make.
 * Filesystems can of course request smaller chunks. Actual
 * backing memory uses a chunk size of a page (PAGE_SIZE).
 * The default value here can be overridden on a per-architecture
 * basis by defining it in <machine/param.h>.
 *
 * If you make BKVASIZE too small you risk seriously fragmenting
 * the buffer KVM map which may slow things down a bit. If you
 * make it too big the kernel will not be able to optimally use
 * the KVM memory reserved for the buffer cache and will wind
 * up with too-few buffers.
 *
 * The default is 16384, roughly 2x the block size used by a
 * normal UFS filesystem.
 */
#define MAXBSIZE 65536 /* must be power of 2 */
#ifndef MAXBCACHEBUF
#define MAXBCACHEBUF MAXBSIZE /* must be a power of 2 >= MAXBSIZE */
#endif
#ifndef BKVASIZE
#define BKVASIZE 16384 /* must be power of 2 */
#endif
#define BKVAMASK (BKVASIZE-1)

/*
 * MAXPATHLEN defines the longest permissible path length after expanding
 * symbolic links. It is used to allocate a temporary buffer from the buffer
 * pool in which to do the name expansion, hence should be a power of two,
 * and must be less than or equal to MAXBSIZE. MAXSYMLINKS defines the
 * maximum number of symbolic links that may be expanded in a path name.
 * It should be set high enough to allow all legitimate uses, but halt
 * infinite loops reasonably quickly.
 */
#define MAXPATHLEN PATH_MAX
#define MAXSYMLINKS 32

/*
 * Bit map related macros.
 * The map `a' is addressed as an array of unsigned char; bit i lives in
 * byte i/NBBY at position i%NBBY.  Both arguments are evaluated more than
 * once in some of these, so avoid arguments with side effects.
 */
#define setbit(a,i) (((unsigned char *)(a))[(i)/NBBY] |= 1<<((i)%NBBY))
#define clrbit(a,i) (((unsigned char *)(a))[(i)/NBBY] &= ~(1<<((i)%NBBY)))
#define isset(a,i) \
	(((const unsigned char *)(a))[(i)/NBBY] & (1<<((i)%NBBY)))
#define isclr(a,i) \
	((((const unsigned char *)(a))[(i)/NBBY] & (1<<((i)%NBBY))) == 0)

/*
 * Macros for counting and rounding.
 * The "2" variants use masking and are only correct when y is a power of
 * two.  Note that powerof2(0) evaluates to true.
 */
#ifndef howmany
#define howmany(x, y) (((x)+((y)-1))/(y))
#endif
#define nitems(x) (sizeof((x)) / sizeof((x)[0]))
#define rounddown(x, y) (((x)/(y))*(y))
#define rounddown2(x, y) ((x)&(~((y)-1))) /* if y is power of two */
#define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) /* to any y */
#define roundup2(x, y) (((x)+((y)-1))&(~((y)-1))) /* if y is powers of two */
#define powerof2(x) ((((x)-1)&(x))==0)

/* Macros for min/max.  Each argument appears twice in the expansion, so it may be evaluated twice. */
#define MIN(a,b) (((a)<(b))?(a):(b))
#define MAX(a,b) (((a)>(b))?(a):(b))

#ifdef _KERNEL
/*
 * Basic byte order function prototypes for non-inline functions.
 */
#ifndef LOCORE
#ifndef _BYTEORDER_PROTOTYPED
#define _BYTEORDER_PROTOTYPED
__BEGIN_DECLS
__uint32_t htonl(__uint32_t);
__uint16_t htons(__uint16_t);
__uint32_t ntohl(__uint32_t);
__uint16_t ntohs(__uint16_t);
__END_DECLS
#endif
#endif

#ifndef lint
#ifndef _BYTEORDER_FUNC_DEFINED
#define _BYTEORDER_FUNC_DEFINED
#define htonl(x) __htonl(x)
#define htons(x) __htons(x)
#define ntohl(x) __ntohl(x)
#define ntohs(x) __ntohs(x)
#endif /* !_BYTEORDER_FUNC_DEFINED */
#endif /* lint */
#endif /* _KERNEL */

/*
 * Scale factor for scaled integers used to count %cpu time and load avgs.
 *
 * The number of CPU `tick's that map to a unique `%age' can be expressed
 * by the formula (1 / (2 ^ (FSHIFT - 11))). The maximum load average that
 * can be calculated (assuming 32 bits) can be closely approximated using
 * the formula (2 ^ (2 * (16 - FSHIFT))) for (FSHIFT < 15).
 *
 * For the scheduler to maintain a 1:1 mapping of CPU `tick' to `%age',
 * FSHIFT must be at least 11; this gives us a maximum load avg of ~1024.
 */
#define FSHIFT 11 /* bits to right of fixed binary point */
/*
 * NOTE(review): the FSCALE definition below is damaged -- text between a
 * "<<" and a later ">>" appears to have been dropped in extraction.
 */
#define FSCALE (1<> (PAGE_SHIFT - DEV_BSHIFT))

#define ctodb(db) /* calculates pages to devblks */ \
	((db) << (PAGE_SHIFT - DEV_BSHIFT))

/*
 * Old spelling of __containerof().
 * Given x, a pointer to member m of a struct s, yields a pointer to the
 * enclosing struct s by subtracting offsetof(struct s, m).
 */
#define member2struct(s, m, x) \
	((struct s *)(void *)((char *)(x) - offsetof(struct s, m)))

/*
 * Access a variable length array that has been declared as a fixed
 * length array.
 */
#define __PAST_END(array, offset) (((__typeof__(*(array)) *)(array))[offset])

#endif /* _SYS_PARAM_H_ */