Index: head/sys/fs/nfsclient/nfs_clport.c =================================================================== --- head/sys/fs/nfsclient/nfs_clport.c (revision 299412) +++ head/sys/fs/nfsclient/nfs_clport.c (revision 299413) @@ -1,1432 +1,1435 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include /* * generally, I don't like #includes inside .h files, but it seems to * be the easiest way to handle the port. */ #include #include #include #include #include #include #include #include #include #ifdef KDTRACE_HOOKS dtrace_nfsclient_attrcache_flush_probe_func_t dtrace_nfscl_attrcache_flush_done_probe; uint32_t nfscl_attrcache_flush_done_id; dtrace_nfsclient_attrcache_get_hit_probe_func_t dtrace_nfscl_attrcache_get_hit_probe; uint32_t nfscl_attrcache_get_hit_id; dtrace_nfsclient_attrcache_get_miss_probe_func_t dtrace_nfscl_attrcache_get_miss_probe; uint32_t nfscl_attrcache_get_miss_id; dtrace_nfsclient_attrcache_load_probe_func_t dtrace_nfscl_attrcache_load_done_probe; uint32_t nfscl_attrcache_load_done_id; #endif /* !KDTRACE_HOOKS */ extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1; extern struct vop_vector newnfs_vnodeops; extern struct vop_vector newnfs_fifoops; extern uma_zone_t newnfsnode_zone; extern struct buf_ops buf_ops_newnfs; extern int ncl_pbuf_freecnt; extern short nfsv4_cbport; extern int nfscl_enablecallb; extern int nfs_numnfscbd; extern int nfscl_inited; struct mtx nfs_clstate_mutex; struct mtx ncl_iod_mutex; NFSDLOCKMUTEX; extern void (*ncl_call_invalcaches)(struct vnode *); SYSCTL_DECL(_vfs_nfs); static int ncl_fileid_maxwarnings = 10; SYSCTL_INT(_vfs_nfs, OID_AUTO, fileid_maxwarnings, CTLFLAG_RWTUN, &ncl_fileid_maxwarnings, 0, "Limit fileid corruption warnings; 0 is off; -1 is unlimited"); static volatile int ncl_fileid_nwarnings; static void nfscl_warn_fileid(struct nfsmount *, struct nfsvattr *, struct nfsvattr *); /* * Comparison function for vfs_hash functions. */ int newnfs_vncmpf(struct vnode *vp, void *arg) { struct nfsfh *nfhp = (struct nfsfh *)arg; struct nfsnode *np = VTONFS(vp); if (np->n_fhp->nfh_len != nfhp->nfh_len || NFSBCMP(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len)) return (1); return (0); } /* * Look up a vnode/nfsnode by file handle. * Callers must check for mount points!! * In all cases, a pointer to a * nfsnode structure is returned. * This variant takes a "struct nfsfh *" as second argument and uses * that structure up, either by hanging off the nfsnode or FREEing it. */ int nfscl_nget(struct mount *mntp, struct vnode *dvp, struct nfsfh *nfhp, struct componentname *cnp, struct thread *td, struct nfsnode **npp, void *stuff, int lkflags) { struct nfsnode *np, *dnp; struct vnode *vp, *nvp; struct nfsv4node *newd, *oldd; int error; u_int hash; struct nfsmount *nmp; nmp = VFSTONFS(mntp); dnp = VTONFS(dvp); *npp = NULL; hash = fnv_32_buf(nfhp->nfh_fh, nfhp->nfh_len, FNV1_32_INIT); error = vfs_hash_get(mntp, hash, lkflags, td, &nvp, newnfs_vncmpf, nfhp); if (error == 0 && nvp != NULL) { /* * I believe there is a slight chance that vgonel() could * get called on this vnode between when NFSVOPLOCK() drops * the VI_LOCK() and vget() acquires it again, so that it * hasn't yet had v_usecount incremented. If this were to * happen, the VI_DOOMED flag would be set, so check for * that here. Since we now have the v_usecount incremented, * we should be ok until we vrele() it, if the VI_DOOMED * flag isn't set now. */ VI_LOCK(nvp); if ((nvp->v_iflag & VI_DOOMED)) { VI_UNLOCK(nvp); vrele(nvp); error = ENOENT; } else { VI_UNLOCK(nvp); } } if (error) { FREE((caddr_t)nfhp, M_NFSFH); return (error); } if (nvp != NULL) { np = VTONFS(nvp); /* * For NFSv4, check to see if it is the same name and * replace the name, if it is different. */ oldd = newd = NULL; if ((nmp->nm_flag & NFSMNT_NFSV4) && np->n_v4 != NULL && nvp->v_type == VREG && (np->n_v4->n4_namelen != cnp->cn_namelen || NFSBCMP(cnp->cn_nameptr, NFS4NODENAME(np->n_v4), cnp->cn_namelen) || dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen || NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, dnp->n_fhp->nfh_len))) { MALLOC(newd, struct nfsv4node *, sizeof (struct nfsv4node) + dnp->n_fhp->nfh_len + + cnp->cn_namelen - 1, M_NFSV4NODE, M_WAITOK); NFSLOCKNODE(np); if (newd != NULL && np->n_v4 != NULL && nvp->v_type == VREG && (np->n_v4->n4_namelen != cnp->cn_namelen || NFSBCMP(cnp->cn_nameptr, NFS4NODENAME(np->n_v4), cnp->cn_namelen) || dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen || NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, dnp->n_fhp->nfh_len))) { oldd = np->n_v4; np->n_v4 = newd; newd = NULL; np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len; np->n_v4->n4_namelen = cnp->cn_namelen; NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, dnp->n_fhp->nfh_len); NFSBCOPY(cnp->cn_nameptr, NFS4NODENAME(np->n_v4), cnp->cn_namelen); } NFSUNLOCKNODE(np); } if (newd != NULL) FREE((caddr_t)newd, M_NFSV4NODE); if (oldd != NULL) FREE((caddr_t)oldd, M_NFSV4NODE); *npp = np; FREE((caddr_t)nfhp, M_NFSFH); return (0); } np = uma_zalloc(newnfsnode_zone, M_WAITOK | M_ZERO); error = getnewvnode(nfs_vnode_tag, mntp, &newnfs_vnodeops, &nvp); if (error) { uma_zfree(newnfsnode_zone, np); FREE((caddr_t)nfhp, M_NFSFH); return (error); } vp = nvp; KASSERT(vp->v_bufobj.bo_bsize != 0, ("nfscl_nget: bo_bsize == 0")); vp->v_bufobj.bo_ops = &buf_ops_newnfs; vp->v_data = np; np->n_vnode = vp; /* * Initialize the mutex even if the vnode is going to be a loser. * This simplifies the logic in reclaim, which can then unconditionally * destroy the mutex (in the case of the loser, or if hash_insert * happened to return an error no special casing is needed). */ mtx_init(&np->n_mtx, "NEWNFSnode lock", NULL, MTX_DEF | MTX_DUPOK); /* * Are we getting the root? If so, make sure the vnode flags * are correct */ if ((nfhp->nfh_len == nmp->nm_fhsize) && !bcmp(nfhp->nfh_fh, nmp->nm_fh, nfhp->nfh_len)) { if (vp->v_type == VNON) vp->v_type = VDIR; vp->v_vflag |= VV_ROOT; } np->n_fhp = nfhp; /* * For NFSv4, we have to attach the directory file handle and * file name, so that Open Ops can be done later. */ if (nmp->nm_flag & NFSMNT_NFSV4) { MALLOC(np->n_v4, struct nfsv4node *, sizeof (struct nfsv4node) + dnp->n_fhp->nfh_len + cnp->cn_namelen - 1, M_NFSV4NODE, M_WAITOK); np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len; np->n_v4->n4_namelen = cnp->cn_namelen; NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, dnp->n_fhp->nfh_len); NFSBCOPY(cnp->cn_nameptr, NFS4NODENAME(np->n_v4), cnp->cn_namelen); } else { np->n_v4 = NULL; } /* * NFS supports recursive and shared locking. */ lockmgr(vp->v_vnlock, LK_EXCLUSIVE | LK_NOWITNESS, NULL); VN_LOCK_AREC(vp); VN_LOCK_ASHARE(vp); error = insmntque(vp, mntp); if (error != 0) { *npp = NULL; mtx_destroy(&np->n_mtx); FREE((caddr_t)nfhp, M_NFSFH); if (np->n_v4 != NULL) FREE((caddr_t)np->n_v4, M_NFSV4NODE); uma_zfree(newnfsnode_zone, np); return (error); } error = vfs_hash_insert(vp, hash, lkflags, td, &nvp, newnfs_vncmpf, nfhp); if (error) return (error); if (nvp != NULL) { *npp = VTONFS(nvp); /* vfs_hash_insert() vput()'s the losing vnode */ return (0); } *npp = np; return (0); } /* - * Anothe variant of nfs_nget(). This one is only used by reopen. It + * Another variant of nfs_nget(). This one is only used by reopen. It * takes almost the same args as nfs_nget(), but only succeeds if an entry * exists in the cache. (Since files should already be "open" with a * vnode ref cnt on the node when reopen calls this, it should always * succeed.) * Also, don't get a vnode lock, since it may already be locked by some * other process that is handling it. This is ok, since all other threads * on the client are blocked by the nfsc_lock being exclusively held by the * caller of this function. */ int nfscl_ngetreopen(struct mount *mntp, u_int8_t *fhp, int fhsize, struct thread *td, struct nfsnode **npp) { struct vnode *nvp; u_int hash; struct nfsfh *nfhp; int error; *npp = NULL; /* For forced dismounts, just return error. */ if ((mntp->mnt_kern_flag & MNTK_UNMOUNTF)) return (EINTR); MALLOC(nfhp, struct nfsfh *, sizeof (struct nfsfh) + fhsize, M_NFSFH, M_WAITOK); bcopy(fhp, &nfhp->nfh_fh[0], fhsize); nfhp->nfh_len = fhsize; hash = fnv_32_buf(fhp, fhsize, FNV1_32_INIT); /* * First, try to get the vnode locked, but don't block for the lock. */ error = vfs_hash_get(mntp, hash, (LK_EXCLUSIVE | LK_NOWAIT), td, &nvp, newnfs_vncmpf, nfhp); if (error == 0 && nvp != NULL) { NFSVOPUNLOCK(nvp, 0); } else if (error == EBUSY) { /* - * The LK_EXCLOTHER lock type tells nfs_lock1() to not try - * and lock the vnode, but just get a v_usecount on it. - * LK_NOWAIT is set so that when vget() returns ENOENT, - * vfs_hash_get() fails instead of looping. - * If this succeeds, it is safe so long as a vflush() with + * It is safe so long as a vflush() with * FORCECLOSE has not been done. Since the Renew thread is * stopped and the MNTK_UNMOUNTF flag is set before doing * a vflush() with FORCECLOSE, we should be ok here. */ if ((mntp->mnt_kern_flag & MNTK_UNMOUNTF)) error = EINTR; - else - error = vfs_hash_get(mntp, hash, - (LK_EXCLOTHER | LK_NOWAIT), td, &nvp, - newnfs_vncmpf, nfhp); + else { + vfs_hash_ref(mntp, hash, td, &nvp, newnfs_vncmpf, nfhp); + if (nvp == NULL) { + error = ENOENT; + } else if ((nvp->v_iflag & VI_DOOMED) != 0) { + error = ENOENT; + vrele(nvp); + } else { + error = 0; + } + } } FREE(nfhp, M_NFSFH); if (error) return (error); if (nvp != NULL) { *npp = VTONFS(nvp); return (0); } return (EINVAL); } static void nfscl_warn_fileid(struct nfsmount *nmp, struct nfsvattr *oldnap, struct nfsvattr *newnap) { int off; if (ncl_fileid_maxwarnings >= 0 && ncl_fileid_nwarnings >= ncl_fileid_maxwarnings) return; off = 0; if (ncl_fileid_maxwarnings >= 0) { if (++ncl_fileid_nwarnings >= ncl_fileid_maxwarnings) off = 1; } printf("newnfs: server '%s' error: fileid changed. " "fsid %jx:%jx: expected fileid %#jx, got %#jx. " "(BROKEN NFS SERVER OR MIDDLEWARE)\n", nmp->nm_com.nmcom_hostname, (uintmax_t)nmp->nm_fsid[0], (uintmax_t)nmp->nm_fsid[1], (uintmax_t)oldnap->na_fileid, (uintmax_t)newnap->na_fileid); if (off) printf("newnfs: Logged %d times about fileid corruption; " "going quiet to avoid spamming logs excessively. (Limit " "is: %d).\n", ncl_fileid_nwarnings, ncl_fileid_maxwarnings); } /* * Load the attribute cache (that lives in the nfsnode entry) with * the attributes of the second argument and * Iff vaper not NULL * copy the attributes to *vaper * Similar to nfs_loadattrcache(), except the attributes are passed in * instead of being parsed out of the mbuf list. */ int nfscl_loadattrcache(struct vnode **vpp, struct nfsvattr *nap, void *nvaper, void *stuff, int writeattr, int dontshrink) { struct vnode *vp = *vpp; struct vattr *vap, *nvap = &nap->na_vattr, *vaper = nvaper; struct nfsnode *np; struct nfsmount *nmp; struct timespec mtime_save; u_quad_t nsize; int setnsize, error, force_fid_err; error = 0; setnsize = 0; nsize = 0; /* * If v_type == VNON it is a new node, so fill in the v_type, * n_mtime fields. Check to see if it represents a special * device, and if so, check for a possible alias. Once the * correct vnode has been obtained, fill in the rest of the * information. */ np = VTONFS(vp); NFSLOCKNODE(np); if (vp->v_type != nvap->va_type) { vp->v_type = nvap->va_type; if (vp->v_type == VFIFO) vp->v_op = &newnfs_fifoops; np->n_mtime = nvap->va_mtime; } nmp = VFSTONFS(vp->v_mount); vap = &np->n_vattr.na_vattr; mtime_save = vap->va_mtime; if (writeattr) { np->n_vattr.na_filerev = nap->na_filerev; np->n_vattr.na_size = nap->na_size; np->n_vattr.na_mtime = nap->na_mtime; np->n_vattr.na_ctime = nap->na_ctime; np->n_vattr.na_fsid = nap->na_fsid; np->n_vattr.na_mode = nap->na_mode; } else { force_fid_err = 0; KFAIL_POINT_ERROR(DEBUG_FP, nfscl_force_fileid_warning, force_fid_err); /* * BROKEN NFS SERVER OR MIDDLEWARE * * Certain NFS servers (certain old proprietary filers ca. * 2006) or broken middleboxes (e.g. WAN accelerator products) * will respond to GETATTR requests with results for a * different fileid. * * The WAN accelerator we've observed not only serves stale * cache results for a given file, it also occasionally serves * results for wholly different files. This causes surprising * problems; for example the cached size attribute of a file * may truncate down and then back up, resulting in zero * regions in file contents read by applications. We observed * this reliably with Clang and .c files during parallel build. * A pcap revealed packet fragmentation and GETATTR RPC * responses with wholly wrong fileids. */ if ((np->n_vattr.na_fileid != 0 && np->n_vattr.na_fileid != nap->na_fileid) || force_fid_err) { nfscl_warn_fileid(nmp, &np->n_vattr, nap); error = EIDRM; goto out; } NFSBCOPY((caddr_t)nap, (caddr_t)&np->n_vattr, sizeof (struct nfsvattr)); } /* * For NFSv4, if the node's fsid is not equal to the mount point's * fsid, return the low order 32bits of the node's fsid. This * allows getcwd(3) to work. There is a chance that the fsid might * be the same as a local fs, but since this is in an NFS mount * point, I don't think that will cause any problems? */ if (NFSHASNFSV4(nmp) && NFSHASHASSETFSID(nmp) && (nmp->nm_fsid[0] != np->n_vattr.na_filesid[0] || nmp->nm_fsid[1] != np->n_vattr.na_filesid[1])) { /* * va_fsid needs to be set to some value derived from * np->n_vattr.na_filesid that is not equal * vp->v_mount->mnt_stat.f_fsid[0], so that it changes * from the value used for the top level server volume * in the mounted subtree. */ if (vp->v_mount->mnt_stat.f_fsid.val[0] != (uint32_t)np->n_vattr.na_filesid[0]) vap->va_fsid = (uint32_t)np->n_vattr.na_filesid[0]; else vap->va_fsid = (uint32_t)hash32_buf( np->n_vattr.na_filesid, 2 * sizeof(uint64_t), 0); } else vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; np->n_attrstamp = time_second; if (vap->va_size != np->n_size) { if (vap->va_type == VREG) { if (dontshrink && vap->va_size < np->n_size) { /* * We've been told not to shrink the file; * zero np->n_attrstamp to indicate that * the attributes are stale. */ vap->va_size = np->n_size; np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); vnode_pager_setsize(vp, np->n_size); } else if (np->n_flag & NMODIFIED) { /* * We've modified the file: Use the larger * of our size, and the server's size. */ if (vap->va_size < np->n_size) { vap->va_size = np->n_size; } else { np->n_size = vap->va_size; np->n_flag |= NSIZECHANGED; } vnode_pager_setsize(vp, np->n_size); } else if (vap->va_size < np->n_size) { /* * When shrinking the size, the call to * vnode_pager_setsize() cannot be done * with the mutex held, so delay it until * after the mtx_unlock call. */ nsize = np->n_size = vap->va_size; np->n_flag |= NSIZECHANGED; setnsize = 1; } else { np->n_size = vap->va_size; np->n_flag |= NSIZECHANGED; vnode_pager_setsize(vp, np->n_size); } } else { np->n_size = vap->va_size; } } /* * The following checks are added to prevent a race between (say) * a READDIR+ and a WRITE. * READDIR+, WRITE requests sent out. * READDIR+ resp, WRITE resp received on client. * However, the WRITE resp was handled before the READDIR+ resp * causing the post op attrs from the write to be loaded first * and the attrs from the READDIR+ to be loaded later. If this * happens, we have stale attrs loaded into the attrcache. * We detect this by for the mtime moving back. We invalidate the * attrcache when this happens. */ if (timespeccmp(&mtime_save, &vap->va_mtime, >)) { /* Size changed or mtime went backwards */ np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); } if (vaper != NULL) { NFSBCOPY((caddr_t)vap, (caddr_t)vaper, sizeof(*vap)); if (np->n_flag & NCHG) { if (np->n_flag & NACC) vaper->va_atime = np->n_atim; if (np->n_flag & NUPD) vaper->va_mtime = np->n_mtim; } } out: #ifdef KDTRACE_HOOKS if (np->n_attrstamp != 0) KDTRACE_NFS_ATTRCACHE_LOAD_DONE(vp, vap, error); #endif NFSUNLOCKNODE(np); if (setnsize) vnode_pager_setsize(vp, nsize); return (error); } /* * Fill in the client id name. For these bytes: * 1 - they must be unique * 2 - they should be persistent across client reboots * 1 is more critical than 2 * Use the mount point's unique id plus either the uuid or, if that * isn't set, random junk. */ void nfscl_fillclid(u_int64_t clval, char *uuid, u_int8_t *cp, u_int16_t idlen) { int uuidlen; /* * First, put in the 64bit mount point identifier. */ if (idlen >= sizeof (u_int64_t)) { NFSBCOPY((caddr_t)&clval, cp, sizeof (u_int64_t)); cp += sizeof (u_int64_t); idlen -= sizeof (u_int64_t); } /* * If uuid is non-zero length, use it. */ uuidlen = strlen(uuid); if (uuidlen > 0 && idlen >= uuidlen) { NFSBCOPY(uuid, cp, uuidlen); cp += uuidlen; idlen -= uuidlen; } /* * This only normally happens if the uuid isn't set. */ while (idlen > 0) { *cp++ = (u_int8_t)(arc4random() % 256); idlen--; } } /* * Fill in a lock owner name. For now, pid + the process's creation time. */ void nfscl_filllockowner(void *id, u_int8_t *cp, int flags) { union { u_int32_t lval; u_int8_t cval[4]; } tl; struct proc *p; if (id == NULL) { printf("NULL id\n"); bzero(cp, NFSV4CL_LOCKNAMELEN); return; } if ((flags & F_POSIX) != 0) { p = (struct proc *)id; tl.lval = p->p_pid; *cp++ = tl.cval[0]; *cp++ = tl.cval[1]; *cp++ = tl.cval[2]; *cp++ = tl.cval[3]; tl.lval = p->p_stats->p_start.tv_sec; *cp++ = tl.cval[0]; *cp++ = tl.cval[1]; *cp++ = tl.cval[2]; *cp++ = tl.cval[3]; tl.lval = p->p_stats->p_start.tv_usec; *cp++ = tl.cval[0]; *cp++ = tl.cval[1]; *cp++ = tl.cval[2]; *cp = tl.cval[3]; } else if ((flags & F_FLOCK) != 0) { bcopy(&id, cp, sizeof(id)); bzero(&cp[sizeof(id)], NFSV4CL_LOCKNAMELEN - sizeof(id)); } else { printf("nfscl_filllockowner: not F_POSIX or F_FLOCK\n"); bzero(cp, NFSV4CL_LOCKNAMELEN); } } /* * Find the parent process for the thread passed in as an argument. * If none exists, return NULL, otherwise return a thread for the parent. * (Can be any of the threads, since it is only used for td->td_proc.) */ NFSPROC_T * nfscl_getparent(struct thread *td) { struct proc *p; struct thread *ptd; if (td == NULL) return (NULL); p = td->td_proc; if (p->p_pid == 0) return (NULL); p = p->p_pptr; if (p == NULL) return (NULL); ptd = TAILQ_FIRST(&p->p_threads); return (ptd); } /* * Start up the renew kernel thread. */ static void start_nfscl(void *arg) { struct nfsclclient *clp; struct thread *td; clp = (struct nfsclclient *)arg; td = TAILQ_FIRST(&clp->nfsc_renewthread->p_threads); nfscl_renewthread(clp, td); kproc_exit(0); } void nfscl_start_renewthread(struct nfsclclient *clp) { kproc_create(start_nfscl, (void *)clp, &clp->nfsc_renewthread, 0, 0, "nfscl"); } /* * Handle wcc_data. * For NFSv4, it assumes that nfsv4_wccattr() was used to set up the getattr * as the first Op after PutFH. * (For NFSv4, the postop attributes are after the Op, so they can't be * parsed here. A separate call to nfscl_postop_attr() is required.) */ int nfscl_wcc_data(struct nfsrv_descript *nd, struct vnode *vp, struct nfsvattr *nap, int *flagp, int *wccflagp, void *stuff) { u_int32_t *tl; struct nfsnode *np = VTONFS(vp); struct nfsvattr nfsva; int error = 0; if (wccflagp != NULL) *wccflagp = 0; if (nd->nd_flag & ND_NFSV3) { *flagp = 0; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) { NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED); if (wccflagp != NULL) { mtx_lock(&np->n_mtx); *wccflagp = (np->n_mtime.tv_sec == fxdr_unsigned(u_int32_t, *(tl + 2)) && np->n_mtime.tv_nsec == fxdr_unsigned(u_int32_t, *(tl + 3))); mtx_unlock(&np->n_mtx); } } error = nfscl_postop_attr(nd, nap, flagp, stuff); } else if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == (ND_NFSV4 | ND_V4WCCATTR)) { error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); if (error) return (error); /* * Get rid of Op# and status for next op. */ NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*++tl) nd->nd_flag |= ND_NOMOREDATA; if (wccflagp != NULL && nfsva.na_vattr.va_mtime.tv_sec != 0) { mtx_lock(&np->n_mtx); *wccflagp = (np->n_mtime.tv_sec == nfsva.na_vattr.va_mtime.tv_sec && np->n_mtime.tv_nsec == nfsva.na_vattr.va_mtime.tv_sec); mtx_unlock(&np->n_mtx); } } nfsmout: return (error); } /* * Get postop attributes. */ int nfscl_postop_attr(struct nfsrv_descript *nd, struct nfsvattr *nap, int *retp, void *stuff) { u_int32_t *tl; int error = 0; *retp = 0; if (nd->nd_flag & ND_NOMOREDATA) return (error); if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); *retp = fxdr_unsigned(int, *tl); } else if (nd->nd_flag & ND_NFSV4) { /* * For NFSv4, the postop attr are at the end, so no point * in looking if nd_repstat != 0. */ if (!nd->nd_repstat) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) /* should never happen since nd_repstat != 0 */ nd->nd_flag |= ND_NOMOREDATA; else *retp = 1; } } else if (!nd->nd_repstat) { /* For NFSv2, the attributes are here iff nd_repstat == 0 */ *retp = 1; } if (*retp) { error = nfsm_loadattr(nd, nap); if (error) *retp = 0; } nfsmout: return (error); } /* * Fill in the setable attributes. The full argument indicates whether * to fill in them all or just mode and time. */ void nfscl_fillsattr(struct nfsrv_descript *nd, struct vattr *vap, struct vnode *vp, int flags, u_int32_t rdev) { u_int32_t *tl; struct nfsv2_sattr *sp; nfsattrbit_t attrbits; switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) { case ND_NFSV2: NFSM_BUILD(sp, struct nfsv2_sattr *, NFSX_V2SATTR); if (vap->va_mode == (mode_t)VNOVAL) sp->sa_mode = newnfs_xdrneg1; else sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); if (vap->va_uid == (uid_t)VNOVAL) sp->sa_uid = newnfs_xdrneg1; else sp->sa_uid = txdr_unsigned(vap->va_uid); if (vap->va_gid == (gid_t)VNOVAL) sp->sa_gid = newnfs_xdrneg1; else sp->sa_gid = txdr_unsigned(vap->va_gid); if (flags & NFSSATTR_SIZE0) sp->sa_size = 0; else if (flags & NFSSATTR_SIZENEG1) sp->sa_size = newnfs_xdrneg1; else if (flags & NFSSATTR_SIZERDEV) sp->sa_size = txdr_unsigned(rdev); else sp->sa_size = txdr_unsigned(vap->va_size); txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); break; case ND_NFSV3: if (vap->va_mode != (mode_t)VNOVAL) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl = txdr_unsigned(vap->va_mode); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } if ((flags & NFSSATTR_FULL) && vap->va_uid != (uid_t)VNOVAL) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl = txdr_unsigned(vap->va_uid); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } if ((flags & NFSSATTR_FULL) && vap->va_gid != (gid_t)VNOVAL) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl = txdr_unsigned(vap->va_gid); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } if ((flags & NFSSATTR_FULL) && vap->va_size != VNOVAL) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = newnfs_true; txdr_hyper(vap->va_size, tl); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } if (vap->va_atime.tv_sec != VNOVAL) { if ((vap->va_vaflags & VA_UTIMES_NULL) == 0) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); txdr_nfsv3time(&vap->va_atime, tl); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); } } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); } if (vap->va_mtime.tv_sec != VNOVAL) { if ((vap->va_vaflags & VA_UTIMES_NULL) == 0) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); txdr_nfsv3time(&vap->va_mtime, tl); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); } } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); } break; case ND_NFSV4: NFSZERO_ATTRBIT(&attrbits); if (vap->va_mode != (mode_t)VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_MODE); if ((flags & NFSSATTR_FULL) && vap->va_uid != (uid_t)VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER); if ((flags & NFSSATTR_FULL) && vap->va_gid != (gid_t)VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNERGROUP); if ((flags & NFSSATTR_FULL) && vap->va_size != VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); if (vap->va_atime.tv_sec != VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET); if (vap->va_mtime.tv_sec != VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFYSET); (void) nfsv4_fillattr(nd, vp->v_mount, vp, NULL, vap, NULL, 0, &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0); break; } } /* * nfscl_request() - mostly a wrapper for newnfs_request(). */ int nfscl_request(struct nfsrv_descript *nd, struct vnode *vp, NFSPROC_T *p, struct ucred *cred, void *stuff) { int ret, vers; struct nfsmount *nmp; nmp = VFSTONFS(vp->v_mount); if (nd->nd_flag & ND_NFSV4) vers = NFS_VER4; else if (nd->nd_flag & ND_NFSV3) vers = NFS_VER3; else vers = NFS_VER2; ret = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred, NFS_PROG, vers, NULL, 1, NULL, NULL); return (ret); } /* * fill in this bsden's variant of statfs using nfsstatfs. */ void nfscl_loadsbinfo(struct nfsmount *nmp, struct nfsstatfs *sfp, void *statfs) { struct statfs *sbp = (struct statfs *)statfs; if (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) { sbp->f_bsize = NFS_FABLKSIZE; sbp->f_blocks = sfp->sf_tbytes / NFS_FABLKSIZE; sbp->f_bfree = sfp->sf_fbytes / NFS_FABLKSIZE; /* * Although sf_abytes is uint64_t and f_bavail is int64_t, * the value after dividing by NFS_FABLKSIZE is small * enough that it will fit in 63bits, so it is ok to * assign it to f_bavail without fear that it will become * negative. */ sbp->f_bavail = sfp->sf_abytes / NFS_FABLKSIZE; sbp->f_files = sfp->sf_tfiles; /* Since f_ffree is int64_t, clip it to 63bits. */ if (sfp->sf_ffiles > INT64_MAX) sbp->f_ffree = INT64_MAX; else sbp->f_ffree = sfp->sf_ffiles; } else if ((nmp->nm_flag & NFSMNT_NFSV4) == 0) { /* * The type casts to (int32_t) ensure that this code is * compatible with the old NFS client, in that it will * propagate bit31 to the high order bits. This may or may * not be correct for NFSv2, but since it is a legacy * environment, I'd rather retain backwards compatibility. */ sbp->f_bsize = (int32_t)sfp->sf_bsize; sbp->f_blocks = (int32_t)sfp->sf_blocks; sbp->f_bfree = (int32_t)sfp->sf_bfree; sbp->f_bavail = (int32_t)sfp->sf_bavail; sbp->f_files = 0; sbp->f_ffree = 0; } } /* * Use the fsinfo stuff to update the mount point. */ void nfscl_loadfsinfo(struct nfsmount *nmp, struct nfsfsinfo *fsp) { if ((nmp->nm_wsize == 0 || fsp->fs_wtpref < nmp->nm_wsize) && fsp->fs_wtpref >= NFS_FABLKSIZE) nmp->nm_wsize = (fsp->fs_wtpref + NFS_FABLKSIZE - 1) & ~(NFS_FABLKSIZE - 1); if (fsp->fs_wtmax < nmp->nm_wsize && fsp->fs_wtmax > 0) { nmp->nm_wsize = fsp->fs_wtmax & ~(NFS_FABLKSIZE - 1); if (nmp->nm_wsize == 0) nmp->nm_wsize = fsp->fs_wtmax; } if (nmp->nm_wsize < NFS_FABLKSIZE) nmp->nm_wsize = NFS_FABLKSIZE; if ((nmp->nm_rsize == 0 || fsp->fs_rtpref < nmp->nm_rsize) && fsp->fs_rtpref >= NFS_FABLKSIZE) nmp->nm_rsize = (fsp->fs_rtpref + NFS_FABLKSIZE - 1) & ~(NFS_FABLKSIZE - 1); if (fsp->fs_rtmax < nmp->nm_rsize && fsp->fs_rtmax > 0) { nmp->nm_rsize = fsp->fs_rtmax & ~(NFS_FABLKSIZE - 1); if (nmp->nm_rsize == 0) nmp->nm_rsize = fsp->fs_rtmax; } if (nmp->nm_rsize < NFS_FABLKSIZE) nmp->nm_rsize = NFS_FABLKSIZE; if ((nmp->nm_readdirsize == 0 || fsp->fs_dtpref < nmp->nm_readdirsize) && fsp->fs_dtpref >= NFS_DIRBLKSIZ) nmp->nm_readdirsize = (fsp->fs_dtpref + NFS_DIRBLKSIZ - 1) & ~(NFS_DIRBLKSIZ - 1); if (fsp->fs_rtmax < nmp->nm_readdirsize && fsp->fs_rtmax > 0) { nmp->nm_readdirsize = fsp->fs_rtmax & ~(NFS_DIRBLKSIZ - 1); if (nmp->nm_readdirsize == 0) nmp->nm_readdirsize = fsp->fs_rtmax; } if (nmp->nm_readdirsize < NFS_DIRBLKSIZ) nmp->nm_readdirsize = NFS_DIRBLKSIZ; if (fsp->fs_maxfilesize > 0 && fsp->fs_maxfilesize < nmp->nm_maxfilesize) nmp->nm_maxfilesize = fsp->fs_maxfilesize; nmp->nm_mountp->mnt_stat.f_iosize = newnfs_iosize(nmp); nmp->nm_state |= NFSSTA_GOTFSINFO; } /* * Lookups source address which should be used to communicate with * @nmp and stores it inside @pdst. * * Returns 0 on success. */ u_int8_t * nfscl_getmyip(struct nfsmount *nmp, struct in6_addr *paddr, int *isinet6p) { #if defined(INET6) || defined(INET) int error, fibnum; fibnum = curthread->td_proc->p_fibnum; #endif #ifdef INET if (nmp->nm_nam->sa_family == AF_INET) { struct sockaddr_in *sin; struct nhop4_extended nh_ext; sin = (struct sockaddr_in *)nmp->nm_nam; CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred)); error = fib4_lookup_nh_ext(fibnum, sin->sin_addr, 0, 0, &nh_ext); CURVNET_RESTORE(); if (error != 0) return (NULL); if ((ntohl(nh_ext.nh_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { /* Ignore loopback addresses */ return (NULL); } *isinet6p = 0; *((struct in_addr *)paddr) = nh_ext.nh_src; return (u_int8_t *)paddr; } #endif #ifdef INET6 if (nmp->nm_nam->sa_family == AF_INET6) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)nmp->nm_nam; CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred)); error = in6_selectsrc_addr(fibnum, &sin6->sin6_addr, sin6->sin6_scope_id, NULL, paddr, NULL); CURVNET_RESTORE(); if (error != 0) return (NULL); if (IN6_IS_ADDR_LOOPBACK(paddr)) return (NULL); /* Scope is embedded in */ *isinet6p = 1; return (u_int8_t *)paddr; } #endif return (NULL); } /* * Copy NFS uid, gids from the cred structure. */ void newnfs_copyincred(struct ucred *cr, struct nfscred *nfscr) { int i; KASSERT(cr->cr_ngroups >= 0, ("newnfs_copyincred: negative cr_ngroups")); nfscr->nfsc_uid = cr->cr_uid; nfscr->nfsc_ngroups = MIN(cr->cr_ngroups, NFS_MAXGRPS + 1); for (i = 0; i < nfscr->nfsc_ngroups; i++) nfscr->nfsc_groups[i] = cr->cr_groups[i]; } /* * Do any client specific initialization. */ void nfscl_init(void) { static int inited = 0; if (inited) return; inited = 1; nfscl_inited = 1; ncl_pbuf_freecnt = nswbuf / 2 + 1; } /* * Check each of the attributes to be set, to ensure they aren't already * the correct value. Disable setting ones already correct. */ int nfscl_checksattr(struct vattr *vap, struct nfsvattr *nvap) { if (vap->va_mode != (mode_t)VNOVAL) { if (vap->va_mode == nvap->na_mode) vap->va_mode = (mode_t)VNOVAL; } if (vap->va_uid != (uid_t)VNOVAL) { if (vap->va_uid == nvap->na_uid) vap->va_uid = (uid_t)VNOVAL; } if (vap->va_gid != (gid_t)VNOVAL) { if (vap->va_gid == nvap->na_gid) vap->va_gid = (gid_t)VNOVAL; } if (vap->va_size != VNOVAL) { if (vap->va_size == nvap->na_size) vap->va_size = VNOVAL; } /* * We are normally called with only a partially initialized * VAP. Since the NFSv3 spec says that server may use the * file attributes to store the verifier, the spec requires * us to do a SETATTR RPC. FreeBSD servers store the verifier * in atime, but we can't really assume that all servers will * so we ensure that our SETATTR sets both atime and mtime. * Set the VA_UTIMES_NULL flag for this case, so that * the server's time will be used. This is needed to * work around a bug in some Solaris servers, where * setting the time TOCLIENT causes the Setattr RPC * to return NFS_OK, but not set va_mode. */ if (vap->va_mtime.tv_sec == VNOVAL) { vfs_timestamp(&vap->va_mtime); vap->va_vaflags |= VA_UTIMES_NULL; } if (vap->va_atime.tv_sec == VNOVAL) vap->va_atime = vap->va_mtime; return (1); } /* * Map nfsv4 errors to errno.h errors. * The uid and gid arguments are only used for NFSERR_BADOWNER and that * error should only be returned for the Open, Create and Setattr Ops. * As such, most calls can just pass in 0 for those arguments. */ APPLESTATIC int nfscl_maperr(struct thread *td, int error, uid_t uid, gid_t gid) { struct proc *p; if (error < 10000) return (error); if (td != NULL) p = td->td_proc; else p = NULL; switch (error) { case NFSERR_BADOWNER: tprintf(p, LOG_INFO, "No name and/or group mapping for uid,gid:(%d,%d)\n", uid, gid); return (EPERM); case NFSERR_BADNAME: case NFSERR_BADCHAR: printf("nfsv4 char/name not handled by server\n"); return (ENOENT); case NFSERR_STALECLIENTID: case NFSERR_STALESTATEID: case NFSERR_EXPIRED: case NFSERR_BADSTATEID: case NFSERR_BADSESSION: printf("nfsv4 recover err returned %d\n", error); return (EIO); case NFSERR_BADHANDLE: case NFSERR_SERVERFAULT: case NFSERR_BADTYPE: case NFSERR_FHEXPIRED: case NFSERR_RESOURCE: case NFSERR_MOVED: case NFSERR_NOFILEHANDLE: case NFSERR_MINORVERMISMATCH: case NFSERR_OLDSTATEID: case NFSERR_BADSEQID: case NFSERR_LEASEMOVED: case NFSERR_RECLAIMBAD: case NFSERR_BADXDR: case NFSERR_OPILLEGAL: printf("nfsv4 client/server protocol prob err=%d\n", error); return (EIO); default: tprintf(p, LOG_INFO, "nfsv4 err=%d\n", error); return (EIO); }; } /* * Check to see if the process for this owner exists. Return 1 if it doesn't * and 0 otherwise. */ int nfscl_procdoesntexist(u_int8_t *own) { union { u_int32_t lval; u_int8_t cval[4]; } tl; struct proc *p; pid_t pid; int ret = 0; tl.cval[0] = *own++; tl.cval[1] = *own++; tl.cval[2] = *own++; tl.cval[3] = *own++; pid = tl.lval; p = pfind_locked(pid); if (p == NULL) return (1); if (p->p_stats == NULL) { PROC_UNLOCK(p); return (0); } tl.cval[0] = *own++; tl.cval[1] = *own++; tl.cval[2] = *own++; tl.cval[3] = *own++; if (tl.lval != p->p_stats->p_start.tv_sec) { ret = 1; } else { tl.cval[0] = *own++; tl.cval[1] = *own++; tl.cval[2] = *own++; tl.cval[3] = *own; if (tl.lval != p->p_stats->p_start.tv_usec) ret = 1; } PROC_UNLOCK(p); return (ret); } /* * - nfs pseudo system call for the client */ /* * MPSAFE */ static int nfssvc_nfscl(struct thread *td, struct nfssvc_args *uap) { struct file *fp; struct nfscbd_args nfscbdarg; struct nfsd_nfscbd_args nfscbdarg2; struct nameidata nd; struct nfscl_dumpmntopts dumpmntopts; cap_rights_t rights; char *buf; int error; if (uap->flag & NFSSVC_CBADDSOCK) { error = copyin(uap->argp, (caddr_t)&nfscbdarg, sizeof(nfscbdarg)); if (error) return (error); /* * Since we don't know what rights might be required, * pretend that we need them all. It is better to be too * careful than too reckless. */ error = fget(td, nfscbdarg.sock, cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp); if (error) return (error); if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, td); return (EPERM); } error = nfscbd_addsock(fp); fdrop(fp, td); if (!error && nfscl_enablecallb == 0) { nfsv4_cbport = nfscbdarg.port; nfscl_enablecallb = 1; } } else if (uap->flag & NFSSVC_NFSCBD) { if (uap->argp == NULL) return (EINVAL); error = copyin(uap->argp, (caddr_t)&nfscbdarg2, sizeof(nfscbdarg2)); if (error) return (error); error = nfscbd_nfsd(td, &nfscbdarg2); } else if (uap->flag & NFSSVC_DUMPMNTOPTS) { error = copyin(uap->argp, &dumpmntopts, sizeof(dumpmntopts)); if (error == 0 && (dumpmntopts.ndmnt_blen < 256 || dumpmntopts.ndmnt_blen > 1024)) error = EINVAL; if (error == 0) error = nfsrv_lookupfilename(&nd, dumpmntopts.ndmnt_fname, td); if (error == 0 && strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) { vput(nd.ni_vp); error = EINVAL; } if (error == 0) { buf = malloc(dumpmntopts.ndmnt_blen, M_TEMP, M_WAITOK); nfscl_retopts(VFSTONFS(nd.ni_vp->v_mount), buf, dumpmntopts.ndmnt_blen); vput(nd.ni_vp); error = copyout(buf, dumpmntopts.ndmnt_buf, dumpmntopts.ndmnt_blen); free(buf, M_TEMP); } } else { error = EINVAL; } return (error); } extern int (*nfsd_call_nfscl)(struct thread *, struct nfssvc_args *); /* * Called once to initialize data structures... */ static int nfscl_modevent(module_t mod, int type, void *data) { int error = 0; static int loaded = 0; switch (type) { case MOD_LOAD: if (loaded) return (0); newnfs_portinit(); mtx_init(&nfs_clstate_mutex, "nfs_clstate_mutex", NULL, MTX_DEF); mtx_init(&ncl_iod_mutex, "ncl_iod_mutex", NULL, MTX_DEF); nfscl_init(); NFSD_LOCK(); nfsrvd_cbinit(0); NFSD_UNLOCK(); ncl_call_invalcaches = ncl_invalcaches; nfsd_call_nfscl = nfssvc_nfscl; loaded = 1; break; case MOD_UNLOAD: if (nfs_numnfscbd != 0) { error = EBUSY; break; } /* * XXX: Unloading of nfscl module is unsupported. */ #if 0 ncl_call_invalcaches = NULL; nfsd_call_nfscl = NULL; /* and get rid of the mutexes */ mtx_destroy(&nfs_clstate_mutex); mtx_destroy(&ncl_iod_mutex); loaded = 0; break; #else /* FALLTHROUGH */ #endif default: error = EOPNOTSUPP; break; } return error; } static moduledata_t nfscl_mod = { "nfscl", nfscl_modevent, NULL, }; DECLARE_MODULE(nfscl, nfscl_mod, SI_SUB_VFS, SI_ORDER_FIRST); /* So that loader and kldload(2) can find us, wherever we are.. */ MODULE_VERSION(nfscl, 1); MODULE_DEPEND(nfscl, nfscommon, 1, 1, 1); MODULE_DEPEND(nfscl, krpc, 1, 1, 1); MODULE_DEPEND(nfscl, nfssvc, 1, 1, 1); MODULE_DEPEND(nfscl, nfslock, 1, 1, 1); Index: head/sys/fs/nfsclient/nfs_clvnops.c =================================================================== --- head/sys/fs/nfsclient/nfs_clvnops.c (revision 299412) +++ head/sys/fs/nfsclient/nfs_clvnops.c (revision 299413) @@ -1,3537 +1,3504 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from nfs_vnops.c 8.16 (Berkeley) 5/27/95 */ #include __FBSDID("$FreeBSD$"); /* * vnode op calls for Sun NFS version 2, 3 and 4 */ #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KDTRACE_HOOKS #include dtrace_nfsclient_accesscache_flush_probe_func_t dtrace_nfscl_accesscache_flush_done_probe; uint32_t nfscl_accesscache_flush_done_id; dtrace_nfsclient_accesscache_get_probe_func_t dtrace_nfscl_accesscache_get_hit_probe, dtrace_nfscl_accesscache_get_miss_probe; uint32_t nfscl_accesscache_get_hit_id; uint32_t nfscl_accesscache_get_miss_id; dtrace_nfsclient_accesscache_load_probe_func_t dtrace_nfscl_accesscache_load_done_probe; uint32_t nfscl_accesscache_load_done_id; #endif /* !KDTRACE_HOOKS */ /* Defs */ #define TRUE 1 #define FALSE 0 extern struct nfsstats newnfsstats; extern int nfsrv_useacl; extern int nfscl_debuglevel; MALLOC_DECLARE(M_NEWNFSREQ); /* * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these * calls are not in getblk() and brelse() so that they would not be necessary * here. */ #ifndef B_VMIO #define vfs_busy_pages(bp, f) #endif static vop_read_t nfsfifo_read; static vop_write_t nfsfifo_write; static vop_close_t nfsfifo_close; static int nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *, struct thread *); static vop_lookup_t nfs_lookup; static vop_create_t nfs_create; static vop_mknod_t nfs_mknod; static vop_open_t nfs_open; static vop_pathconf_t nfs_pathconf; static vop_close_t nfs_close; static vop_access_t nfs_access; static vop_getattr_t nfs_getattr; static vop_setattr_t nfs_setattr; static vop_read_t nfs_read; static vop_fsync_t nfs_fsync; static vop_remove_t nfs_remove; static vop_link_t nfs_link; static vop_rename_t nfs_rename; static vop_mkdir_t nfs_mkdir; static vop_rmdir_t nfs_rmdir; static vop_symlink_t nfs_symlink; static vop_readdir_t nfs_readdir; static vop_strategy_t nfs_strategy; -static vop_lock1_t nfs_lock1; static int nfs_lookitup(struct vnode *, char *, int, struct ucred *, struct thread *, struct nfsnode **); static int nfs_sillyrename(struct vnode *, struct vnode *, struct componentname *); static vop_access_t nfsspec_access; static vop_readlink_t nfs_readlink; static vop_print_t nfs_print; static vop_advlock_t nfs_advlock; static vop_advlockasync_t nfs_advlockasync; static vop_getacl_t nfs_getacl; static vop_setacl_t nfs_setacl; /* * Global vfs data structures for nfs */ struct vop_vector newnfs_vnodeops = { .vop_default = &default_vnodeops, .vop_access = nfs_access, .vop_advlock = nfs_advlock, .vop_advlockasync = nfs_advlockasync, .vop_close = nfs_close, .vop_create = nfs_create, .vop_fsync = nfs_fsync, .vop_getattr = nfs_getattr, .vop_getpages = ncl_getpages, .vop_putpages = ncl_putpages, .vop_inactive = ncl_inactive, .vop_link = nfs_link, - .vop_lock1 = nfs_lock1, .vop_lookup = nfs_lookup, .vop_mkdir = nfs_mkdir, .vop_mknod = nfs_mknod, .vop_open = nfs_open, .vop_pathconf = nfs_pathconf, .vop_print = nfs_print, .vop_read = nfs_read, .vop_readdir = nfs_readdir, .vop_readlink = nfs_readlink, .vop_reclaim = ncl_reclaim, .vop_remove = nfs_remove, .vop_rename = nfs_rename, .vop_rmdir = nfs_rmdir, .vop_setattr = nfs_setattr, .vop_strategy = nfs_strategy, .vop_symlink = nfs_symlink, .vop_write = ncl_write, .vop_getacl = nfs_getacl, .vop_setacl = nfs_setacl, }; struct vop_vector newnfs_fifoops = { .vop_default = &fifo_specops, .vop_access = nfsspec_access, .vop_close = nfsfifo_close, .vop_fsync = nfs_fsync, .vop_getattr = nfs_getattr, .vop_inactive = ncl_inactive, .vop_print = nfs_print, .vop_read = nfsfifo_read, .vop_reclaim = ncl_reclaim, .vop_setattr = nfs_setattr, .vop_write = nfsfifo_write, }; static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap); static int nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name, int namelen, struct ucred *cred, struct thread *td); static int nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, char *fnameptr, int fnamelen, struct vnode *tdvp, struct vnode *tvp, char *tnameptr, int tnamelen, struct ucred *cred, struct thread *td); static int nfs_renameit(struct vnode *sdvp, struct vnode *svp, struct componentname *scnp, struct sillyrename *sp); /* * Global variables */ #define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1)) SYSCTL_DECL(_vfs_nfs); static int nfsaccess_cache_timeout = NFS_MAXATTRTIMO; SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW, &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout"); static int nfs_prime_access_cache = 0; SYSCTL_INT(_vfs_nfs, OID_AUTO, prime_access_cache, CTLFLAG_RW, &nfs_prime_access_cache, 0, "Prime NFS ACCESS cache when fetching attributes"); static int newnfs_commit_on_close = 0; SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_on_close, CTLFLAG_RW, &newnfs_commit_on_close, 0, "write+commit on close, else only write"); static int nfs_clean_pages_on_close = 1; SYSCTL_INT(_vfs_nfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW, &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close"); int newnfs_directio_enable = 0; SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW, &newnfs_directio_enable, 0, "Enable NFS directio"); int nfs_keep_dirty_on_error; SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_keep_dirty_on_error, CTLFLAG_RW, &nfs_keep_dirty_on_error, 0, "Retry pageout if error returned"); /* * This sysctl allows other processes to mmap a file that has been opened * O_DIRECT by a process. In general, having processes mmap the file while * Direct IO is in progress can lead to Data Inconsistencies. But, we allow * this by default to prevent DoS attacks - to prevent a malicious user from * opening up files O_DIRECT preventing other users from mmap'ing these * files. "Protected" environments where stricter consistency guarantees are * required can disable this knob. The process that opened the file O_DIRECT * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not * meaningful. */ int newnfs_directio_allow_mmap = 1; SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW, &newnfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens"); #if 0 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD, &newnfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count"); SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_misses, CTLFLAG_RD, &newnfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count"); #endif #define NFSACCESS_ALL (NFSACCESS_READ | NFSACCESS_MODIFY \ | NFSACCESS_EXTEND | NFSACCESS_EXECUTE \ | NFSACCESS_DELETE | NFSACCESS_LOOKUP) /* * SMP Locking Note : * The list of locks after the description of the lock is the ordering * of other locks acquired with the lock held. * np->n_mtx : Protects the fields in the nfsnode. VM Object Lock VI_MTX (acquired indirectly) * nmp->nm_mtx : Protects the fields in the nfsmount. rep->r_mtx * ncl_iod_mutex : Global lock, protects shared nfsiod state. * nfs_reqq_mtx : Global lock, protects the nfs_reqq list. nmp->nm_mtx rep->r_mtx * rep->r_mtx : Protects the fields in an nfsreq. */ static int nfs34_access_otw(struct vnode *vp, int wmode, struct thread *td, struct ucred *cred, u_int32_t *retmode) { int error = 0, attrflag, i, lrupos; u_int32_t rmode; struct nfsnode *np = VTONFS(vp); struct nfsvattr nfsva; error = nfsrpc_accessrpc(vp, wmode, cred, td, &nfsva, &attrflag, &rmode, NULL); if (attrflag) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (!error) { lrupos = 0; mtx_lock(&np->n_mtx); for (i = 0; i < NFS_ACCESSCACHESIZE; i++) { if (np->n_accesscache[i].uid == cred->cr_uid) { np->n_accesscache[i].mode = rmode; np->n_accesscache[i].stamp = time_second; break; } if (i > 0 && np->n_accesscache[i].stamp < np->n_accesscache[lrupos].stamp) lrupos = i; } if (i == NFS_ACCESSCACHESIZE) { np->n_accesscache[lrupos].uid = cred->cr_uid; np->n_accesscache[lrupos].mode = rmode; np->n_accesscache[lrupos].stamp = time_second; } mtx_unlock(&np->n_mtx); if (retmode != NULL) *retmode = rmode; KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, rmode, 0); } else if (NFS_ISV4(vp)) { error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); } #ifdef KDTRACE_HOOKS if (error != 0) KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, 0, error); #endif return (error); } /* * nfs access vnode op. * For nfs version 2, just return ok. File accesses may fail later. * For nfs version 3, use the access rpc to check accessibility. If file modes * are changed on the server, accesses might still fail later. */ static int nfs_access(struct vop_access_args *ap) { struct vnode *vp = ap->a_vp; int error = 0, i, gotahit; u_int32_t mode, wmode, rmode; int v34 = NFS_ISV34(vp); struct nfsnode *np = VTONFS(vp); /* * Disallow write attempts on filesystems mounted read-only; * unless the file is a socket, fifo, or a block or character * device resident on the filesystem. */ if ((ap->a_accmode & (VWRITE | VAPPEND | VWRITE_NAMED_ATTRS | VDELETE_CHILD | VWRITE_ATTRIBUTES | VDELETE | VWRITE_ACL | VWRITE_OWNER)) != 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); default: break; } } /* * For nfs v3 or v4, check to see if we have done this recently, and if * so return our cached result instead of making an ACCESS call. * If not, do an access rpc, otherwise you are stuck emulating * ufs_access() locally using the vattr. This may not be correct, * since the server may apply other access criteria such as * client uid-->server uid mapping that we do not know about. */ if (v34) { if (ap->a_accmode & VREAD) mode = NFSACCESS_READ; else mode = 0; if (vp->v_type != VDIR) { if (ap->a_accmode & VWRITE) mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND); if (ap->a_accmode & VAPPEND) mode |= NFSACCESS_EXTEND; if (ap->a_accmode & VEXEC) mode |= NFSACCESS_EXECUTE; if (ap->a_accmode & VDELETE) mode |= NFSACCESS_DELETE; } else { if (ap->a_accmode & VWRITE) mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND); if (ap->a_accmode & VAPPEND) mode |= NFSACCESS_EXTEND; if (ap->a_accmode & VEXEC) mode |= NFSACCESS_LOOKUP; if (ap->a_accmode & VDELETE) mode |= NFSACCESS_DELETE; if (ap->a_accmode & VDELETE_CHILD) mode |= NFSACCESS_MODIFY; } /* XXX safety belt, only make blanket request if caching */ if (nfsaccess_cache_timeout > 0) { wmode = NFSACCESS_READ | NFSACCESS_MODIFY | NFSACCESS_EXTEND | NFSACCESS_EXECUTE | NFSACCESS_DELETE | NFSACCESS_LOOKUP; } else { wmode = mode; } /* * Does our cached result allow us to give a definite yes to * this request? */ gotahit = 0; mtx_lock(&np->n_mtx); for (i = 0; i < NFS_ACCESSCACHESIZE; i++) { if (ap->a_cred->cr_uid == np->n_accesscache[i].uid) { if (time_second < (np->n_accesscache[i].stamp + nfsaccess_cache_timeout) && (np->n_accesscache[i].mode & mode) == mode) { NFSINCRGLOBAL(newnfsstats.accesscache_hits); gotahit = 1; } break; } } mtx_unlock(&np->n_mtx); #ifdef KDTRACE_HOOKS if (gotahit != 0) KDTRACE_NFS_ACCESSCACHE_GET_HIT(vp, ap->a_cred->cr_uid, mode); else KDTRACE_NFS_ACCESSCACHE_GET_MISS(vp, ap->a_cred->cr_uid, mode); #endif if (gotahit == 0) { /* * Either a no, or a don't know. Go to the wire. */ NFSINCRGLOBAL(newnfsstats.accesscache_misses); error = nfs34_access_otw(vp, wmode, ap->a_td, ap->a_cred, &rmode); if (!error && (rmode & mode) != mode) error = EACCES; } return (error); } else { if ((error = nfsspec_access(ap)) != 0) { return (error); } /* * Attempt to prevent a mapped root from accessing a file * which it shouldn't. We try to read a byte from the file * if the user is root and the file is not zero length. * After calling nfsspec_access, we should have the correct * file size cached. */ mtx_lock(&np->n_mtx); if (ap->a_cred->cr_uid == 0 && (ap->a_accmode & VREAD) && VTONFS(vp)->n_size > 0) { struct iovec aiov; struct uio auio; char buf[1]; mtx_unlock(&np->n_mtx); aiov.iov_base = buf; aiov.iov_len = 1; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_resid = 1; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_td = ap->a_td; if (vp->v_type == VREG) error = ncl_readrpc(vp, &auio, ap->a_cred); else if (vp->v_type == VDIR) { char* bp; bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK); aiov.iov_base = bp; aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ; error = ncl_readdirrpc(vp, &auio, ap->a_cred, ap->a_td); free(bp, M_TEMP); } else if (vp->v_type == VLNK) error = ncl_readlinkrpc(vp, &auio, ap->a_cred); else error = EACCES; } else mtx_unlock(&np->n_mtx); return (error); } } /* * nfs open vnode op * Check to see if the type is ok * and that deletion is not in progress. * For paged in text files, you will need to flush the page cache * if consistency is lost. */ /* ARGSUSED */ static int nfs_open(struct vop_open_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct vattr vattr; int error; int fmode = ap->a_mode; struct ucred *cred; if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) return (EOPNOTSUPP); /* * For NFSv4, we need to do the Open Op before cache validation, * so that we conform to RFC3530 Sec. 9.3.1. */ if (NFS_ISV4(vp)) { error = nfsrpc_open(vp, fmode, ap->a_cred, ap->a_td); if (error) { error = nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0); return (error); } } /* * Now, if this Open will be doing reading, re-validate/flush the * cache, so that Close/Open coherency is maintained. */ mtx_lock(&np->n_mtx); if (np->n_flag & NMODIFIED) { mtx_unlock(&np->n_mtx); error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); if (error == EINTR || error == EIO) { if (NFS_ISV4(vp)) (void) nfsrpc_close(vp, 0, ap->a_td); return (error); } mtx_lock(&np->n_mtx); np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); if (vp->v_type == VDIR) np->n_direofoffset = 0; mtx_unlock(&np->n_mtx); error = VOP_GETATTR(vp, &vattr, ap->a_cred); if (error) { if (NFS_ISV4(vp)) (void) nfsrpc_close(vp, 0, ap->a_td); return (error); } mtx_lock(&np->n_mtx); np->n_mtime = vattr.va_mtime; if (NFS_ISV4(vp)) np->n_change = vattr.va_filerev; } else { mtx_unlock(&np->n_mtx); error = VOP_GETATTR(vp, &vattr, ap->a_cred); if (error) { if (NFS_ISV4(vp)) (void) nfsrpc_close(vp, 0, ap->a_td); return (error); } mtx_lock(&np->n_mtx); if ((NFS_ISV4(vp) && np->n_change != vattr.va_filerev) || NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { if (vp->v_type == VDIR) np->n_direofoffset = 0; mtx_unlock(&np->n_mtx); error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); if (error == EINTR || error == EIO) { if (NFS_ISV4(vp)) (void) nfsrpc_close(vp, 0, ap->a_td); return (error); } mtx_lock(&np->n_mtx); np->n_mtime = vattr.va_mtime; if (NFS_ISV4(vp)) np->n_change = vattr.va_filerev; } } /* * If the object has >= 1 O_DIRECT active opens, we disable caching. */ if (newnfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) { if (np->n_directio_opens == 0) { mtx_unlock(&np->n_mtx); error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); if (error) { if (NFS_ISV4(vp)) (void) nfsrpc_close(vp, 0, ap->a_td); return (error); } mtx_lock(&np->n_mtx); np->n_flag |= NNONCACHE; } np->n_directio_opens++; } /* If opened for writing via NFSv4.1 or later, mark that for pNFS. */ if (NFSHASPNFS(VFSTONFS(vp->v_mount)) && (fmode & FWRITE) != 0) np->n_flag |= NWRITEOPENED; /* * If this is an open for writing, capture a reference to the * credentials, so they can be used by ncl_putpages(). Using * these write credentials is preferable to the credentials of * whatever thread happens to be doing the VOP_PUTPAGES() since * the write RPCs are less likely to fail with EACCES. */ if ((fmode & FWRITE) != 0) { cred = np->n_writecred; np->n_writecred = crhold(ap->a_cred); } else cred = NULL; mtx_unlock(&np->n_mtx); if (cred != NULL) crfree(cred); vnode_create_vobject(vp, vattr.va_size, ap->a_td); return (0); } /* * nfs close vnode op * What an NFS client should do upon close after writing is a debatable issue. * Most NFS clients push delayed writes to the server upon close, basically for * two reasons: * 1 - So that any write errors may be reported back to the client process * doing the close system call. By far the two most likely errors are * NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure. * 2 - To put a worst case upper bound on cache inconsistency between * multiple clients for the file. * There is also a consistency problem for Version 2 of the protocol w.r.t. * not being able to tell if other clients are writing a file concurrently, * since there is no way of knowing if the changed modify time in the reply * is only due to the write for this client. * (NFS Version 3 provides weak cache consistency data in the reply that * should be sufficient to detect and handle this case.) * * The current code does the following: * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers * for NFS Version 3 - flush dirty buffers to the server but don't invalidate * or commit them (this satisfies 1 and 2 except for the * case where the server crashes after this close but * before the commit RPC, which is felt to be "good * enough". Changing the last argument to ncl_flush() to * a 1 would force a commit operation, if it is felt a * commit is necessary now. * for NFS Version 4 - flush the dirty buffers and commit them, if * nfscl_mustflush() says this is necessary. * It is necessary if there is no write delegation held, * in order to satisfy open/close coherency. * If the file isn't cached on local stable storage, * it may be necessary in order to detect "out of space" * errors from the server, if the write delegation * issued by the server doesn't allow the file to grow. */ /* ARGSUSED */ static int nfs_close(struct vop_close_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct nfsvattr nfsva; struct ucred *cred; int error = 0, ret, localcred = 0; int fmode = ap->a_fflag; if ((vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF)) return (0); /* * During shutdown, a_cred isn't valid, so just use root. */ if (ap->a_cred == NOCRED) { cred = newnfs_getcred(); localcred = 1; } else { cred = ap->a_cred; } if (vp->v_type == VREG) { /* * Examine and clean dirty pages, regardless of NMODIFIED. * This closes a major hole in close-to-open consistency. * We want to push out all dirty pages (and buffers) on * close, regardless of whether they were dirtied by * mmap'ed writes or via write(). */ if (nfs_clean_pages_on_close && vp->v_object) { VM_OBJECT_WLOCK(vp->v_object); vm_object_page_clean(vp->v_object, 0, 0, 0); VM_OBJECT_WUNLOCK(vp->v_object); } mtx_lock(&np->n_mtx); if (np->n_flag & NMODIFIED) { mtx_unlock(&np->n_mtx); if (NFS_ISV3(vp)) { /* * Under NFSv3 we have dirty buffers to dispose of. We * must flush them to the NFS server. We have the option * of waiting all the way through the commit rpc or just * waiting for the initial write. The default is to only * wait through the initial write so the data is in the * server's cache, which is roughly similar to the state * a standard disk subsystem leaves the file in on close(). * * We cannot clear the NMODIFIED bit in np->n_flag due to * potential races with other processes, and certainly * cannot clear it if we don't commit. * These races occur when there is no longer the old * traditional vnode locking implemented for Vnode Ops. */ int cm = newnfs_commit_on_close ? 1 : 0; error = ncl_flush(vp, MNT_WAIT, cred, ap->a_td, cm, 0); /* np->n_flag &= ~NMODIFIED; */ } else if (NFS_ISV4(vp)) { if (nfscl_mustflush(vp) != 0) { int cm = newnfs_commit_on_close ? 1 : 0; error = ncl_flush(vp, MNT_WAIT, cred, ap->a_td, cm, 0); /* * as above w.r.t races when clearing * NMODIFIED. * np->n_flag &= ~NMODIFIED; */ } } else error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); mtx_lock(&np->n_mtx); } /* * Invalidate the attribute cache in all cases. * An open is going to fetch fresh attrs any way, other procs * on this node that have file open will be forced to do an * otw attr fetch, but this is safe. * --> A user found that their RPC count dropped by 20% when * this was commented out and I can't see any requirement * for it, so I've disabled it when negative lookups are * enabled. (What does this have to do with negative lookup * caching? Well nothing, except it was reported by the * same user that needed negative lookup caching and I wanted * there to be a way to disable it to see if it * is the cause of some caching/coherency issue that might * crop up.) */ if (VFSTONFS(vp->v_mount)->nm_negnametimeo == 0) { np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); } if (np->n_flag & NWRITEERR) { np->n_flag &= ~NWRITEERR; error = np->n_error; } mtx_unlock(&np->n_mtx); } if (NFS_ISV4(vp)) { /* * Get attributes so "change" is up to date. */ if (error == 0 && nfscl_mustflush(vp) != 0 && vp->v_type == VREG && (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOCTO) == 0) { ret = nfsrpc_getattr(vp, cred, ap->a_td, &nfsva, NULL); if (!ret) { np->n_change = nfsva.na_filerev; (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 0); } } /* * and do the close. */ ret = nfsrpc_close(vp, 0, ap->a_td); if (!error && ret) error = ret; if (error) error = nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0); } if (newnfs_directio_enable) KASSERT((np->n_directio_asyncwr == 0), ("nfs_close: dirty unflushed (%d) directio buffers\n", np->n_directio_asyncwr)); if (newnfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) { mtx_lock(&np->n_mtx); KASSERT((np->n_directio_opens > 0), ("nfs_close: unexpectedly value (0) of n_directio_opens\n")); np->n_directio_opens--; if (np->n_directio_opens == 0) np->n_flag &= ~NNONCACHE; mtx_unlock(&np->n_mtx); } if (localcred) NFSFREECRED(cred); return (error); } /* * nfs getattr call from vfs. */ static int nfs_getattr(struct vop_getattr_args *ap) { struct vnode *vp = ap->a_vp; struct thread *td = curthread; /* XXX */ struct nfsnode *np = VTONFS(vp); int error = 0; struct nfsvattr nfsva; struct vattr *vap = ap->a_vap; struct vattr vattr; /* * Update local times for special files. */ mtx_lock(&np->n_mtx); if (np->n_flag & (NACC | NUPD)) np->n_flag |= NCHG; mtx_unlock(&np->n_mtx); /* * First look in the cache. */ if (ncl_getattrcache(vp, &vattr) == 0) { vap->va_type = vattr.va_type; vap->va_mode = vattr.va_mode; vap->va_nlink = vattr.va_nlink; vap->va_uid = vattr.va_uid; vap->va_gid = vattr.va_gid; vap->va_fsid = vattr.va_fsid; vap->va_fileid = vattr.va_fileid; vap->va_size = vattr.va_size; vap->va_blocksize = vattr.va_blocksize; vap->va_atime = vattr.va_atime; vap->va_mtime = vattr.va_mtime; vap->va_ctime = vattr.va_ctime; vap->va_gen = vattr.va_gen; vap->va_flags = vattr.va_flags; vap->va_rdev = vattr.va_rdev; vap->va_bytes = vattr.va_bytes; vap->va_filerev = vattr.va_filerev; /* * Get the local modify time for the case of a write * delegation. */ nfscl_deleggetmodtime(vp, &vap->va_mtime); return (0); } if (NFS_ISV34(vp) && nfs_prime_access_cache && nfsaccess_cache_timeout > 0) { NFSINCRGLOBAL(newnfsstats.accesscache_misses); nfs34_access_otw(vp, NFSACCESS_ALL, td, ap->a_cred, NULL); if (ncl_getattrcache(vp, ap->a_vap) == 0) { nfscl_deleggetmodtime(vp, &ap->a_vap->va_mtime); return (0); } } error = nfsrpc_getattr(vp, ap->a_cred, td, &nfsva, NULL); if (!error) error = nfscl_loadattrcache(&vp, &nfsva, vap, NULL, 0, 0); if (!error) { /* * Get the local modify time for the case of a write * delegation. */ nfscl_deleggetmodtime(vp, &vap->va_mtime); } else if (NFS_ISV4(vp)) { error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); } return (error); } /* * nfs setattr call. */ static int nfs_setattr(struct vop_setattr_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct thread *td = curthread; /* XXX */ struct vattr *vap = ap->a_vap; int error = 0; u_quad_t tsize; #ifndef nolint tsize = (u_quad_t)0; #endif /* * Setting of flags and marking of atimes are not supported. */ if (vap->va_flags != VNOVAL) return (EOPNOTSUPP); /* * Disallow write attempts if the filesystem is mounted read-only. */ if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && (vp->v_mount->mnt_flag & MNT_RDONLY)) return (EROFS); if (vap->va_size != VNOVAL) { switch (vp->v_type) { case VDIR: return (EISDIR); case VCHR: case VBLK: case VSOCK: case VFIFO: if (vap->va_mtime.tv_sec == VNOVAL && vap->va_atime.tv_sec == VNOVAL && vap->va_mode == (mode_t)VNOVAL && vap->va_uid == (uid_t)VNOVAL && vap->va_gid == (gid_t)VNOVAL) return (0); vap->va_size = VNOVAL; break; default: /* * Disallow write attempts if the filesystem is * mounted read-only. */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); /* * We run vnode_pager_setsize() early (why?), * we must set np->n_size now to avoid vinvalbuf * V_SAVE races that might setsize a lower * value. */ mtx_lock(&np->n_mtx); tsize = np->n_size; mtx_unlock(&np->n_mtx); error = ncl_meta_setsize(vp, ap->a_cred, td, vap->va_size); mtx_lock(&np->n_mtx); if (np->n_flag & NMODIFIED) { tsize = np->n_size; mtx_unlock(&np->n_mtx); if (vap->va_size == 0) error = ncl_vinvalbuf(vp, 0, td, 1); else error = ncl_vinvalbuf(vp, V_SAVE, td, 1); if (error) { vnode_pager_setsize(vp, tsize); return (error); } /* * Call nfscl_delegmodtime() to set the modify time * locally, as required. */ nfscl_delegmodtime(vp); } else mtx_unlock(&np->n_mtx); /* * np->n_size has already been set to vap->va_size * in ncl_meta_setsize(). We must set it again since * nfs_loadattrcache() could be called through * ncl_meta_setsize() and could modify np->n_size. */ mtx_lock(&np->n_mtx); np->n_vattr.na_size = np->n_size = vap->va_size; mtx_unlock(&np->n_mtx); } } else { mtx_lock(&np->n_mtx); if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) && vp->v_type == VREG) { mtx_unlock(&np->n_mtx); if ((error = ncl_vinvalbuf(vp, V_SAVE, td, 1)) != 0 && (error == EINTR || error == EIO)) return (error); } else mtx_unlock(&np->n_mtx); } error = nfs_setattrrpc(vp, vap, ap->a_cred, td); if (error && vap->va_size != VNOVAL) { mtx_lock(&np->n_mtx); np->n_size = np->n_vattr.na_size = tsize; vnode_pager_setsize(vp, tsize); mtx_unlock(&np->n_mtx); } return (error); } /* * Do an nfs setattr rpc. */ static int nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred, struct thread *td) { struct nfsnode *np = VTONFS(vp); int error, ret, attrflag, i; struct nfsvattr nfsva; if (NFS_ISV34(vp)) { mtx_lock(&np->n_mtx); for (i = 0; i < NFS_ACCESSCACHESIZE; i++) np->n_accesscache[i].stamp = 0; np->n_flag |= NDELEGMOD; mtx_unlock(&np->n_mtx); KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp); } error = nfsrpc_setattr(vp, vap, NULL, cred, td, &nfsva, &attrflag, NULL); if (attrflag) { ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (ret && !error) error = ret; } if (error && NFS_ISV4(vp)) error = nfscl_maperr(td, error, vap->va_uid, vap->va_gid); return (error); } /* * nfs lookup call, one step at a time... * First look in cache * If not found, unlock the directory nfsnode and do the rpc */ static int nfs_lookup(struct vop_lookup_args *ap) { struct componentname *cnp = ap->a_cnp; struct vnode *dvp = ap->a_dvp; struct vnode **vpp = ap->a_vpp; struct mount *mp = dvp->v_mount; int flags = cnp->cn_flags; struct vnode *newvp; struct nfsmount *nmp; struct nfsnode *np, *newnp; int error = 0, attrflag, dattrflag, ltype, ncticks; struct thread *td = cnp->cn_thread; struct nfsfh *nfhp; struct nfsvattr dnfsva, nfsva; struct vattr vattr; struct timespec nctime; *vpp = NULLVP; if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) return (EROFS); if (dvp->v_type != VDIR) return (ENOTDIR); nmp = VFSTONFS(mp); np = VTONFS(dvp); /* For NFSv4, wait until any remove is done. */ mtx_lock(&np->n_mtx); while (NFSHASNFSV4(nmp) && (np->n_flag & NREMOVEINPROG)) { np->n_flag |= NREMOVEWANT; (void) msleep((caddr_t)np, &np->n_mtx, PZERO, "nfslkup", 0); } mtx_unlock(&np->n_mtx); if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0) return (error); error = cache_lookup(dvp, vpp, cnp, &nctime, &ncticks); if (error > 0 && error != ENOENT) return (error); if (error == -1) { /* * Lookups of "." are special and always return the * current directory. cache_lookup() already handles * associated locking bookkeeping, etc. */ if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') { /* XXX: Is this really correct? */ if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; return (0); } /* * We only accept a positive hit in the cache if the * change time of the file matches our cached copy. * Otherwise, we discard the cache entry and fallback * to doing a lookup RPC. We also only trust cache * entries for less than nm_nametimeo seconds. * * To better handle stale file handles and attributes, * clear the attribute cache of this node if it is a * leaf component, part of an open() call, and not * locally modified before fetching the attributes. * This should allow stale file handles to be detected * here where we can fall back to a LOOKUP RPC to * recover rather than having nfs_open() detect the * stale file handle and failing open(2) with ESTALE. */ newvp = *vpp; newnp = VTONFS(newvp); if (!(nmp->nm_flag & NFSMNT_NOCTO) && (flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) && !(newnp->n_flag & NMODIFIED)) { mtx_lock(&newnp->n_mtx); newnp->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp); mtx_unlock(&newnp->n_mtx); } if (nfscl_nodeleg(newvp, 0) == 0 || ((u_int)(ticks - ncticks) < (nmp->nm_nametimeo * hz) && VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 && timespeccmp(&vattr.va_ctime, &nctime, ==))) { NFSINCRGLOBAL(newnfsstats.lookupcache_hits); if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; return (0); } cache_purge(newvp); if (dvp != newvp) vput(newvp); else vrele(newvp); *vpp = NULLVP; } else if (error == ENOENT) { if (dvp->v_iflag & VI_DOOMED) return (ENOENT); /* * We only accept a negative hit in the cache if the * modification time of the parent directory matches * the cached copy in the name cache entry. * Otherwise, we discard all of the negative cache * entries for this directory. We also only trust * negative cache entries for up to nm_negnametimeo * seconds. */ if ((u_int)(ticks - ncticks) < (nmp->nm_negnametimeo * hz) && VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 && timespeccmp(&vattr.va_mtime, &nctime, ==)) { NFSINCRGLOBAL(newnfsstats.lookupcache_hits); return (ENOENT); } cache_purge_negative(dvp); } error = 0; newvp = NULLVP; NFSINCRGLOBAL(newnfsstats.lookupcache_misses); error = nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, td, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); if (dattrflag) (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); if (error) { if (newvp != NULLVP) { vput(newvp); *vpp = NULLVP; } if (error != ENOENT) { if (NFS_ISV4(dvp)) error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); return (error); } /* The requested file was not found. */ if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) && (flags & ISLASTCN)) { /* * XXX: UFS does a full VOP_ACCESS(dvp, * VWRITE) here instead of just checking * MNT_RDONLY. */ if (mp->mnt_flag & MNT_RDONLY) return (EROFS); cnp->cn_flags |= SAVENAME; return (EJUSTRETURN); } if ((cnp->cn_flags & MAKEENTRY) != 0 && dattrflag) { /* * Cache the modification time of the parent * directory from the post-op attributes in * the name cache entry. The negative cache * entry will be ignored once the directory * has changed. Don't bother adding the entry * if the directory has already changed. */ mtx_lock(&np->n_mtx); if (timespeccmp(&np->n_vattr.na_mtime, &dnfsva.na_mtime, ==)) { mtx_unlock(&np->n_mtx); cache_enter_time(dvp, NULL, cnp, &dnfsva.na_mtime, NULL); } else mtx_unlock(&np->n_mtx); } return (ENOENT); } /* * Handle RENAME case... */ if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) { if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) { FREE((caddr_t)nfhp, M_NFSFH); return (EISDIR); } error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL, LK_EXCLUSIVE); if (error) return (error); newvp = NFSTOV(np); if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); *vpp = newvp; cnp->cn_flags |= SAVENAME; return (0); } if (flags & ISDOTDOT) { ltype = NFSVOPISLOCKED(dvp); error = vfs_busy(mp, MBF_NOWAIT); if (error != 0) { vfs_ref(mp); NFSVOPUNLOCK(dvp, 0); error = vfs_busy(mp, 0); NFSVOPLOCK(dvp, ltype | LK_RETRY); vfs_rel(mp); if (error == 0 && (dvp->v_iflag & VI_DOOMED)) { vfs_unbusy(mp); error = ENOENT; } if (error != 0) return (error); } NFSVOPUNLOCK(dvp, 0); error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL, cnp->cn_lkflags); if (error == 0) newvp = NFSTOV(np); vfs_unbusy(mp); if (newvp != dvp) NFSVOPLOCK(dvp, ltype | LK_RETRY); if (dvp->v_iflag & VI_DOOMED) { if (error == 0) { if (newvp == dvp) vrele(newvp); else vput(newvp); } error = ENOENT; } if (error != 0) return (error); if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); } else if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) { FREE((caddr_t)nfhp, M_NFSFH); VREF(dvp); newvp = dvp; if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); } else { error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL, cnp->cn_lkflags); if (error) return (error); newvp = NFSTOV(np); if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); else if ((flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) && !(np->n_flag & NMODIFIED)) { /* * Flush the attribute cache when opening a * leaf node to ensure that fresh attributes * are fetched in nfs_open() since we did not * fetch attributes from the LOOKUP reply. */ mtx_lock(&np->n_mtx); np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp); mtx_unlock(&np->n_mtx); } } if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; if ((cnp->cn_flags & MAKEENTRY) && (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN)) && attrflag != 0 && (newvp->v_type != VDIR || dattrflag != 0)) cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, newvp->v_type != VDIR ? NULL : &dnfsva.na_ctime); *vpp = newvp; return (0); } /* * nfs read call. * Just call ncl_bioread() to do the work. */ static int nfs_read(struct vop_read_args *ap) { struct vnode *vp = ap->a_vp; switch (vp->v_type) { case VREG: return (ncl_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred)); case VDIR: return (EISDIR); default: return (EOPNOTSUPP); } } /* * nfs readlink call */ static int nfs_readlink(struct vop_readlink_args *ap) { struct vnode *vp = ap->a_vp; if (vp->v_type != VLNK) return (EINVAL); return (ncl_bioread(vp, ap->a_uio, 0, ap->a_cred)); } /* * Do a readlink rpc. * Called by ncl_doio() from below the buffer cache. */ int ncl_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) { int error, ret, attrflag; struct nfsvattr nfsva; error = nfsrpc_readlink(vp, uiop, cred, uiop->uio_td, &nfsva, &attrflag, NULL); if (attrflag) { ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (ret && !error) error = ret; } if (error && NFS_ISV4(vp)) error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0); return (error); } /* * nfs read rpc call * Ditto above */ int ncl_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) { int error, ret, attrflag; struct nfsvattr nfsva; struct nfsmount *nmp; nmp = VFSTONFS(vnode_mount(vp)); error = EIO; attrflag = 0; if (NFSHASPNFS(nmp)) error = nfscl_doiods(vp, uiop, NULL, NULL, NFSV4OPEN_ACCESSREAD, cred, uiop->uio_td); NFSCL_DEBUG(4, "readrpc: aft doiods=%d\n", error); if (error != 0) error = nfsrpc_read(vp, uiop, cred, uiop->uio_td, &nfsva, &attrflag, NULL); if (attrflag) { ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (ret && !error) error = ret; } if (error && NFS_ISV4(vp)) error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0); return (error); } /* * nfs write call */ int ncl_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, int *iomode, int *must_commit, int called_from_strategy) { struct nfsvattr nfsva; int error, attrflag, ret; struct nfsmount *nmp; nmp = VFSTONFS(vnode_mount(vp)); error = EIO; attrflag = 0; if (NFSHASPNFS(nmp)) error = nfscl_doiods(vp, uiop, iomode, must_commit, NFSV4OPEN_ACCESSWRITE, cred, uiop->uio_td); NFSCL_DEBUG(4, "writerpc: aft doiods=%d\n", error); if (error != 0) error = nfsrpc_write(vp, uiop, iomode, must_commit, cred, uiop->uio_td, &nfsva, &attrflag, NULL, called_from_strategy); if (attrflag) { if (VTONFS(vp)->n_flag & ND_NFSV4) ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 1, 1); else ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (ret && !error) error = ret; } if (DOINGASYNC(vp)) *iomode = NFSWRITE_FILESYNC; if (error && NFS_ISV4(vp)) error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0); return (error); } /* * nfs mknod rpc * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the * mode set to specify the file type and the size field for rdev. */ static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap) { struct nfsvattr nfsva, dnfsva; struct vnode *newvp = NULL; struct nfsnode *np = NULL, *dnp; struct nfsfh *nfhp; struct vattr vattr; int error = 0, attrflag, dattrflag; u_int32_t rdev; if (vap->va_type == VCHR || vap->va_type == VBLK) rdev = vap->va_rdev; else if (vap->va_type == VFIFO || vap->va_type == VSOCK) rdev = 0xffffffff; else return (EOPNOTSUPP); if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred))) return (error); error = nfsrpc_mknod(dvp, cnp->cn_nameptr, cnp->cn_namelen, vap, rdev, vap->va_type, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); if (!error) { if (!nfhp) (void) nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); if (nfhp) error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread, &np, NULL, LK_EXCLUSIVE); } if (dattrflag) (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); if (!error) { newvp = NFSTOV(np); if (attrflag != 0) { error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); if (error != 0) vput(newvp); } } if (!error) { *vpp = newvp; } else if (NFS_ISV4(dvp)) { error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid, vap->va_gid); } dnp = VTONFS(dvp); mtx_lock(&dnp->n_mtx); dnp->n_flag |= NMODIFIED; if (!dattrflag) { dnp->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); } mtx_unlock(&dnp->n_mtx); return (error); } /* * nfs mknod vop * just call nfs_mknodrpc() to do the work. */ /* ARGSUSED */ static int nfs_mknod(struct vop_mknod_args *ap) { return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap)); } static struct mtx nfs_cverf_mtx; MTX_SYSINIT(nfs_cverf_mtx, &nfs_cverf_mtx, "NFS create verifier mutex", MTX_DEF); static nfsquad_t nfs_get_cverf(void) { static nfsquad_t cverf; nfsquad_t ret; static int cverf_initialized = 0; mtx_lock(&nfs_cverf_mtx); if (cverf_initialized == 0) { cverf.lval[0] = arc4random(); cverf.lval[1] = arc4random(); cverf_initialized = 1; } else cverf.qval++; ret = cverf; mtx_unlock(&nfs_cverf_mtx); return (ret); } /* * nfs file create call */ static int nfs_create(struct vop_create_args *ap) { struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct componentname *cnp = ap->a_cnp; struct nfsnode *np = NULL, *dnp; struct vnode *newvp = NULL; struct nfsmount *nmp; struct nfsvattr dnfsva, nfsva; struct nfsfh *nfhp; nfsquad_t cverf; int error = 0, attrflag, dattrflag, fmode = 0; struct vattr vattr; /* * Oops, not for me.. */ if (vap->va_type == VSOCK) return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap)); if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred))) return (error); if (vap->va_vaflags & VA_EXCLUSIVE) fmode |= O_EXCL; dnp = VTONFS(dvp); nmp = VFSTONFS(vnode_mount(dvp)); again: /* For NFSv4, wait until any remove is done. */ mtx_lock(&dnp->n_mtx); while (NFSHASNFSV4(nmp) && (dnp->n_flag & NREMOVEINPROG)) { dnp->n_flag |= NREMOVEWANT; (void) msleep((caddr_t)dnp, &dnp->n_mtx, PZERO, "nfscrt", 0); } mtx_unlock(&dnp->n_mtx); cverf = nfs_get_cverf(); error = nfsrpc_create(dvp, cnp->cn_nameptr, cnp->cn_namelen, vap, cverf, fmode, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); if (!error) { if (nfhp == NULL) (void) nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); if (nfhp != NULL) error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread, &np, NULL, LK_EXCLUSIVE); } if (dattrflag) (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); if (!error) { newvp = NFSTOV(np); if (attrflag == 0) error = nfsrpc_getattr(newvp, cnp->cn_cred, cnp->cn_thread, &nfsva, NULL); if (error == 0) error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); } if (error) { if (newvp != NULL) { vput(newvp); newvp = NULL; } if (NFS_ISV34(dvp) && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) { fmode &= ~O_EXCL; goto again; } } else if (NFS_ISV34(dvp) && (fmode & O_EXCL)) { if (nfscl_checksattr(vap, &nfsva)) { error = nfsrpc_setattr(newvp, vap, NULL, cnp->cn_cred, cnp->cn_thread, &nfsva, &attrflag, NULL); if (error && (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL)) { /* try again without setting uid/gid */ vap->va_uid = (uid_t)VNOVAL; vap->va_gid = (uid_t)VNOVAL; error = nfsrpc_setattr(newvp, vap, NULL, cnp->cn_cred, cnp->cn_thread, &nfsva, &attrflag, NULL); } if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); if (error != 0) vput(newvp); } } if (!error) { if ((cnp->cn_flags & MAKEENTRY) && attrflag) cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, NULL); *ap->a_vpp = newvp; } else if (NFS_ISV4(dvp)) { error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid, vap->va_gid); } mtx_lock(&dnp->n_mtx); dnp->n_flag |= NMODIFIED; if (!dattrflag) { dnp->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); } mtx_unlock(&dnp->n_mtx); return (error); } /* * nfs file remove call * To try and make nfs semantics closer to ufs semantics, a file that has * other processes using the vnode is renamed instead of removed and then * removed later on the last close. * - If v_usecount > 1 * If a rename is not already in the works * call nfs_sillyrename() to set it up * else * do the remove rpc */ static int nfs_remove(struct vop_remove_args *ap) { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct nfsnode *np = VTONFS(vp); int error = 0; struct vattr vattr; KASSERT((cnp->cn_flags & HASBUF) != 0, ("nfs_remove: no name")); KASSERT(vrefcnt(vp) > 0, ("nfs_remove: bad v_usecount")); if (vp->v_type == VDIR) error = EPERM; else if (vrefcnt(vp) == 1 || (np->n_sillyrename && VOP_GETATTR(vp, &vattr, cnp->cn_cred) == 0 && vattr.va_nlink > 1)) { /* * Purge the name cache so that the chance of a lookup for * the name succeeding while the remove is in progress is * minimized. Without node locking it can still happen, such * that an I/O op returns ESTALE, but since you get this if * another host removes the file.. */ cache_purge(vp); /* * throw away biocache buffers, mainly to avoid * unnecessary delayed writes later. */ error = ncl_vinvalbuf(vp, 0, cnp->cn_thread, 1); /* Do the rpc */ if (error != EINTR && error != EIO) error = nfs_removerpc(dvp, vp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread); /* * Kludge City: If the first reply to the remove rpc is lost.. * the reply to the retransmitted request will be ENOENT * since the file was in fact removed * Therefore, we cheat and return success. */ if (error == ENOENT) error = 0; } else if (!np->n_sillyrename) error = nfs_sillyrename(dvp, vp, cnp); mtx_lock(&np->n_mtx); np->n_attrstamp = 0; mtx_unlock(&np->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); return (error); } /* * nfs file remove rpc called from nfs_inactive */ int ncl_removeit(struct sillyrename *sp, struct vnode *vp) { /* * Make sure that the directory vnode is still valid. * XXX we should lock sp->s_dvp here. */ if (sp->s_dvp->v_type == VBAD) return (0); return (nfs_removerpc(sp->s_dvp, vp, sp->s_name, sp->s_namlen, sp->s_cred, NULL)); } /* * Nfs remove rpc, called from nfs_remove() and ncl_removeit(). */ static int nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name, int namelen, struct ucred *cred, struct thread *td) { struct nfsvattr dnfsva; struct nfsnode *dnp = VTONFS(dvp); int error = 0, dattrflag; mtx_lock(&dnp->n_mtx); dnp->n_flag |= NREMOVEINPROG; mtx_unlock(&dnp->n_mtx); error = nfsrpc_remove(dvp, name, namelen, vp, cred, td, &dnfsva, &dattrflag, NULL); mtx_lock(&dnp->n_mtx); if ((dnp->n_flag & NREMOVEWANT)) { dnp->n_flag &= ~(NREMOVEWANT | NREMOVEINPROG); mtx_unlock(&dnp->n_mtx); wakeup((caddr_t)dnp); } else { dnp->n_flag &= ~NREMOVEINPROG; mtx_unlock(&dnp->n_mtx); } if (dattrflag) (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); mtx_lock(&dnp->n_mtx); dnp->n_flag |= NMODIFIED; if (!dattrflag) { dnp->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); } mtx_unlock(&dnp->n_mtx); if (error && NFS_ISV4(dvp)) error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); return (error); } /* * nfs file rename call */ static int nfs_rename(struct vop_rename_args *ap) { struct vnode *fvp = ap->a_fvp; struct vnode *tvp = ap->a_tvp; struct vnode *fdvp = ap->a_fdvp; struct vnode *tdvp = ap->a_tdvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; struct nfsnode *fnp = VTONFS(ap->a_fvp); struct nfsnode *tdnp = VTONFS(ap->a_tdvp); struct nfsv4node *newv4 = NULL; int error; KASSERT((tcnp->cn_flags & HASBUF) != 0 && (fcnp->cn_flags & HASBUF) != 0, ("nfs_rename: no name")); /* Check for cross-device rename */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; goto out; } if (fvp == tvp) { ncl_printf("nfs_rename: fvp == tvp (can't happen)\n"); error = 0; goto out; } if ((error = NFSVOPLOCK(fvp, LK_EXCLUSIVE)) != 0) goto out; /* * We have to flush B_DELWRI data prior to renaming * the file. If we don't, the delayed-write buffers * can be flushed out later after the file has gone stale * under NFSV3. NFSV2 does not have this problem because * ( as far as I can tell ) it flushes dirty buffers more * often. * * Skip the rename operation if the fsync fails, this can happen * due to the server's volume being full, when we pushed out data * that was written back to our cache earlier. Not checking for * this condition can result in potential (silent) data loss. */ error = VOP_FSYNC(fvp, MNT_WAIT, fcnp->cn_thread); NFSVOPUNLOCK(fvp, 0); if (!error && tvp) error = VOP_FSYNC(tvp, MNT_WAIT, tcnp->cn_thread); if (error) goto out; /* * If the tvp exists and is in use, sillyrename it before doing the * rename of the new file over it. * XXX Can't sillyrename a directory. */ if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename && tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) { vput(tvp); tvp = NULL; } error = nfs_renamerpc(fdvp, fvp, fcnp->cn_nameptr, fcnp->cn_namelen, tdvp, tvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred, tcnp->cn_thread); if (error == 0 && NFS_ISV4(tdvp)) { /* * For NFSv4, check to see if it is the same name and * replace the name, if it is different. */ MALLOC(newv4, struct nfsv4node *, sizeof (struct nfsv4node) + tdnp->n_fhp->nfh_len + tcnp->cn_namelen - 1, M_NFSV4NODE, M_WAITOK); mtx_lock(&tdnp->n_mtx); mtx_lock(&fnp->n_mtx); if (fnp->n_v4 != NULL && fvp->v_type == VREG && (fnp->n_v4->n4_namelen != tcnp->cn_namelen || NFSBCMP(tcnp->cn_nameptr, NFS4NODENAME(fnp->n_v4), tcnp->cn_namelen) || tdnp->n_fhp->nfh_len != fnp->n_v4->n4_fhlen || NFSBCMP(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data, tdnp->n_fhp->nfh_len))) { #ifdef notdef { char nnn[100]; int nnnl; nnnl = (tcnp->cn_namelen < 100) ? tcnp->cn_namelen : 99; bcopy(tcnp->cn_nameptr, nnn, nnnl); nnn[nnnl] = '\0'; printf("ren replace=%s\n",nnn); } #endif FREE((caddr_t)fnp->n_v4, M_NFSV4NODE); fnp->n_v4 = newv4; newv4 = NULL; fnp->n_v4->n4_fhlen = tdnp->n_fhp->nfh_len; fnp->n_v4->n4_namelen = tcnp->cn_namelen; NFSBCOPY(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data, tdnp->n_fhp->nfh_len); NFSBCOPY(tcnp->cn_nameptr, NFS4NODENAME(fnp->n_v4), tcnp->cn_namelen); } mtx_unlock(&tdnp->n_mtx); mtx_unlock(&fnp->n_mtx); if (newv4 != NULL) FREE((caddr_t)newv4, M_NFSV4NODE); } if (fvp->v_type == VDIR) { if (tvp != NULL && tvp->v_type == VDIR) cache_purge(tdvp); cache_purge(fdvp); } out: if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); vrele(fdvp); vrele(fvp); /* * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry. */ if (error == ENOENT) error = 0; return (error); } /* * nfs file rename rpc called from nfs_remove() above */ static int nfs_renameit(struct vnode *sdvp, struct vnode *svp, struct componentname *scnp, struct sillyrename *sp) { return (nfs_renamerpc(sdvp, svp, scnp->cn_nameptr, scnp->cn_namelen, sdvp, NULL, sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_thread)); } /* * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit(). */ static int nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, char *fnameptr, int fnamelen, struct vnode *tdvp, struct vnode *tvp, char *tnameptr, int tnamelen, struct ucred *cred, struct thread *td) { struct nfsvattr fnfsva, tnfsva; struct nfsnode *fdnp = VTONFS(fdvp); struct nfsnode *tdnp = VTONFS(tdvp); int error = 0, fattrflag, tattrflag; error = nfsrpc_rename(fdvp, fvp, fnameptr, fnamelen, tdvp, tvp, tnameptr, tnamelen, cred, td, &fnfsva, &tnfsva, &fattrflag, &tattrflag, NULL, NULL); mtx_lock(&fdnp->n_mtx); fdnp->n_flag |= NMODIFIED; if (fattrflag != 0) { mtx_unlock(&fdnp->n_mtx); (void) nfscl_loadattrcache(&fdvp, &fnfsva, NULL, NULL, 0, 1); } else { fdnp->n_attrstamp = 0; mtx_unlock(&fdnp->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(fdvp); } mtx_lock(&tdnp->n_mtx); tdnp->n_flag |= NMODIFIED; if (tattrflag != 0) { mtx_unlock(&tdnp->n_mtx); (void) nfscl_loadattrcache(&tdvp, &tnfsva, NULL, NULL, 0, 1); } else { tdnp->n_attrstamp = 0; mtx_unlock(&tdnp->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp); } if (error && NFS_ISV4(fdvp)) error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); return (error); } /* * nfs hard link create call */ static int nfs_link(struct vop_link_args *ap) { struct vnode *vp = ap->a_vp; struct vnode *tdvp = ap->a_tdvp; struct componentname *cnp = ap->a_cnp; struct nfsnode *np, *tdnp; struct nfsvattr nfsva, dnfsva; int error = 0, attrflag, dattrflag; /* * Push all writes to the server, so that the attribute cache * doesn't get "out of sync" with the server. * XXX There should be a better way! */ VOP_FSYNC(vp, MNT_WAIT, cnp->cn_thread); error = nfsrpc_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &attrflag, &dattrflag, NULL); tdnp = VTONFS(tdvp); mtx_lock(&tdnp->n_mtx); tdnp->n_flag |= NMODIFIED; if (dattrflag != 0) { mtx_unlock(&tdnp->n_mtx); (void) nfscl_loadattrcache(&tdvp, &dnfsva, NULL, NULL, 0, 1); } else { tdnp->n_attrstamp = 0; mtx_unlock(&tdnp->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp); } if (attrflag) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); else { np = VTONFS(vp); mtx_lock(&np->n_mtx); np->n_attrstamp = 0; mtx_unlock(&np->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); } /* * If negative lookup caching is enabled, I might as well * add an entry for this node. Not necessary for correctness, * but if negative caching is enabled, then the system * must care about lookup caching hit rate, so... */ if (VFSTONFS(vp->v_mount)->nm_negnametimeo != 0 && (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) { cache_enter_time(tdvp, vp, cnp, &nfsva.na_ctime, NULL); } if (error && NFS_ISV4(vp)) error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0, (gid_t)0); return (error); } /* * nfs symbolic link create call */ static int nfs_symlink(struct vop_symlink_args *ap) { struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct componentname *cnp = ap->a_cnp; struct nfsvattr nfsva, dnfsva; struct nfsfh *nfhp; struct nfsnode *np = NULL, *dnp; struct vnode *newvp = NULL; int error = 0, attrflag, dattrflag, ret; vap->va_type = VLNK; error = nfsrpc_symlink(dvp, cnp->cn_nameptr, cnp->cn_namelen, ap->a_target, vap, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); if (nfhp) { ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread, &np, NULL, LK_EXCLUSIVE); if (!ret) newvp = NFSTOV(np); else if (!error) error = ret; } if (newvp != NULL) { if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); } else if (!error) { /* * If we do not have an error and we could not extract the * newvp from the response due to the request being NFSv2, we * have to do a lookup in order to obtain a newvp to return. */ error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np); if (!error) newvp = NFSTOV(np); } if (error) { if (newvp) vput(newvp); if (NFS_ISV4(dvp)) error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid, vap->va_gid); } else { *ap->a_vpp = newvp; } dnp = VTONFS(dvp); mtx_lock(&dnp->n_mtx); dnp->n_flag |= NMODIFIED; if (dattrflag != 0) { mtx_unlock(&dnp->n_mtx); (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); } else { dnp->n_attrstamp = 0; mtx_unlock(&dnp->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); } /* * If negative lookup caching is enabled, I might as well * add an entry for this node. Not necessary for correctness, * but if negative caching is enabled, then the system * must care about lookup caching hit rate, so... */ if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 && (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) { cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, NULL); } return (error); } /* * nfs make dir call */ static int nfs_mkdir(struct vop_mkdir_args *ap) { struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct componentname *cnp = ap->a_cnp; struct nfsnode *np = NULL, *dnp; struct vnode *newvp = NULL; struct vattr vattr; struct nfsfh *nfhp; struct nfsvattr nfsva, dnfsva; int error = 0, attrflag, dattrflag, ret; if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0) return (error); vap->va_type = VDIR; error = nfsrpc_mkdir(dvp, cnp->cn_nameptr, cnp->cn_namelen, vap, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); dnp = VTONFS(dvp); mtx_lock(&dnp->n_mtx); dnp->n_flag |= NMODIFIED; if (dattrflag != 0) { mtx_unlock(&dnp->n_mtx); (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); } else { dnp->n_attrstamp = 0; mtx_unlock(&dnp->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); } if (nfhp) { ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread, &np, NULL, LK_EXCLUSIVE); if (!ret) { newvp = NFSTOV(np); if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); } else if (!error) error = ret; } if (!error && newvp == NULL) { error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np); if (!error) { newvp = NFSTOV(np); if (newvp->v_type != VDIR) error = EEXIST; } } if (error) { if (newvp) vput(newvp); if (NFS_ISV4(dvp)) error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid, vap->va_gid); } else { /* * If negative lookup caching is enabled, I might as well * add an entry for this node. Not necessary for correctness, * but if negative caching is enabled, then the system * must care about lookup caching hit rate, so... */ if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 && (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && dattrflag != 0) cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, &dnfsva.na_ctime); *ap->a_vpp = newvp; } return (error); } /* * nfs remove directory call */ static int nfs_rmdir(struct vop_rmdir_args *ap) { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct nfsnode *dnp; struct nfsvattr dnfsva; int error, dattrflag; if (dvp == vp) return (EINVAL); error = nfsrpc_rmdir(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &dnfsva, &dattrflag, NULL); dnp = VTONFS(dvp); mtx_lock(&dnp->n_mtx); dnp->n_flag |= NMODIFIED; if (dattrflag != 0) { mtx_unlock(&dnp->n_mtx); (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); } else { dnp->n_attrstamp = 0; mtx_unlock(&dnp->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); } cache_purge(dvp); cache_purge(vp); if (error && NFS_ISV4(dvp)) error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0, (gid_t)0); /* * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. */ if (error == ENOENT) error = 0; return (error); } /* * nfs readdir call */ static int nfs_readdir(struct vop_readdir_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct uio *uio = ap->a_uio; ssize_t tresid, left; int error = 0; struct vattr vattr; if (ap->a_eofflag != NULL) *ap->a_eofflag = 0; if (vp->v_type != VDIR) return(EPERM); /* * First, check for hit on the EOF offset cache */ if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset && (np->n_flag & NMODIFIED) == 0) { if (VOP_GETATTR(vp, &vattr, ap->a_cred) == 0) { mtx_lock(&np->n_mtx); if ((NFS_ISV4(vp) && np->n_change == vattr.va_filerev) || !NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { mtx_unlock(&np->n_mtx); NFSINCRGLOBAL(newnfsstats.direofcache_hits); if (ap->a_eofflag != NULL) *ap->a_eofflag = 1; return (0); } else mtx_unlock(&np->n_mtx); } } /* * NFS always guarantees that directory entries don't straddle * DIRBLKSIZ boundaries. As such, we need to limit the size * to an exact multiple of DIRBLKSIZ, to avoid copying a partial * directory entry. */ left = uio->uio_resid % DIRBLKSIZ; if (left == uio->uio_resid) return (EINVAL); uio->uio_resid -= left; /* * Call ncl_bioread() to do the real work. */ tresid = uio->uio_resid; error = ncl_bioread(vp, uio, 0, ap->a_cred); if (!error && uio->uio_resid == tresid) { NFSINCRGLOBAL(newnfsstats.direofcache_misses); if (ap->a_eofflag != NULL) *ap->a_eofflag = 1; } /* Add the partial DIRBLKSIZ (left) back in. */ uio->uio_resid += left; return (error); } /* * Readdir rpc call. * Called from below the buffer cache by ncl_doio(). */ int ncl_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, struct thread *td) { struct nfsvattr nfsva; nfsuint64 *cookiep, cookie; struct nfsnode *dnp = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, eof, attrflag; KASSERT(uiop->uio_iovcnt == 1 && (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 && (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0, ("nfs readdirrpc bad uio")); /* * If there is no cookie, assume directory was stale. */ ncl_dircookie_lock(dnp); cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0); if (cookiep) { cookie = *cookiep; ncl_dircookie_unlock(dnp); } else { ncl_dircookie_unlock(dnp); return (NFSERR_BAD_COOKIE); } if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) (void)ncl_fsinfo(nmp, vp, cred, td); error = nfsrpc_readdir(vp, uiop, &cookie, cred, td, &nfsva, &attrflag, &eof, NULL); if (attrflag) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (!error) { /* * We are now either at the end of the directory or have filled * the block. */ if (eof) dnp->n_direofoffset = uiop->uio_offset; else { if (uiop->uio_resid > 0) ncl_printf("EEK! readdirrpc resid > 0\n"); ncl_dircookie_lock(dnp); cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1); *cookiep = cookie; ncl_dircookie_unlock(dnp); } } else if (NFS_ISV4(vp)) { error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); } return (error); } /* * NFS V3 readdir plus RPC. Used in place of ncl_readdirrpc(). */ int ncl_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, struct thread *td) { struct nfsvattr nfsva; nfsuint64 *cookiep, cookie; struct nfsnode *dnp = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, attrflag, eof; KASSERT(uiop->uio_iovcnt == 1 && (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 && (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0, ("nfs readdirplusrpc bad uio")); /* * If there is no cookie, assume directory was stale. */ ncl_dircookie_lock(dnp); cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0); if (cookiep) { cookie = *cookiep; ncl_dircookie_unlock(dnp); } else { ncl_dircookie_unlock(dnp); return (NFSERR_BAD_COOKIE); } if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) (void)ncl_fsinfo(nmp, vp, cred, td); error = nfsrpc_readdirplus(vp, uiop, &cookie, cred, td, &nfsva, &attrflag, &eof, NULL); if (attrflag) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (!error) { /* * We are now either at end of the directory or have filled the * the block. */ if (eof) dnp->n_direofoffset = uiop->uio_offset; else { if (uiop->uio_resid > 0) ncl_printf("EEK! readdirplusrpc resid > 0\n"); ncl_dircookie_lock(dnp); cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1); *cookiep = cookie; ncl_dircookie_unlock(dnp); } } else if (NFS_ISV4(vp)) { error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); } return (error); } /* * Silly rename. To make the NFS filesystem that is stateless look a little * more like the "ufs" a remove of an active vnode is translated to a rename * to a funny looking filename that is removed by nfs_inactive on the * nfsnode. There is the potential for another process on a different client * to create the same funny name between the nfs_lookitup() fails and the * nfs_rename() completes, but... */ static int nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) { struct sillyrename *sp; struct nfsnode *np; int error; short pid; unsigned int lticks; cache_purge(dvp); np = VTONFS(vp); KASSERT(vp->v_type != VDIR, ("nfs: sillyrename dir")); MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename), M_NEWNFSREQ, M_WAITOK); sp->s_cred = crhold(cnp->cn_cred); sp->s_dvp = dvp; VREF(dvp); /* * Fudge together a funny name. * Changing the format of the funny name to accommodate more * sillynames per directory. * The name is now changed to .nfs...4, where ticks is * CPU ticks since boot. */ pid = cnp->cn_thread->td_proc->p_pid; lticks = (unsigned int)ticks; for ( ; ; ) { sp->s_namlen = sprintf(sp->s_name, ".nfs.%08x.%04x4.4", lticks, pid); if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, cnp->cn_thread, NULL)) break; lticks++; } error = nfs_renameit(dvp, vp, cnp, sp); if (error) goto bad; error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, cnp->cn_thread, &np); np->n_sillyrename = sp; return (0); bad: vrele(sp->s_dvp); crfree(sp->s_cred); free((caddr_t)sp, M_NEWNFSREQ); return (error); } /* * Look up a file name and optionally either update the file handle or * allocate an nfsnode, depending on the value of npp. * npp == NULL --> just do the lookup * *npp == NULL --> allocate a new nfsnode and make sure attributes are * handled too * *npp != NULL --> update the file handle in the vnode */ static int nfs_lookitup(struct vnode *dvp, char *name, int len, struct ucred *cred, struct thread *td, struct nfsnode **npp) { struct vnode *newvp = NULL, *vp; struct nfsnode *np, *dnp = VTONFS(dvp); struct nfsfh *nfhp, *onfhp; struct nfsvattr nfsva, dnfsva; struct componentname cn; int error = 0, attrflag, dattrflag; u_int hash; error = nfsrpc_lookup(dvp, name, len, cred, td, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); if (dattrflag) (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); if (npp && !error) { if (*npp != NULL) { np = *npp; vp = NFSTOV(np); /* * For NFSv4, check to see if it is the same name and * replace the name, if it is different. */ if (np->n_v4 != NULL && nfsva.na_type == VREG && (np->n_v4->n4_namelen != len || NFSBCMP(name, NFS4NODENAME(np->n_v4), len) || dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen || NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, dnp->n_fhp->nfh_len))) { #ifdef notdef { char nnn[100]; int nnnl; nnnl = (len < 100) ? len : 99; bcopy(name, nnn, nnnl); nnn[nnnl] = '\0'; printf("replace=%s\n",nnn); } #endif FREE((caddr_t)np->n_v4, M_NFSV4NODE); MALLOC(np->n_v4, struct nfsv4node *, sizeof (struct nfsv4node) + dnp->n_fhp->nfh_len + len - 1, M_NFSV4NODE, M_WAITOK); np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len; np->n_v4->n4_namelen = len; NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, dnp->n_fhp->nfh_len); NFSBCOPY(name, NFS4NODENAME(np->n_v4), len); } hash = fnv_32_buf(nfhp->nfh_fh, nfhp->nfh_len, FNV1_32_INIT); onfhp = np->n_fhp; /* * Rehash node for new file handle. */ vfs_hash_rehash(vp, hash); np->n_fhp = nfhp; if (onfhp != NULL) FREE((caddr_t)onfhp, M_NFSFH); newvp = NFSTOV(np); } else if (NFS_CMPFH(dnp, nfhp->nfh_fh, nfhp->nfh_len)) { FREE((caddr_t)nfhp, M_NFSFH); VREF(dvp); newvp = dvp; } else { cn.cn_nameptr = name; cn.cn_namelen = len; error = nfscl_nget(dvp->v_mount, dvp, nfhp, &cn, td, &np, NULL, LK_EXCLUSIVE); if (error) return (error); newvp = NFSTOV(np); } if (!attrflag && *npp == NULL) { if (newvp == dvp) vrele(newvp); else vput(newvp); return (ENOENT); } if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); } if (npp && *npp == NULL) { if (error) { if (newvp) { if (newvp == dvp) vrele(newvp); else vput(newvp); } } else *npp = np; } if (error && NFS_ISV4(dvp)) error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); return (error); } /* * Nfs Version 3 and 4 commit rpc */ int ncl_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, struct thread *td) { struct nfsvattr nfsva; struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error, attrflag; mtx_lock(&nmp->nm_mtx); if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) { mtx_unlock(&nmp->nm_mtx); return (0); } mtx_unlock(&nmp->nm_mtx); error = nfsrpc_commit(vp, offset, cnt, cred, td, &nfsva, &attrflag, NULL); if (attrflag != 0) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (error != 0 && NFS_ISV4(vp)) error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); return (error); } /* * Strategy routine. * For async requests when nfsiod(s) are running, queue the request by * calling ncl_asyncio(), otherwise just all ncl_doio() to do the * request. */ static int nfs_strategy(struct vop_strategy_args *ap) { struct buf *bp = ap->a_bp; struct ucred *cr; KASSERT(!(bp->b_flags & B_DONE), ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp)); BUF_ASSERT_HELD(bp); if (bp->b_iocmd == BIO_READ) cr = bp->b_rcred; else cr = bp->b_wcred; /* * If the op is asynchronous and an i/o daemon is waiting * queue the request, wake it up and wait for completion * otherwise just do it ourselves. */ if ((bp->b_flags & B_ASYNC) == 0 || ncl_asyncio(VFSTONFS(ap->a_vp->v_mount), bp, NOCRED, curthread)) (void) ncl_doio(ap->a_vp, bp, cr, curthread, 1); return (0); } /* * fsync vnode op. Just call ncl_flush() with commit == 1. */ /* ARGSUSED */ static int nfs_fsync(struct vop_fsync_args *ap) { if (ap->a_vp->v_type != VREG) { /* * For NFS, metadata is changed synchronously on the server, * so there is nothing to flush. Also, ncl_flush() clears * the NMODIFIED flag and that shouldn't be done here for * directories. */ return (0); } return (ncl_flush(ap->a_vp, ap->a_waitfor, NULL, ap->a_td, 1, 0)); } /* * Flush all the blocks associated with a vnode. * Walk through the buffer pool and push any dirty pages * associated with the vnode. * If the called_from_renewthread argument is TRUE, it has been called * from the NFSv4 renew thread and, as such, cannot block indefinitely * waiting for a buffer write to complete. */ int ncl_flush(struct vnode *vp, int waitfor, struct ucred *cred, struct thread *td, int commit, int called_from_renewthread) { struct nfsnode *np = VTONFS(vp); struct buf *bp; int i; struct buf *nbp; struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos; int passone = 1, trycnt = 0; u_quad_t off, endoff, toff; struct ucred* wcred = NULL; struct buf **bvec = NULL; struct bufobj *bo; #ifndef NFS_COMMITBVECSIZ #define NFS_COMMITBVECSIZ 20 #endif struct buf *bvec_on_stack[NFS_COMMITBVECSIZ]; int bvecsize = 0, bveccount; if (called_from_renewthread != 0) slptimeo = hz; if (nmp->nm_flag & NFSMNT_INT) slpflag = PCATCH; if (!commit) passone = 0; bo = &vp->v_bufobj; /* * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the * server, but has not been committed to stable storage on the server * yet. On the first pass, the byte range is worked out and the commit * rpc is done. On the second pass, ncl_writebp() is called to do the * job. */ again: off = (u_quad_t)-1; endoff = 0; bvecpos = 0; if (NFS_ISV34(vp) && commit) { if (bvec != NULL && bvec != bvec_on_stack) free(bvec, M_TEMP); /* * Count up how many buffers waiting for a commit. */ bveccount = 0; BO_LOCK(bo); TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (!BUF_ISLOCKED(bp) && (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) == (B_DELWRI | B_NEEDCOMMIT)) bveccount++; } /* * Allocate space to remember the list of bufs to commit. It is * important to use M_NOWAIT here to avoid a race with nfs_write. * If we can't get memory (for whatever reason), we will end up * committing the buffers one-by-one in the loop below. */ if (bveccount > NFS_COMMITBVECSIZ) { /* * Release the vnode interlock to avoid a lock * order reversal. */ BO_UNLOCK(bo); bvec = (struct buf **) malloc(bveccount * sizeof(struct buf *), M_TEMP, M_NOWAIT); BO_LOCK(bo); if (bvec == NULL) { bvec = bvec_on_stack; bvecsize = NFS_COMMITBVECSIZ; } else bvecsize = bveccount; } else { bvec = bvec_on_stack; bvecsize = NFS_COMMITBVECSIZ; } TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (bvecpos >= bvecsize) break; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { nbp = TAILQ_NEXT(bp, b_bobufs); continue; } if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) != (B_DELWRI | B_NEEDCOMMIT)) { BUF_UNLOCK(bp); nbp = TAILQ_NEXT(bp, b_bobufs); continue; } BO_UNLOCK(bo); bremfree(bp); /* * Work out if all buffers are using the same cred * so we can deal with them all with one commit. * * NOTE: we are not clearing B_DONE here, so we have * to do it later on in this routine if we intend to * initiate I/O on the bp. * * Note: to avoid loopback deadlocks, we do not * assign b_runningbufspace. */ if (wcred == NULL) wcred = bp->b_wcred; else if (wcred != bp->b_wcred) wcred = NOCRED; vfs_busy_pages(bp, 1); BO_LOCK(bo); /* * bp is protected by being locked, but nbp is not * and vfs_busy_pages() may sleep. We have to * recalculate nbp. */ nbp = TAILQ_NEXT(bp, b_bobufs); /* * A list of these buffers is kept so that the * second loop knows which buffers have actually * been committed. This is necessary, since there * may be a race between the commit rpc and new * uncommitted writes on the file. */ bvec[bvecpos++] = bp; toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; if (toff < off) off = toff; toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); if (toff > endoff) endoff = toff; } BO_UNLOCK(bo); } if (bvecpos > 0) { /* * Commit data on the server, as required. * If all bufs are using the same wcred, then use that with * one call for all of them, otherwise commit each one * separately. */ if (wcred != NOCRED) retv = ncl_commit(vp, off, (int)(endoff - off), wcred, td); else { retv = 0; for (i = 0; i < bvecpos; i++) { off_t off, size; bp = bvec[i]; off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; size = (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); retv = ncl_commit(vp, off, (int)size, bp->b_wcred, td); if (retv) break; } } if (retv == NFSERR_STALEWRITEVERF) ncl_clearcommit(vp->v_mount); /* * Now, either mark the blocks I/O done or mark the * blocks dirty, depending on whether the commit * succeeded. */ for (i = 0; i < bvecpos; i++) { bp = bvec[i]; bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); if (retv) { /* * Error, leave B_DELWRI intact */ vfs_unbusy_pages(bp); brelse(bp); } else { /* * Success, remove B_DELWRI ( bundirty() ). * * b_dirtyoff/b_dirtyend seem to be NFS * specific. We should probably move that * into bundirty(). XXX */ bufobj_wref(bo); bp->b_flags |= B_ASYNC; bundirty(bp); bp->b_flags &= ~B_DONE; bp->b_ioflags &= ~BIO_ERROR; bp->b_dirtyoff = bp->b_dirtyend = 0; bufdone(bp); } } } /* * Start/do any write(s) that are required. */ loop: BO_LOCK(bo); TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { if (waitfor != MNT_WAIT || passone) continue; error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo), "nfsfsync", slpflag, slptimeo); if (error == 0) { BUF_UNLOCK(bp); goto loop; } if (error == ENOLCK) { error = 0; goto loop; } if (called_from_renewthread != 0) { /* * Return EIO so the flush will be retried * later. */ error = EIO; goto done; } if (newnfs_sigintr(nmp, td)) { error = EINTR; goto done; } if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } goto loop; } if ((bp->b_flags & B_DELWRI) == 0) panic("nfs_fsync: not dirty"); if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) { BUF_UNLOCK(bp); continue; } BO_UNLOCK(bo); bremfree(bp); if (passone || !commit) bp->b_flags |= B_ASYNC; else bp->b_flags |= B_ASYNC; bwrite(bp); if (newnfs_sigintr(nmp, td)) { error = EINTR; goto done; } goto loop; } if (passone) { passone = 0; BO_UNLOCK(bo); goto again; } if (waitfor == MNT_WAIT) { while (bo->bo_numoutput) { error = bufobj_wwait(bo, slpflag, slptimeo); if (error) { BO_UNLOCK(bo); if (called_from_renewthread != 0) { /* * Return EIO so that the flush will be * retried later. */ error = EIO; goto done; } error = newnfs_sigintr(nmp, td); if (error) goto done; if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } BO_LOCK(bo); } } if (bo->bo_dirty.bv_cnt != 0 && commit) { BO_UNLOCK(bo); goto loop; } /* * Wait for all the async IO requests to drain */ BO_UNLOCK(bo); mtx_lock(&np->n_mtx); while (np->n_directio_asyncwr > 0) { np->n_flag |= NFSYNCWAIT; error = newnfs_msleep(td, &np->n_directio_asyncwr, &np->n_mtx, slpflag | (PRIBIO + 1), "nfsfsync", 0); if (error) { if (newnfs_sigintr(nmp, td)) { mtx_unlock(&np->n_mtx); error = EINTR; goto done; } } } mtx_unlock(&np->n_mtx); } else BO_UNLOCK(bo); if (NFSHASPNFS(nmp)) { nfscl_layoutcommit(vp, td); /* * Invalidate the attribute cache, since writes to a DS * won't update the size attribute. */ mtx_lock(&np->n_mtx); np->n_attrstamp = 0; } else mtx_lock(&np->n_mtx); if (np->n_flag & NWRITEERR) { error = np->n_error; np->n_flag &= ~NWRITEERR; } if (commit && bo->bo_dirty.bv_cnt == 0 && bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0) np->n_flag &= ~NMODIFIED; mtx_unlock(&np->n_mtx); done: if (bvec != NULL && bvec != bvec_on_stack) free(bvec, M_TEMP); if (error == 0 && commit != 0 && waitfor == MNT_WAIT && (bo->bo_dirty.bv_cnt != 0 || bo->bo_numoutput != 0 || np->n_directio_asyncwr != 0) && trycnt++ < 5) { /* try, try again... */ passone = 1; wcred = NULL; bvec = NULL; bvecsize = 0; printf("try%d\n", trycnt); goto again; } return (error); } /* * NFS advisory byte-level locks. */ static int nfs_advlock(struct vop_advlock_args *ap) { struct vnode *vp = ap->a_vp; struct ucred *cred; struct nfsnode *np = VTONFS(ap->a_vp); struct proc *p = (struct proc *)ap->a_id; struct thread *td = curthread; /* XXX */ struct vattr va; int ret, error = EOPNOTSUPP; u_quad_t size; if (NFS_ISV4(vp) && (ap->a_flags & (F_POSIX | F_FLOCK)) != 0) { if (vp->v_type != VREG) return (EINVAL); if ((ap->a_flags & F_POSIX) != 0) cred = p->p_ucred; else cred = td->td_ucred; NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); if (vp->v_iflag & VI_DOOMED) { NFSVOPUNLOCK(vp, 0); return (EBADF); } /* * If this is unlocking a write locked region, flush and * commit them before unlocking. This is required by * RFC3530 Sec. 9.3.2. */ if (ap->a_op == F_UNLCK && nfscl_checkwritelocked(vp, ap->a_fl, cred, td, ap->a_id, ap->a_flags)) (void) ncl_flush(vp, MNT_WAIT, cred, td, 1, 0); /* * Loop around doing the lock op, while a blocking lock * must wait for the lock op to succeed. */ do { ret = nfsrpc_advlock(vp, np->n_size, ap->a_op, ap->a_fl, 0, cred, td, ap->a_id, ap->a_flags); if (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) && ap->a_op == F_SETLK) { NFSVOPUNLOCK(vp, 0); error = nfs_catnap(PZERO | PCATCH, ret, "ncladvl"); if (error) return (EINTR); NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); if (vp->v_iflag & VI_DOOMED) { NFSVOPUNLOCK(vp, 0); return (EBADF); } } } while (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) && ap->a_op == F_SETLK); if (ret == NFSERR_DENIED) { NFSVOPUNLOCK(vp, 0); return (EAGAIN); } else if (ret == EINVAL || ret == EBADF || ret == EINTR) { NFSVOPUNLOCK(vp, 0); return (ret); } else if (ret != 0) { NFSVOPUNLOCK(vp, 0); return (EACCES); } /* * Now, if we just got a lock, invalidate data in the buffer * cache, as required, so that the coherency conforms with * RFC3530 Sec. 9.3.2. */ if (ap->a_op == F_SETLK) { if ((np->n_flag & NMODIFIED) == 0) { np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); ret = VOP_GETATTR(vp, &va, cred); } if ((np->n_flag & NMODIFIED) || ret || np->n_change != va.va_filerev) { (void) ncl_vinvalbuf(vp, V_SAVE, td, 1); np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); ret = VOP_GETATTR(vp, &va, cred); if (!ret) { np->n_mtime = va.va_mtime; np->n_change = va.va_filerev; } } /* Mark that a file lock has been acquired. */ mtx_lock(&np->n_mtx); np->n_flag |= NHASBEENLOCKED; mtx_unlock(&np->n_mtx); } NFSVOPUNLOCK(vp, 0); return (0); } else if (!NFS_ISV4(vp)) { error = NFSVOPLOCK(vp, LK_SHARED); if (error) return (error); if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { size = VTONFS(vp)->n_size; NFSVOPUNLOCK(vp, 0); error = lf_advlock(ap, &(vp->v_lockf), size); } else { if (nfs_advlock_p != NULL) error = nfs_advlock_p(ap); else { NFSVOPUNLOCK(vp, 0); error = ENOLCK; } } if (error == 0 && ap->a_op == F_SETLK) { /* Mark that a file lock has been acquired. */ mtx_lock(&np->n_mtx); np->n_flag |= NHASBEENLOCKED; mtx_unlock(&np->n_mtx); } } return (error); } /* * NFS advisory byte-level locks. */ static int nfs_advlockasync(struct vop_advlockasync_args *ap) { struct vnode *vp = ap->a_vp; u_quad_t size; int error; if (NFS_ISV4(vp)) return (EOPNOTSUPP); error = NFSVOPLOCK(vp, LK_SHARED); if (error) return (error); if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { size = VTONFS(vp)->n_size; NFSVOPUNLOCK(vp, 0); error = lf_advlockasync(ap, &(vp->v_lockf), size); } else { NFSVOPUNLOCK(vp, 0); error = EOPNOTSUPP; } return (error); } /* * Print out the contents of an nfsnode. */ static int nfs_print(struct vop_print_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); ncl_printf("\tfileid %ld fsid 0x%x", np->n_vattr.na_fileid, np->n_vattr.na_fsid); if (vp->v_type == VFIFO) fifo_printinfo(vp); printf("\n"); return (0); } /* * This is the "real" nfs::bwrite(struct buf*). * We set B_CACHE if this is a VMIO buffer. */ int ncl_writebp(struct buf *bp, int force __unused, struct thread *td) { int s; int oldflags = bp->b_flags; #if 0 int retv = 1; off_t off; #endif BUF_ASSERT_HELD(bp); if (bp->b_flags & B_INVAL) { brelse(bp); return(0); } bp->b_flags |= B_CACHE; /* * Undirty the bp. We will redirty it later if the I/O fails. */ s = splbio(); bundirty(bp); bp->b_flags &= ~B_DONE; bp->b_ioflags &= ~BIO_ERROR; bp->b_iocmd = BIO_WRITE; bufobj_wref(bp->b_bufobj); curthread->td_ru.ru_oublock++; splx(s); /* * Note: to avoid loopback deadlocks, we do not * assign b_runningbufspace. */ vfs_busy_pages(bp, 1); BUF_KERNPROC(bp); bp->b_iooffset = dbtob(bp->b_blkno); bstrategy(bp); if( (oldflags & B_ASYNC) == 0) { int rtval = bufwait(bp); if (oldflags & B_DELWRI) { s = splbio(); reassignbuf(bp); splx(s); } brelse(bp); return (rtval); } return (0); } /* * nfs special file access vnode op. * Essentially just get vattr and then imitate iaccess() since the device is * local to the client. */ static int nfsspec_access(struct vop_access_args *ap) { struct vattr *vap; struct ucred *cred = ap->a_cred; struct vnode *vp = ap->a_vp; accmode_t accmode = ap->a_accmode; struct vattr vattr; int error; /* * Disallow write attempts on filesystems mounted read-only; * unless the file is a socket, fifo, or a block or character * device resident on the filesystem. */ if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); default: break; } } vap = &vattr; error = VOP_GETATTR(vp, vap, cred); if (error) goto out; error = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid, accmode, cred, NULL); out: return error; } /* * Read wrapper for fifos. */ static int nfsfifo_read(struct vop_read_args *ap) { struct nfsnode *np = VTONFS(ap->a_vp); int error; /* * Set access flag. */ mtx_lock(&np->n_mtx); np->n_flag |= NACC; vfs_timestamp(&np->n_atim); mtx_unlock(&np->n_mtx); error = fifo_specops.vop_read(ap); return error; } /* * Write wrapper for fifos. */ static int nfsfifo_write(struct vop_write_args *ap) { struct nfsnode *np = VTONFS(ap->a_vp); /* * Set update flag. */ mtx_lock(&np->n_mtx); np->n_flag |= NUPD; vfs_timestamp(&np->n_mtim); mtx_unlock(&np->n_mtx); return(fifo_specops.vop_write(ap)); } /* * Close wrapper for fifos. * * Update the times on the nfsnode then do fifo close. */ static int nfsfifo_close(struct vop_close_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct vattr vattr; struct timespec ts; mtx_lock(&np->n_mtx); if (np->n_flag & (NACC | NUPD)) { vfs_timestamp(&ts); if (np->n_flag & NACC) np->n_atim = ts; if (np->n_flag & NUPD) np->n_mtim = ts; np->n_flag |= NCHG; if (vrefcnt(vp) == 1 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { VATTR_NULL(&vattr); if (np->n_flag & NACC) vattr.va_atime = np->n_atim; if (np->n_flag & NUPD) vattr.va_mtime = np->n_mtim; mtx_unlock(&np->n_mtx); (void)VOP_SETATTR(vp, &vattr, ap->a_cred); goto out; } } mtx_unlock(&np->n_mtx); out: return (fifo_specops.vop_close(ap)); } /* * Just call ncl_writebp() with the force argument set to 1. * * NOTE: B_DONE may or may not be set in a_bp on call. */ static int nfs_bwrite(struct buf *bp) { return (ncl_writebp(bp, 1, curthread)); } struct buf_ops buf_ops_newnfs = { .bop_name = "buf_ops_nfs", .bop_write = nfs_bwrite, .bop_strategy = bufstrategy, .bop_sync = bufsync, .bop_bdflush = bufbdflush, }; - -/* - * Cloned from vop_stdlock(), and then the ugly hack added. - */ -static int -nfs_lock1(struct vop_lock1_args *ap) -{ - struct vnode *vp = ap->a_vp; - int error = 0; - - /* - * Since vfs_hash_get() calls vget() and it will no longer work - * for FreeBSD8 with flags == 0, I can only think of this horrible - * hack to work around it. I call vfs_hash_get() with LK_EXCLOTHER - * and then handle it here. All I want for this case is a v_usecount - * on the vnode to use for recovery, while another thread might - * hold a lock on the vnode. I have the other threads blocked, so - * there isn't any race problem. - */ - if ((ap->a_flags & LK_TYPE_MASK) == LK_EXCLOTHER) { - if ((ap->a_flags & LK_INTERLOCK) == 0) - panic("ncllock1"); - if ((vp->v_iflag & VI_DOOMED)) - error = ENOENT; - VI_UNLOCK(vp); - return (error); - } - return (_lockmgr_args(vp->v_vnlock, ap->a_flags, VI_MTX(vp), - LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, ap->a_file, - ap->a_line)); -} static int nfs_getacl(struct vop_getacl_args *ap) { int error; if (ap->a_type != ACL_TYPE_NFS4) return (EOPNOTSUPP); error = nfsrpc_getacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp, NULL); if (error > NFSERR_STALE) { (void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0); error = EPERM; } return (error); } static int nfs_setacl(struct vop_setacl_args *ap) { int error; if (ap->a_type != ACL_TYPE_NFS4) return (EOPNOTSUPP); error = nfsrpc_setacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp, NULL); if (error > NFSERR_STALE) { (void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0); error = EPERM; } return (error); } /* * Return POSIX pathconf information applicable to nfs filesystems. */ static int nfs_pathconf(struct vop_pathconf_args *ap) { struct nfsv3_pathconf pc; struct nfsvattr nfsva; struct vnode *vp = ap->a_vp; struct thread *td = curthread; int attrflag, error; if ((NFS_ISV34(vp) && (ap->a_name == _PC_LINK_MAX || ap->a_name == _PC_NAME_MAX || ap->a_name == _PC_CHOWN_RESTRICTED || ap->a_name == _PC_NO_TRUNC)) || (NFS_ISV4(vp) && ap->a_name == _PC_ACL_NFS4)) { /* * Since only the above 4 a_names are returned by the NFSv3 * Pathconf RPC, there is no point in doing it for others. * For NFSv4, the Pathconf RPC (actually a Getattr Op.) can * be used for _PC_NFS4_ACL as well. */ error = nfsrpc_pathconf(vp, &pc, td->td_ucred, td, &nfsva, &attrflag, NULL); if (attrflag != 0) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (error != 0) return (error); } else { /* * For NFSv2 (or NFSv3 when not one of the above 4 a_names), * just fake them. */ pc.pc_linkmax = LINK_MAX; pc.pc_namemax = NFS_MAXNAMLEN; pc.pc_notrunc = 1; pc.pc_chownrestricted = 1; pc.pc_caseinsensitive = 0; pc.pc_casepreserving = 1; error = 0; } switch (ap->a_name) { case _PC_LINK_MAX: *ap->a_retval = pc.pc_linkmax; break; case _PC_NAME_MAX: *ap->a_retval = pc.pc_namemax; break; case _PC_PATH_MAX: *ap->a_retval = PATH_MAX; break; case _PC_PIPE_BUF: *ap->a_retval = PIPE_BUF; break; case _PC_CHOWN_RESTRICTED: *ap->a_retval = pc.pc_chownrestricted; break; case _PC_NO_TRUNC: *ap->a_retval = pc.pc_notrunc; break; case _PC_ACL_EXTENDED: *ap->a_retval = 0; break; case _PC_ACL_NFS4: if (NFS_ISV4(vp) && nfsrv_useacl != 0 && attrflag != 0 && NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) *ap->a_retval = 1; else *ap->a_retval = 0; break; case _PC_ACL_PATH_MAX: if (NFS_ISV4(vp)) *ap->a_retval = ACL_MAX_ENTRIES; else *ap->a_retval = 3; break; case _PC_MAC_PRESENT: *ap->a_retval = 0; break; case _PC_ASYNC_IO: /* _PC_ASYNC_IO should have been handled by upper layers. */ KASSERT(0, ("_PC_ASYNC_IO should not get here")); error = EINVAL; break; case _PC_PRIO_IO: *ap->a_retval = 0; break; case _PC_SYNC_IO: *ap->a_retval = 0; break; case _PC_ALLOC_SIZE_MIN: *ap->a_retval = vp->v_mount->mnt_stat.f_bsize; break; case _PC_FILESIZEBITS: if (NFS_ISV34(vp)) *ap->a_retval = 64; else *ap->a_retval = 32; break; case _PC_REC_INCR_XFER_SIZE: *ap->a_retval = vp->v_mount->mnt_stat.f_iosize; break; case _PC_REC_MAX_XFER_SIZE: *ap->a_retval = -1; /* means ``unlimited'' */ break; case _PC_REC_MIN_XFER_SIZE: *ap->a_retval = vp->v_mount->mnt_stat.f_iosize; break; case _PC_REC_XFER_ALIGN: *ap->a_retval = PAGE_SIZE; break; case _PC_SYMLINK_MAX: *ap->a_retval = NFS_MAXPATHLEN; break; default: error = EINVAL; break; } return (error); }