Index: projects/nfs-over-tls/sys/fs/nfs/nfsdport.h =================================================================== --- projects/nfs-over-tls/sys/fs/nfs/nfsdport.h (revision 364495) +++ projects/nfs-over-tls/sys/fs/nfs/nfsdport.h (revision 364496) @@ -1,121 +1,121 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2009 Rick Macklem, University of Guelph * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * These macros handle nfsvattr fields. They look a bit silly here, but * are quite different for the Darwin port. */ #define NFSVNO_ATTRINIT(n) (VATTR_NULL(&((n)->na_vattr))) #define NFSVNO_SETATTRVAL(n, f, v) ((n)->na_##f = (v)) #define NFSVNO_SETACTIVE(n, f) #define NFSVNO_UNSET(n, f) ((n)->na_##f = VNOVAL) #define NFSVNO_NOTSETMODE(n) ((n)->na_mode == ((mode_t)VNOVAL)) #define NFSVNO_ISSETMODE(n) ((n)->na_mode != ((mode_t)VNOVAL)) #define NFSVNO_NOTSETUID(n) ((n)->na_uid == ((uid_t)VNOVAL)) #define NFSVNO_ISSETUID(n) ((n)->na_uid != ((uid_t)VNOVAL)) #define NFSVNO_NOTSETGID(n) ((n)->na_gid == ((gid_t)VNOVAL)) #define NFSVNO_ISSETGID(n) ((n)->na_gid != ((gid_t)VNOVAL)) #define NFSVNO_NOTSETSIZE(n) ((n)->na_size == VNOVAL) #define NFSVNO_ISSETSIZE(n) ((n)->na_size != VNOVAL) #define NFSVNO_NOTSETATIME(n) ((n)->na_atime.tv_sec == VNOVAL) #define NFSVNO_ISSETATIME(n) ((n)->na_atime.tv_sec != VNOVAL) #define NFSVNO_NOTSETMTIME(n) ((n)->na_mtime.tv_sec == VNOVAL) #define NFSVNO_ISSETMTIME(n) ((n)->na_mtime.tv_sec != VNOVAL) /* * This structure acts as a "catch-all" for information that * needs to be returned by nfsd_fhtovp(). */ struct nfsexstuff { uint64_t nes_exflag; /* export flags */ int nes_numsecflavor; /* # of security flavors */ int nes_secflavors[MAXSECFLAVORS]; /* and the flavors */ }; /* * These are NO-OPS for BSD until Isilon upstreams EXITCODE support. * EXITCODE is an in-memory ring buffer that holds the routines failing status. * This is a valuable tool to use when debugging and analyzing issues. * In addition to recording a routine's failing status, it offers * logging of routines for call stack tracing. * EXITCODE should be used only in routines that return a true errno value, as * that value will be formatted to a displayable errno string. Routines that * return regular int status that are not true errno should not set EXITCODE. * If you want to log routine tracing, you can add EXITCODE(0) to any routine. * NFS extended the EXITCODE with EXITCODE2 to record either the routine's * exit errno status or the nd_repstat. */ #define NFSEXITCODE(error) #define NFSEXITCODE2(error, nd) #define NFSVNO_EXINIT(e) ((e)->nes_exflag = 0) #define NFSVNO_EXPORTED(e) ((e)->nes_exflag & MNT_EXPORTED) #define NFSVNO_EXRDONLY(e) ((e)->nes_exflag & MNT_EXRDONLY) #define NFSVNO_EXPORTANON(e) ((e)->nes_exflag & MNT_EXPORTANON) #define NFSVNO_EXSTRICTACCESS(e) ((e)->nes_exflag & MNT_EXSTRICTACCESS) #define NFSVNO_EXV4ONLY(e) ((e)->nes_exflag & MNT_EXV4ONLY) -#define NFSVNO_EXTLS(e) ((e)->nes_exflag & MNTEX_TLS) -#define NFSVNO_EXTLSCERT(e) ((e)->nes_exflag & MNTEX_TLSCERT) -#define NFSVNO_EXTLSCERTUSER(e) ((e)->nes_exflag & MNTEX_TLSCERTUSER) +#define NFSVNO_EXTLS(e) ((e)->nes_exflag & MNT_EXTLS) +#define NFSVNO_EXTLSCERT(e) ((e)->nes_exflag & MNT_EXTLSCERT) +#define NFSVNO_EXTLSCERTUSER(e) ((e)->nes_exflag & MNT_EXTLSCERTUSER) #define NFSVNO_SETEXRDONLY(e) ((e)->nes_exflag = (MNT_EXPORTED|MNT_EXRDONLY)) #define NFSVNO_CMPFH(f1, f2) \ (fsidcmp(&(f1)->fh_fsid, &(f2)->fh_fsid) == 0 && \ bcmp(&(f1)->fh_fid, &(f2)->fh_fid, sizeof(struct fid)) == 0) #define NFSLOCKHASH(f) \ (&nfslockhash[nfsrv_hashfh(f) % nfsrv_lockhashsize]) #define NFSFPVNODE(f) ((struct vnode *)((f)->f_data)) #define NFSFPCRED(f) ((f)->f_cred) #define NFSFPFLAG(f) ((f)->f_flag) #define NFSNAMEICNDSET(n, c, o, f) do { \ (n)->cn_cred = (c); \ (n)->cn_nameiop = (o); \ (n)->cn_flags = (f); \ } while (0) #define NFSPATHLEN_T size_t /* * These are set to the minimum and maximum size of a server file * handle. */ #define NFSRV_MINFH (sizeof (fhandle_t)) #define NFSRV_MAXFH (sizeof (fhandle_t)) /* Use this macro for debug printfs. */ #define NFSD_DEBUG(level, ...) do { \ if (nfsd_debuglevel >= (level)) \ printf(__VA_ARGS__); \ } while (0) Index: projects/nfs-over-tls/sys/fs/nfsserver/nfs_nfsdport.c =================================================================== --- projects/nfs-over-tls/sys/fs/nfsserver/nfs_nfsdport.c (revision 364495) +++ projects/nfs-over-tls/sys/fs/nfsserver/nfs_nfsdport.c (revision 364496) @@ -1,6715 +1,6715 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include /* * Functions that perform the vfs operations required by the routines in * nfsd_serv.c. It is hoped that this change will make the server more * portable. */ #include #include #include #include #include #include #include FEATURE(nfsd, "NFSv4 server"); extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1; extern int nfsrv_useacl; extern int newnfs_numnfsd; extern struct mount nfsv4root_mnt; extern struct nfsrv_stablefirst nfsrv_stablefirst; extern void (*nfsd_call_servertimer)(void); extern SVCPOOL *nfsrvd_pool; extern struct nfsv4lock nfsd_suspend_lock; extern struct nfsclienthashhead *nfsclienthash; extern struct nfslockhashhead *nfslockhash; extern struct nfssessionhash *nfssessionhash; extern int nfsrv_sessionhashsize; extern struct nfsstatsv1 nfsstatsv1; extern struct nfslayouthash *nfslayouthash; extern int nfsrv_layouthashsize; extern struct mtx nfsrv_dslock_mtx; extern int nfs_pnfsiothreads; extern struct nfsdontlisthead nfsrv_dontlisthead; extern volatile int nfsrv_dontlistlen; extern volatile int nfsrv_devidcnt; extern int nfsrv_maxpnfsmirror; struct vfsoptlist nfsv4root_opt, nfsv4root_newopt; NFSDLOCKMUTEX; NFSSTATESPINLOCK; struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE]; struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE]; struct mtx nfsrc_udpmtx; struct mtx nfs_v4root_mutex; struct mtx nfsrv_dontlistlock_mtx; struct mtx nfsrv_recalllock_mtx; struct nfsrvfh nfs_rootfh, nfs_pubfh; int nfs_pubfhset = 0, nfs_rootfhset = 0; struct proc *nfsd_master_proc = NULL; int nfsd_debuglevel = 0; static pid_t nfsd_master_pid = (pid_t)-1; static char nfsd_master_comm[MAXCOMLEN + 1]; static struct timeval nfsd_master_start; static uint32_t nfsv4_sysid = 0; static fhandle_t zerofh; static int nfssvc_srvcall(struct thread *, struct nfssvc_args *, struct ucred *); int nfsrv_enable_crossmntpt = 1; static int nfs_commit_blks; static int nfs_commit_miss; extern int nfsrv_issuedelegs; extern int nfsrv_dolocallocks; extern int nfsd_enable_stringtouid; extern struct nfsdevicehead nfsrv_devidhead; static int nfsrv_createiovec(int, struct mbuf **, struct mbuf **, struct iovec **); static int nfsrv_createiovec_extpgs(int, int, struct mbuf **, struct mbuf **, struct iovec **); static int nfsrv_createiovecw(int, struct mbuf *, char *, struct iovec **, int *); static void nfsrv_pnfscreate(struct vnode *, struct vattr *, struct ucred *, NFSPROC_T *); static void nfsrv_pnfsremovesetup(struct vnode *, NFSPROC_T *, struct vnode **, int *, char *, fhandle_t *); static void nfsrv_pnfsremove(struct vnode **, int, char *, fhandle_t *, NFSPROC_T *); static int nfsrv_proxyds(struct vnode *, off_t, int, struct ucred *, struct thread *, int, struct mbuf **, char *, struct mbuf **, struct nfsvattr *, struct acl *, off_t *, int, bool *); static int nfsrv_setextattr(struct vnode *, struct nfsvattr *, NFSPROC_T *); static int nfsrv_readdsrpc(fhandle_t *, off_t, int, struct ucred *, NFSPROC_T *, struct nfsmount *, struct mbuf **, struct mbuf **); static int nfsrv_writedsrpc(fhandle_t *, off_t, int, struct ucred *, NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct mbuf **, char *, int *); static int nfsrv_allocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *, NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *); static int nfsrv_setacldsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct acl *, int *); static int nfsrv_setattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct nfsvattr *, int *); static int nfsrv_getattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, struct vnode *, struct nfsmount *, struct nfsvattr *); static int nfsrv_seekdsrpc(fhandle_t *, off_t *, int, bool *, struct ucred *, NFSPROC_T *, struct nfsmount *); static int nfsrv_putfhname(fhandle_t *, char *); static int nfsrv_pnfslookupds(struct vnode *, struct vnode *, struct pnfsdsfile *, struct vnode **, NFSPROC_T *); static void nfsrv_pnfssetfh(struct vnode *, struct pnfsdsfile *, char *, char *, struct vnode *, NFSPROC_T *); static int nfsrv_dsremove(struct vnode *, char *, struct ucred *, NFSPROC_T *); static int nfsrv_dssetacl(struct vnode *, struct acl *, struct ucred *, NFSPROC_T *); static int nfsrv_pnfsstatfs(struct statfs *, struct mount *); static void nfsm_trimtrailing(struct nfsrv_descript *, struct mbuf *, char *, int, int); int nfs_pnfsio(task_fn_t *, void *); SYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "NFS server"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW, &nfsrv_enable_crossmntpt, 0, "Enable nfsd to cross mount points"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0, ""); SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0, ""); SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW, &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW, &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, debuglevel, CTLFLAG_RW, &nfsd_debuglevel, 0, "Debug level for NFS server"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_stringtouid, CTLFLAG_RW, &nfsd_enable_stringtouid, 0, "Enable nfsd to accept numeric owner_names"); static int nfsrv_pnfsgetdsattr = 1; SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsgetdsattr, CTLFLAG_RW, &nfsrv_pnfsgetdsattr, 0, "When set getattr gets DS attributes via RPC"); /* * nfsrv_dsdirsize can only be increased and only when the nfsd threads are * not running. * The dsN subdirectories for the increased values must have been created * on all DS servers before this increase is done. */ u_int nfsrv_dsdirsize = 20; static int sysctl_dsdirsize(SYSCTL_HANDLER_ARGS) { int error, newdsdirsize; newdsdirsize = nfsrv_dsdirsize; error = sysctl_handle_int(oidp, &newdsdirsize, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (newdsdirsize <= nfsrv_dsdirsize || newdsdirsize > 10000 || newnfs_numnfsd != 0) return (EINVAL); nfsrv_dsdirsize = newdsdirsize; return (0); } SYSCTL_PROC(_vfs_nfsd, OID_AUTO, dsdirsize, CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(nfsrv_dsdirsize), sysctl_dsdirsize, "IU", "Number of dsN subdirs on the DS servers"); #define MAX_REORDERED_RPC 16 #define NUM_HEURISTIC 1031 #define NHUSE_INIT 64 #define NHUSE_INC 16 #define NHUSE_MAX 2048 static struct nfsheur { struct vnode *nh_vp; /* vp to match (unreferenced pointer) */ off_t nh_nextoff; /* next offset for sequential detection */ int nh_use; /* use count for selection */ int nh_seqcount; /* heuristic */ } nfsheur[NUM_HEURISTIC]; /* * Heuristic to detect sequential operation. */ static struct nfsheur * nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp) { struct nfsheur *nh; int hi, try; /* Locate best candidate. */ try = 32; hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC; nh = &nfsheur[hi]; while (try--) { if (nfsheur[hi].nh_vp == vp) { nh = &nfsheur[hi]; break; } if (nfsheur[hi].nh_use > 0) --nfsheur[hi].nh_use; hi = (hi + 1) % NUM_HEURISTIC; if (nfsheur[hi].nh_use < nh->nh_use) nh = &nfsheur[hi]; } /* Initialize hint if this is a new file. */ if (nh->nh_vp != vp) { nh->nh_vp = vp; nh->nh_nextoff = uio->uio_offset; nh->nh_use = NHUSE_INIT; if (uio->uio_offset == 0) nh->nh_seqcount = 4; else nh->nh_seqcount = 1; } /* Calculate heuristic. */ if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) || uio->uio_offset == nh->nh_nextoff) { /* See comments in vfs_vnops.c:sequential_heuristic(). */ nh->nh_seqcount += howmany(uio->uio_resid, 16384); if (nh->nh_seqcount > IO_SEQMAX) nh->nh_seqcount = IO_SEQMAX; } else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC * imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) { /* Probably a reordered RPC, leave seqcount alone. */ } else if (nh->nh_seqcount > 1) { nh->nh_seqcount /= 2; } else { nh->nh_seqcount = 0; } nh->nh_use += NHUSE_INC; if (nh->nh_use > NHUSE_MAX) nh->nh_use = NHUSE_MAX; return (nh); } /* * Get attributes into nfsvattr structure. */ int nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, struct nfsrv_descript *nd, struct thread *p, int vpislocked, nfsattrbit_t *attrbitp) { int error, gotattr, lockedit = 0; struct nfsvattr na; if (vpislocked == 0) { /* * When vpislocked == 0, the vnode is either exclusively * locked by this thread or not locked by this thread. * As such, shared lock it, if not exclusively locked. */ if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { lockedit = 1; NFSVOPLOCK(vp, LK_SHARED | LK_RETRY); } } /* * Acquire the Change, Size, TimeAccess, TimeModify and SpaceUsed * attributes, as required. * This needs to be done for regular files if: * - non-NFSv4 RPCs or * - when attrbitp == NULL or * - an NFSv4 RPC with any of the above attributes in attrbitp. * A return of 0 for nfsrv_proxyds() indicates that it has acquired * these attributes. nfsrv_proxyds() will return an error if the * server is not a pNFS one. */ gotattr = 0; if (vp->v_type == VREG && nfsrv_devidcnt > 0 && (attrbitp == NULL || (nd->nd_flag & ND_NFSV4) == 0 || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_CHANGE) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEACCESS) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEMODIFY) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEUSED))) { error = nfsrv_proxyds(vp, 0, 0, nd->nd_cred, p, NFSPROC_GETATTR, NULL, NULL, NULL, &na, NULL, NULL, 0, NULL); if (error == 0) gotattr = 1; } error = VOP_GETATTR(vp, &nvap->na_vattr, nd->nd_cred); if (lockedit != 0) NFSVOPUNLOCK(vp); /* * If we got the Change, Size and Modify Time from the DS, * replace them. */ if (gotattr != 0) { nvap->na_atime = na.na_atime; nvap->na_mtime = na.na_mtime; nvap->na_filerev = na.na_filerev; nvap->na_size = na.na_size; nvap->na_bytes = na.na_bytes; } NFSD_DEBUG(4, "nfsvno_getattr: gotattr=%d err=%d chg=%ju\n", gotattr, error, (uintmax_t)na.na_filerev); NFSEXITCODE(error); return (error); } /* * Get a file handle for a vnode. */ int nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p) { int error; NFSBZERO((caddr_t)fhp, sizeof(fhandle_t)); fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VOP_VPTOFH(vp, &fhp->fh_fid); NFSEXITCODE(error); return (error); } /* * Perform access checking for vnodes obtained from file handles that would * refer to files already opened by a Unix client. You cannot just use * vn_writechk() and VOP_ACCESSX() for two reasons. * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write * case. * 2 - The owner is to be given access irrespective of mode bits for some * operations, so that processes that chmod after opening a file don't * break. */ int nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred, struct nfsexstuff *exp, struct thread *p, int override, int vpislocked, u_int32_t *supportedtypep) { struct vattr vattr; int error = 0, getret = 0; if (vpislocked == 0) { if (NFSVOPLOCK(vp, LK_SHARED) != 0) { error = EPERM; goto out; } } if (accmode & VWRITE) { /* Just vn_writechk() changed to check rdonly */ /* * Disallow write attempts on read-only file systems; * unless the file is a socket or a block or character * device resident on the file system. */ if (NFSVNO_EXRDONLY(exp) || (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: error = EROFS; default: break; } } /* * If there's shared text associated with * the inode, try to free it up once. If * we fail, we can't allow writing. */ if (VOP_IS_TEXT(vp) && error == 0) error = ETXTBSY; } if (error != 0) { if (vpislocked == 0) NFSVOPUNLOCK(vp); goto out; } /* * Should the override still be applied when ACLs are enabled? */ error = VOP_ACCESSX(vp, accmode, cred, p); if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) { /* * Try again with VEXPLICIT_DENY, to see if the test for * deletion is supported. */ error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p); if (error == 0) { if (vp->v_type == VDIR) { accmode &= ~(VDELETE | VDELETE_CHILD); accmode |= VWRITE; error = VOP_ACCESSX(vp, accmode, cred, p); } else if (supportedtypep != NULL) { *supportedtypep &= ~NFSACCESS_DELETE; } } } /* * Allow certain operations for the owner (reads and writes * on files that are already open). */ if (override != NFSACCCHK_NOOVERRIDE && (error == EPERM || error == EACCES)) { if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT)) error = 0; else if (override & NFSACCCHK_ALLOWOWNER) { getret = VOP_GETATTR(vp, &vattr, cred); if (getret == 0 && cred->cr_uid == vattr.va_uid) error = 0; } } if (vpislocked == 0) NFSVOPUNLOCK(vp); out: NFSEXITCODE(error); return (error); } /* * Set attribute(s) vnop. */ int nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { u_quad_t savsize = 0; int error, savedit; time_t savbtime; /* * If this is an exported file system and a pNFS service is running, * don't VOP_SETATTR() of size for the MDS file system. */ savedit = 0; error = 0; if (vp->v_type == VREG && (vp->v_mount->mnt_flag & MNT_EXPORTED) != 0 && nfsrv_devidcnt != 0 && nvap->na_vattr.va_size != VNOVAL && nvap->na_vattr.va_size > 0) { savsize = nvap->na_vattr.va_size; nvap->na_vattr.va_size = VNOVAL; if (nvap->na_vattr.va_uid != (uid_t)VNOVAL || nvap->na_vattr.va_gid != (gid_t)VNOVAL || nvap->na_vattr.va_mode != (mode_t)VNOVAL || nvap->na_vattr.va_atime.tv_sec != VNOVAL || nvap->na_vattr.va_mtime.tv_sec != VNOVAL) savedit = 1; else savedit = 2; } if (savedit != 2) error = VOP_SETATTR(vp, &nvap->na_vattr, cred); if (savedit != 0) nvap->na_vattr.va_size = savsize; if (error == 0 && (nvap->na_vattr.va_uid != (uid_t)VNOVAL || nvap->na_vattr.va_gid != (gid_t)VNOVAL || nvap->na_vattr.va_size != VNOVAL || nvap->na_vattr.va_mode != (mode_t)VNOVAL || nvap->na_vattr.va_atime.tv_sec != VNOVAL || nvap->na_vattr.va_mtime.tv_sec != VNOVAL)) { /* Never modify birthtime on a DS file. */ savbtime = nvap->na_vattr.va_birthtime.tv_sec; nvap->na_vattr.va_birthtime.tv_sec = VNOVAL; /* For a pNFS server, set the attributes on the DS file. */ error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETATTR, NULL, NULL, NULL, nvap, NULL, NULL, 0, NULL); nvap->na_vattr.va_birthtime.tv_sec = savbtime; if (error == ENOENT) error = 0; } NFSEXITCODE(error); return (error); } /* * Set up nameidata for a lookup() call and do it. */ int nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp, struct vnode *dp, int islocked, struct nfsexstuff *exp, struct thread *p, struct vnode **retdirp) { struct componentname *cnp = &ndp->ni_cnd; int i; struct iovec aiov; struct uio auio; int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen; int error = 0; char *cp; *retdirp = NULL; cnp->cn_nameptr = cnp->cn_pnbuf; ndp->ni_lcf = 0; /* * Extract and set starting directory. */ if (dp->v_type != VDIR) { if (islocked) vput(dp); else vrele(dp); nfsvno_relpathbuf(ndp); error = ENOTDIR; goto out1; } if (islocked) NFSVOPUNLOCK(dp); VREF(dp); *retdirp = dp; if (NFSVNO_EXRDONLY(exp)) cnp->cn_flags |= RDONLY; ndp->ni_segflg = UIO_SYSSPACE; if (nd->nd_flag & ND_PUBLOOKUP) { ndp->ni_loopcnt = 0; if (cnp->cn_pnbuf[0] == '/') { vrele(dp); /* * Check for degenerate pathnames here, since lookup() * panics on them. */ for (i = 1; i < ndp->ni_pathlen; i++) if (cnp->cn_pnbuf[i] != '/') break; if (i == ndp->ni_pathlen) { error = NFSERR_ACCES; goto out; } dp = rootvnode; VREF(dp); } } else if ((nfsrv_enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) || (nd->nd_flag & ND_NFSV4) == 0) { /* * Only cross mount points for NFSv4 when doing a * mount while traversing the file system above * the mount point, unless nfsrv_enable_crossmntpt is set. */ cnp->cn_flags |= NOCROSSMOUNT; } /* * Initialize for scan, set ni_startdir and bump ref on dp again * because lookup() will dereference ni_startdir. */ cnp->cn_thread = p; ndp->ni_startdir = dp; ndp->ni_rootdir = rootvnode; ndp->ni_topdir = NULL; if (!lockleaf) cnp->cn_flags |= LOCKLEAF; for (;;) { cnp->cn_nameptr = cnp->cn_pnbuf; /* * Call lookup() to do the real work. If an error occurs, * ndp->ni_vp and ni_dvp are left uninitialized or NULL and * we do not have to dereference anything before returning. * In either case ni_startdir will be dereferenced and NULLed * out. */ error = lookup(ndp); if (error) break; /* * Check for encountering a symbolic link. Trivial * termination occurs if no symlink encountered. */ if ((cnp->cn_flags & ISSYMLINK) == 0) { if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) nfsvno_relpathbuf(ndp); if (ndp->ni_vp && !lockleaf) NFSVOPUNLOCK(ndp->ni_vp); break; } /* * Validate symlink */ if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) NFSVOPUNLOCK(ndp->ni_dvp); if (!(nd->nd_flag & ND_PUBLOOKUP)) { error = EINVAL; goto badlink2; } if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { error = ELOOP; goto badlink2; } if (ndp->ni_pathlen > 1) cp = uma_zalloc(namei_zone, M_WAITOK); else cp = cnp->cn_pnbuf; aiov.iov_base = cp; aiov.iov_len = MAXPATHLEN; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = NULL; auio.uio_resid = MAXPATHLEN; error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); if (error) { badlink1: if (ndp->ni_pathlen > 1) uma_zfree(namei_zone, cp); badlink2: vrele(ndp->ni_dvp); vput(ndp->ni_vp); break; } linklen = MAXPATHLEN - auio.uio_resid; if (linklen == 0) { error = ENOENT; goto badlink1; } if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { error = ENAMETOOLONG; goto badlink1; } /* * Adjust or replace path */ if (ndp->ni_pathlen > 1) { NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen); uma_zfree(namei_zone, cnp->cn_pnbuf); cnp->cn_pnbuf = cp; } else cnp->cn_pnbuf[linklen] = '\0'; ndp->ni_pathlen += linklen; /* * Cleanup refs for next loop and check if root directory * should replace current directory. Normally ni_dvp * becomes the new base directory and is cleaned up when * we loop. Explicitly null pointers after invalidation * to clarify operation. */ vput(ndp->ni_vp); ndp->ni_vp = NULL; if (cnp->cn_pnbuf[0] == '/') { vrele(ndp->ni_dvp); ndp->ni_dvp = ndp->ni_rootdir; VREF(ndp->ni_dvp); } ndp->ni_startdir = ndp->ni_dvp; ndp->ni_dvp = NULL; } if (!lockleaf) cnp->cn_flags &= ~LOCKLEAF; out: if (error) { nfsvno_relpathbuf(ndp); ndp->ni_vp = NULL; ndp->ni_dvp = NULL; ndp->ni_startdir = NULL; } else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) { ndp->ni_dvp = NULL; } out1: NFSEXITCODE2(error, nd); return (error); } /* * Set up a pathname buffer and return a pointer to it and, optionally * set a hash pointer. */ void nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp) { struct componentname *cnp = &ndp->ni_cnd; cnp->cn_flags |= (NOMACCHECK | HASBUF); cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); if (hashpp != NULL) *hashpp = NULL; *bufpp = cnp->cn_pnbuf; } /* * Release the above path buffer, if not released by nfsvno_namei(). */ void nfsvno_relpathbuf(struct nameidata *ndp) { if ((ndp->ni_cnd.cn_flags & HASBUF) == 0) panic("nfsrelpath"); uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf); ndp->ni_cnd.cn_flags &= ~HASBUF; } /* * Readlink vnode op into an mbuf list. */ int nfsvno_readlink(struct vnode *vp, struct ucred *cred, int maxextsiz, struct thread *p, struct mbuf **mpp, struct mbuf **mpendp, int *lenp) { struct iovec *iv; struct uio io, *uiop = &io; struct mbuf *mp, *mp3; int len, tlen, error = 0; len = NFS_MAXPATHLEN; if (maxextsiz > 0) uiop->uio_iovcnt = nfsrv_createiovec_extpgs(len, maxextsiz, &mp3, &mp, &iv); else uiop->uio_iovcnt = nfsrv_createiovec(len, &mp3, &mp, &iv); uiop->uio_iov = iv; uiop->uio_offset = 0; uiop->uio_resid = len; uiop->uio_rw = UIO_READ; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = NULL; error = VOP_READLINK(vp, uiop, cred); free(iv, M_TEMP); if (error) { m_freem(mp3); *lenp = 0; goto out; } if (uiop->uio_resid > 0) { len -= uiop->uio_resid; tlen = NFSM_RNDUP(len); if (tlen == 0) { m_freem(mp3); mp3 = mp = NULL; } else if (tlen != NFS_MAXPATHLEN || tlen != len) mp = nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen, tlen - len); } *lenp = len; *mpp = mp3; *mpendp = mp; out: NFSEXITCODE(error); return (error); } /* * Create an mbuf chain and an associated iovec that can be used to Read * or Getextattr of data. * Upon success, return pointers to the first and last mbufs in the chain * plus the malloc'd iovec and its iovlen. */ static int nfsrv_createiovec(int len, struct mbuf **mpp, struct mbuf **mpendp, struct iovec **ivp) { struct mbuf *m, *m2 = NULL, *m3; struct iovec *iv; int i, left, siz; left = len; m3 = NULL; /* * Generate the mbuf list with the uio_iov ref. to it. */ i = 0; while (left > 0) { NFSMGET(m); MCLGET(m, M_WAITOK); m->m_len = 0; siz = min(M_TRAILINGSPACE(m), left); left -= siz; i++; if (m3) m2->m_next = m; else m3 = m; m2 = m; } *ivp = iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK); m = m3; left = len; i = 0; while (left > 0) { if (m == NULL) panic("nfsrv_createiovec iov"); siz = min(M_TRAILINGSPACE(m), left); if (siz > 0) { iv->iov_base = mtod(m, caddr_t) + m->m_len; iv->iov_len = siz; m->m_len += siz; left -= siz; iv++; i++; } m = m->m_next; } *mpp = m3; *mpendp = m2; return (i); } /* * Create an mbuf chain and an associated iovec that can be used to Read * or Getextattr of data. * Upon success, return pointers to the first and last mbufs in the chain * plus the malloc'd iovec and its iovlen. * Same as above, but creates ext_pgs mbuf(s). */ static int nfsrv_createiovec_extpgs(int len, int maxextsiz, struct mbuf **mpp, struct mbuf **mpendp, struct iovec **ivp) { struct mbuf *m, *m2 = NULL, *m3; struct iovec *iv; int i, left, pgno, siz; left = len; m3 = NULL; /* * Generate the mbuf list with the uio_iov ref. to it. */ i = 0; while (left > 0) { siz = min(left, maxextsiz); m = mb_alloc_ext_plus_pages(siz, M_WAITOK); left -= siz; i += m->m_epg_npgs; if (m3 != NULL) m2->m_next = m; else m3 = m; m2 = m; } *ivp = iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK); m = m3; left = len; i = 0; pgno = 0; while (left > 0) { if (m == NULL) panic("nfsvno_createiovec_extpgs iov"); siz = min(PAGE_SIZE, left); if (siz > 0) { iv->iov_base = (void *)PHYS_TO_DMAP(m->m_epg_pa[pgno]); iv->iov_len = siz; m->m_len += siz; if (pgno == m->m_epg_npgs - 1) m->m_epg_last_len = siz; left -= siz; iv++; i++; pgno++; } if (pgno == m->m_epg_npgs && left > 0) { m = m->m_next; if (m == NULL) panic("nfsvno_createiovec_extpgs iov"); pgno = 0; } } *mpp = m3; *mpendp = m2; return (i); } /* * Read vnode op call into mbuf list. */ int nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, int maxextsiz, struct thread *p, struct mbuf **mpp, struct mbuf **mpendp) { struct mbuf *m; struct iovec *iv; int error = 0, len, tlen, ioflag = 0; struct mbuf *m3; struct uio io, *uiop = &io; struct nfsheur *nh; /* * Attempt to read from a DS file. A return of ENOENT implies * there is no DS file to read. */ error = nfsrv_proxyds(vp, off, cnt, cred, p, NFSPROC_READDS, mpp, NULL, mpendp, NULL, NULL, NULL, 0, NULL); if (error != ENOENT) return (error); len = NFSM_RNDUP(cnt); if (maxextsiz > 0) uiop->uio_iovcnt = nfsrv_createiovec_extpgs(len, maxextsiz, &m3, &m, &iv); else uiop->uio_iovcnt = nfsrv_createiovec(len, &m3, &m, &iv); uiop->uio_iov = iv; uiop->uio_offset = off; uiop->uio_resid = len; uiop->uio_rw = UIO_READ; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = NULL; nh = nfsrv_sequential_heuristic(uiop, vp); ioflag |= nh->nh_seqcount << IO_SEQSHIFT; /* XXX KDM make this more systematic? */ nfsstatsv1.srvbytes[NFSV4OP_READ] += uiop->uio_resid; error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred); free(iv, M_TEMP); if (error) { m_freem(m3); *mpp = NULL; goto out; } nh->nh_nextoff = uiop->uio_offset; tlen = len - uiop->uio_resid; cnt = cnt < tlen ? cnt : tlen; tlen = NFSM_RNDUP(cnt); if (tlen == 0) { m_freem(m3); m3 = m = NULL; } else if (len != tlen || tlen != cnt) m = nfsrv_adj(m3, len - tlen, tlen - cnt); *mpp = m3; *mpendp = m; out: NFSEXITCODE(error); return (error); } /* * Create the iovec for the mbuf chain passed in as an argument. * The "cp" argument is where the data starts within the first mbuf in * the chain. It returns the iovec and the iovcnt. */ static int nfsrv_createiovecw(int retlen, struct mbuf *m, char *cp, struct iovec **ivpp, int *iovcntp) { struct mbuf *mp; struct iovec *ivp; int cnt, i, len; /* * Loop through the mbuf chain, counting how many mbufs are a * part of this write operation, so the iovec size is known. */ cnt = 0; len = retlen; mp = m; i = mtod(mp, caddr_t) + mp->m_len - cp; while (len > 0) { if (i > 0) { len -= i; cnt++; } mp = mp->m_next; if (!mp) { if (len > 0) return (EBADRPC); } else i = mp->m_len; } /* Now, create the iovec. */ mp = m; *ivpp = ivp = malloc(cnt * sizeof (struct iovec), M_TEMP, M_WAITOK); *iovcntp = cnt; i = mtod(mp, caddr_t) + mp->m_len - cp; len = retlen; while (len > 0) { if (mp == NULL) panic("nfsrv_createiovecw"); if (i > 0) { i = min(i, len); ivp->iov_base = cp; ivp->iov_len = i; ivp++; len -= i; } mp = mp->m_next; if (mp) { i = mp->m_len; cp = mtod(mp, caddr_t); } } return (0); } /* * Write vnode op from an mbuf list. */ int nfsvno_write(struct vnode *vp, off_t off, int retlen, int *stable, struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p) { struct iovec *iv; int cnt, ioflags, error; struct uio io, *uiop = &io; struct nfsheur *nh; /* * Attempt to write to a DS file. A return of ENOENT implies * there is no DS file to write. */ error = nfsrv_proxyds(vp, off, retlen, cred, p, NFSPROC_WRITEDS, &mp, cp, NULL, NULL, NULL, NULL, 0, NULL); if (error != ENOENT) { *stable = NFSWRITE_FILESYNC; return (error); } if (*stable == NFSWRITE_UNSTABLE) ioflags = IO_NODELOCKED; else ioflags = (IO_SYNC | IO_NODELOCKED); error = nfsrv_createiovecw(retlen, mp, cp, &iv, &cnt); if (error != 0) return (error); uiop->uio_iov = iv; uiop->uio_iovcnt = cnt; uiop->uio_resid = retlen; uiop->uio_rw = UIO_WRITE; uiop->uio_segflg = UIO_SYSSPACE; NFSUIOPROC(uiop, p); uiop->uio_offset = off; nh = nfsrv_sequential_heuristic(uiop, vp); ioflags |= nh->nh_seqcount << IO_SEQSHIFT; /* XXX KDM make this more systematic? */ nfsstatsv1.srvbytes[NFSV4OP_WRITE] += uiop->uio_resid; error = VOP_WRITE(vp, uiop, ioflags, cred); if (error == 0) nh->nh_nextoff = uiop->uio_offset; free(iv, M_TEMP); NFSEXITCODE(error); return (error); } /* * Common code for creating a regular file (plus special files for V2). */ int nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp, struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp, int32_t *cverf, NFSDEV_T rdev, struct nfsexstuff *exp) { u_quad_t tempsize; int error; struct thread *p = curthread; error = nd->nd_repstat; if (!error && ndp->ni_vp == NULL) { if (nvap->na_type == VREG || nvap->na_type == VSOCK) { vrele(ndp->ni_startdir); error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); /* For a pNFS server, create the data file on a DS. */ if (error == 0 && nvap->na_type == VREG) { /* * Create a data file on a DS for a pNFS server. * This function just returns if not * running a pNFS DS or the creation fails. */ nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr, nd->nd_cred, p); } vput(ndp->ni_dvp); nfsvno_relpathbuf(ndp); if (!error) { if (*exclusive_flagp) { *exclusive_flagp = 0; NFSVNO_ATTRINIT(nvap); nvap->na_atime.tv_sec = cverf[0]; nvap->na_atime.tv_nsec = cverf[1]; error = VOP_SETATTR(ndp->ni_vp, &nvap->na_vattr, nd->nd_cred); if (error != 0) { vput(ndp->ni_vp); ndp->ni_vp = NULL; error = NFSERR_NOTSUPP; } } } /* * NFS V2 Only. nfsrvd_mknod() does this for V3. * (This implies, just get out on an error.) */ } else if (nvap->na_type == VCHR || nvap->na_type == VBLK || nvap->na_type == VFIFO) { if (nvap->na_type == VCHR && rdev == 0xffffffff) nvap->na_type = VFIFO; if (nvap->na_type != VFIFO && (error = priv_check_cred(nd->nd_cred, PRIV_VFS_MKNOD_DEV))) { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vput(ndp->ni_dvp); goto out; } nvap->na_rdev = rdev; error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); vput(ndp->ni_dvp); nfsvno_relpathbuf(ndp); vrele(ndp->ni_startdir); if (error) goto out; } else { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vput(ndp->ni_dvp); error = ENXIO; goto out; } *vpp = ndp->ni_vp; } else { /* * Handle cases where error is already set and/or * the file exists. * 1 - clean up the lookup * 2 - iff !error and na_size set, truncate it */ vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); *vpp = ndp->ni_vp; if (ndp->ni_dvp == *vpp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); if (!error && nvap->na_size != VNOVAL) { error = nfsvno_accchk(*vpp, VWRITE, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); if (!error) { tempsize = nvap->na_size; NFSVNO_ATTRINIT(nvap); nvap->na_size = tempsize; error = VOP_SETATTR(*vpp, &nvap->na_vattr, nd->nd_cred); } } if (error) vput(*vpp); } out: NFSEXITCODE(error); return (error); } /* * Do a mknod vnode op. */ int nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred, struct thread *p) { int error = 0; enum vtype vtyp; vtyp = nvap->na_type; /* * Iff doesn't exist, create it. */ if (ndp->ni_vp) { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vput(ndp->ni_dvp); vrele(ndp->ni_vp); error = EEXIST; goto out; } if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vput(ndp->ni_dvp); error = NFSERR_BADTYPE; goto out; } if (vtyp == VSOCK) { vrele(ndp->ni_startdir); error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); vput(ndp->ni_dvp); nfsvno_relpathbuf(ndp); } else { if (nvap->na_type != VFIFO && (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV))) { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vput(ndp->ni_dvp); goto out; } error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); vput(ndp->ni_dvp); nfsvno_relpathbuf(ndp); vrele(ndp->ni_startdir); /* * Since VOP_MKNOD returns the ni_vp, I can't * see any reason to do the lookup. */ } out: NFSEXITCODE(error); return (error); } /* * Mkdir vnode op. */ int nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { int error = 0; if (ndp->ni_vp != NULL) { if (ndp->ni_dvp == ndp->ni_vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); vrele(ndp->ni_vp); nfsvno_relpathbuf(ndp); error = EEXIST; goto out; } error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); vput(ndp->ni_dvp); nfsvno_relpathbuf(ndp); out: NFSEXITCODE(error); return (error); } /* * symlink vnode op. */ int nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp, int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { int error = 0; if (ndp->ni_vp) { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); if (ndp->ni_dvp == ndp->ni_vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); vrele(ndp->ni_vp); error = EEXIST; goto out; } error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr, pathcp); vput(ndp->ni_dvp); vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); /* * Although FreeBSD still had the lookup code in * it for 7/current, there doesn't seem to be any * point, since VOP_SYMLINK() returns the ni_vp. * Just vput it for v2. */ if (!not_v2 && !error) vput(ndp->ni_vp); out: NFSEXITCODE(error); return (error); } /* * Parse symbolic link arguments. * This function has an ugly side effect. It will malloc() an area for * the symlink and set iov_base to point to it, only if it succeeds. * So, if it returns with uiop->uio_iov->iov_base != NULL, that must * be FREE'd later. */ int nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap, struct thread *p, char **pathcpp, int *lenp) { u_int32_t *tl; char *pathcp = NULL; int error = 0, len; struct nfsv2_sattr *sp; *pathcpp = NULL; *lenp = 0; if ((nd->nd_flag & ND_NFSV3) && (error = nfsrv_sattr(nd, NULL, nvap, NULL, NULL, p))) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len > NFS_MAXPATHLEN || len <= 0) { error = EBADRPC; goto nfsmout; } pathcp = malloc(len + 1, M_TEMP, M_WAITOK); error = nfsrv_mtostr(nd, pathcp, len); if (error) goto nfsmout; if (nd->nd_flag & ND_NFSV2) { NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode); } *pathcpp = pathcp; *lenp = len; NFSEXITCODE2(0, nd); return (0); nfsmout: if (pathcp) free(pathcp, M_TEMP); NFSEXITCODE2(error, nd); return (error); } /* * Remove a non-directory object. */ int nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { struct vnode *vp, *dsdvp[NFSDEV_MAXMIRRORS]; int error = 0, mirrorcnt; char fname[PNFS_FILENAME_LEN + 1]; fhandle_t fh; vp = ndp->ni_vp; dsdvp[0] = NULL; if (vp->v_type == VDIR) error = NFSERR_ISDIR; else if (is_v4) error = nfsrv_checkremove(vp, 1, NULL, (nfsquad_t)((u_quad_t)0), p); if (error == 0) nfsrv_pnfsremovesetup(vp, p, dsdvp, &mirrorcnt, fname, &fh); if (!error) error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd); if (error == 0 && dsdvp[0] != NULL) nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p); if (ndp->ni_dvp == vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); vput(vp); if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0) nfsvno_relpathbuf(ndp); NFSEXITCODE(error); return (error); } /* * Remove a directory. */ int nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { struct vnode *vp; int error = 0; vp = ndp->ni_vp; if (vp->v_type != VDIR) { error = ENOTDIR; goto out; } /* * No rmdir "." please. */ if (ndp->ni_dvp == vp) { error = EINVAL; goto out; } /* * The root of a mounted filesystem cannot be deleted. */ if (vp->v_vflag & VV_ROOT) error = EBUSY; out: if (!error) error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd); if (ndp->ni_dvp == vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); vput(vp); if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0) nfsvno_relpathbuf(ndp); NFSEXITCODE(error); return (error); } /* * Rename vnode op. */ int nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp, u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p) { struct vnode *fvp, *tvp, *tdvp, *dsdvp[NFSDEV_MAXMIRRORS]; int error = 0, mirrorcnt; char fname[PNFS_FILENAME_LEN + 1]; fhandle_t fh; dsdvp[0] = NULL; fvp = fromndp->ni_vp; if (ndstat) { vrele(fromndp->ni_dvp); vrele(fvp); error = ndstat; goto out1; } tdvp = tondp->ni_dvp; tvp = tondp->ni_vp; if (tvp != NULL) { if (fvp->v_type == VDIR && tvp->v_type != VDIR) { error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST; goto out; } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST; goto out; } if (tvp->v_type == VDIR && tvp->v_mountedhere) { error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; goto out; } /* * A rename to '.' or '..' results in a prematurely * unlocked vnode on FreeBSD5, so I'm just going to fail that * here. */ if ((tondp->ni_cnd.cn_namelen == 1 && tondp->ni_cnd.cn_nameptr[0] == '.') || (tondp->ni_cnd.cn_namelen == 2 && tondp->ni_cnd.cn_nameptr[0] == '.' && tondp->ni_cnd.cn_nameptr[1] == '.')) { error = EINVAL; goto out; } } if (fvp->v_type == VDIR && fvp->v_mountedhere) { error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; goto out; } if (fvp->v_mount != tdvp->v_mount) { error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; goto out; } if (fvp == tdvp) { error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL; goto out; } if (fvp == tvp) { /* * If source and destination are the same, there is nothing to * do. Set error to -1 to indicate this. */ error = -1; goto out; } if (ndflag & ND_NFSV4) { if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) { error = nfsrv_checkremove(fvp, 0, NULL, (nfsquad_t)((u_quad_t)0), p); NFSVOPUNLOCK(fvp); } else error = EPERM; if (tvp && !error) error = nfsrv_checkremove(tvp, 1, NULL, (nfsquad_t)((u_quad_t)0), p); } else { /* * For NFSv2 and NFSv3, try to get rid of the delegation, so * that the NFSv4 client won't be confused by the rename. * Since nfsd_recalldelegation() can only be called on an * unlocked vnode at this point and fvp is the file that will * still exist after the rename, just do fvp. */ nfsd_recalldelegation(fvp, p); } if (error == 0 && tvp != NULL) { nfsrv_pnfsremovesetup(tvp, p, dsdvp, &mirrorcnt, fname, &fh); NFSD_DEBUG(4, "nfsvno_rename: pnfsremovesetup" " dsdvp=%p\n", dsdvp[0]); } out: if (!error) { error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp, &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp, &tondp->ni_cnd); } else { if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); vrele(fromndp->ni_dvp); vrele(fvp); if (error == -1) error = 0; } /* * If dsdvp[0] != NULL, it was set up by nfsrv_pnfsremovesetup() and * if the rename succeeded, the DS file for the tvp needs to be * removed. */ if (error == 0 && dsdvp[0] != NULL) { nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p); NFSD_DEBUG(4, "nfsvno_rename: pnfsremove\n"); } vrele(tondp->ni_startdir); nfsvno_relpathbuf(tondp); out1: vrele(fromndp->ni_startdir); nfsvno_relpathbuf(fromndp); NFSEXITCODE(error); return (error); } /* * Link vnode op. */ int nfsvno_link(struct nameidata *ndp, struct vnode *vp, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { struct vnode *xp; int error = 0; xp = ndp->ni_vp; if (xp != NULL) { error = EEXIST; } else { xp = ndp->ni_dvp; if (vp->v_mount != xp->v_mount) error = EXDEV; } if (!error) { NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); if (!VN_IS_DOOMED(vp)) error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd); else error = EPERM; if (ndp->ni_dvp == vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); NFSVOPUNLOCK(vp); } else { if (ndp->ni_dvp == ndp->ni_vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); if (ndp->ni_vp) vrele(ndp->ni_vp); } nfsvno_relpathbuf(ndp); NFSEXITCODE(error); return (error); } /* * Do the fsync() appropriate for the commit. */ int nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred, struct thread *td) { int error = 0; /* * RFC 1813 3.3.21: if count is 0, a flush from offset to the end of * file is done. At this time VOP_FSYNC does not accept offset and * byte count parameters so call VOP_FSYNC the whole file for now. * The same is true for NFSv4: RFC 3530 Sec. 14.2.3. * File systems that do not use the buffer cache (as indicated * by MNTK_USES_BCACHE not being set) must use VOP_FSYNC(). */ if (cnt == 0 || cnt > MAX_COMMIT_COUNT || (vp->v_mount->mnt_kern_flag & MNTK_USES_BCACHE) == 0) { /* * Give up and do the whole thing */ if (vp->v_object && vm_object_mightbedirty(vp->v_object)) { VM_OBJECT_WLOCK(vp->v_object); vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC); VM_OBJECT_WUNLOCK(vp->v_object); } error = VOP_FSYNC(vp, MNT_WAIT, td); } else { /* * Locate and synchronously write any buffers that fall * into the requested range. Note: we are assuming that * f_iosize is a power of 2. */ int iosize = vp->v_mount->mnt_stat.f_iosize; int iomask = iosize - 1; struct bufobj *bo; daddr_t lblkno; /* * Align to iosize boundary, super-align to page boundary. */ if (off & iomask) { cnt += off & iomask; off &= ~(u_quad_t)iomask; } if (off & PAGE_MASK) { cnt += off & PAGE_MASK; off &= ~(u_quad_t)PAGE_MASK; } lblkno = off / iosize; if (vp->v_object && vm_object_mightbedirty(vp->v_object)) { VM_OBJECT_WLOCK(vp->v_object); vm_object_page_clean(vp->v_object, off, off + cnt, OBJPC_SYNC); VM_OBJECT_WUNLOCK(vp->v_object); } bo = &vp->v_bufobj; BO_LOCK(bo); while (cnt > 0) { struct buf *bp; /* * If we have a buffer and it is marked B_DELWRI we * have to lock and write it. Otherwise the prior * write is assumed to have already been committed. * * gbincore() can return invalid buffers now so we * have to check that bit as well (though B_DELWRI * should not be set if B_INVAL is set there could be * a race here since we haven't locked the buffer). */ if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) { if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) { BO_LOCK(bo); continue; /* retry */ } if ((bp->b_flags & (B_DELWRI|B_INVAL)) == B_DELWRI) { bremfree(bp); bp->b_flags &= ~B_ASYNC; bwrite(bp); ++nfs_commit_miss; } else BUF_UNLOCK(bp); BO_LOCK(bo); } ++nfs_commit_blks; if (cnt < iosize) break; cnt -= iosize; ++lblkno; } BO_UNLOCK(bo); } NFSEXITCODE(error); return (error); } /* * Statfs vnode op. */ int nfsvno_statfs(struct vnode *vp, struct statfs *sf) { struct statfs *tsf; int error; tsf = NULL; if (nfsrv_devidcnt > 0) { /* For a pNFS service, get the DS numbers. */ tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK | M_ZERO); error = nfsrv_pnfsstatfs(tsf, vp->v_mount); if (error != 0) { free(tsf, M_TEMP); tsf = NULL; } } error = VFS_STATFS(vp->v_mount, sf); if (error == 0) { if (tsf != NULL) { sf->f_blocks = tsf->f_blocks; sf->f_bavail = tsf->f_bavail; sf->f_bfree = tsf->f_bfree; sf->f_bsize = tsf->f_bsize; } /* * Since NFS handles these values as unsigned on the * wire, there is no way to represent negative values, * so set them to 0. Without this, they will appear * to be very large positive values for clients like * Solaris10. */ if (sf->f_bavail < 0) sf->f_bavail = 0; if (sf->f_ffree < 0) sf->f_ffree = 0; } free(tsf, M_TEMP); NFSEXITCODE(error); return (error); } /* * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but * must handle nfsrv_opencheck() calls after any other access checks. */ void nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp, nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp, int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create, NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred, struct nfsexstuff *exp, struct vnode **vpp) { struct vnode *vp = NULL; u_quad_t tempsize; struct nfsexstuff nes; struct thread *p = curthread; if (ndp->ni_vp == NULL) nd->nd_repstat = nfsrv_opencheck(clientid, stateidp, stp, NULL, nd, p, nd->nd_repstat); if (!nd->nd_repstat) { if (ndp->ni_vp == NULL) { vrele(ndp->ni_startdir); nd->nd_repstat = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); /* For a pNFS server, create the data file on a DS. */ if (nd->nd_repstat == 0) { /* * Create a data file on a DS for a pNFS server. * This function just returns if not * running a pNFS DS or the creation fails. */ nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr, cred, p); } vput(ndp->ni_dvp); nfsvno_relpathbuf(ndp); if (!nd->nd_repstat) { if (*exclusive_flagp) { *exclusive_flagp = 0; NFSVNO_ATTRINIT(nvap); nvap->na_atime.tv_sec = cverf[0]; nvap->na_atime.tv_nsec = cverf[1]; nd->nd_repstat = VOP_SETATTR(ndp->ni_vp, &nvap->na_vattr, cred); if (nd->nd_repstat != 0) { vput(ndp->ni_vp); ndp->ni_vp = NULL; nd->nd_repstat = NFSERR_NOTSUPP; } else NFSSETBIT_ATTRBIT(attrbitp, NFSATTRBIT_TIMEACCESS); } else { nfsrv_fixattr(nd, ndp->ni_vp, nvap, aclp, p, attrbitp, exp); } } vp = ndp->ni_vp; } else { if (ndp->ni_startdir) vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vp = ndp->ni_vp; if (create == NFSV4OPEN_CREATE) { if (ndp->ni_dvp == vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); } if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) { if (ndp->ni_cnd.cn_flags & RDONLY) NFSVNO_SETEXRDONLY(&nes); else NFSVNO_EXINIT(&nes); nd->nd_repstat = nfsvno_accchk(vp, VWRITE, cred, &nes, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); nd->nd_repstat = nfsrv_opencheck(clientid, stateidp, stp, vp, nd, p, nd->nd_repstat); if (!nd->nd_repstat) { tempsize = nvap->na_size; NFSVNO_ATTRINIT(nvap); nvap->na_size = tempsize; nd->nd_repstat = VOP_SETATTR(vp, &nvap->na_vattr, cred); } } else if (vp->v_type == VREG) { nd->nd_repstat = nfsrv_opencheck(clientid, stateidp, stp, vp, nd, p, nd->nd_repstat); } } } else { if (ndp->ni_cnd.cn_flags & HASBUF) nfsvno_relpathbuf(ndp); if (ndp->ni_startdir && create == NFSV4OPEN_CREATE) { vrele(ndp->ni_startdir); if (ndp->ni_dvp == ndp->ni_vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); if (ndp->ni_vp) vput(ndp->ni_vp); } } *vpp = vp; NFSEXITCODE2(0, nd); } /* * Updates the file rev and sets the mtime and ctime * to the current clock time, returning the va_filerev and va_Xtime * values. * Return ESTALE to indicate the vnode is VIRF_DOOMED. */ int nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap, struct nfsrv_descript *nd, struct thread *p) { struct vattr va; VATTR_NULL(&va); vfs_timestamp(&va.va_mtime); if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); if (VN_IS_DOOMED(vp)) return (ESTALE); } (void) VOP_SETATTR(vp, &va, nd->nd_cred); (void) nfsvno_getattr(vp, nvap, nd, p, 1, NULL); return (0); } /* * Glue routine to nfsv4_fillattr(). */ int nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp, struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp, struct ucred *cred, struct thread *p, int isdgram, int reterr, int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno) { struct statfs *sf; int error; sf = NULL; if (nfsrv_devidcnt > 0 && (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEAVAIL) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEFREE) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACETOTAL))) { sf = malloc(sizeof(*sf), M_TEMP, M_WAITOK | M_ZERO); error = nfsrv_pnfsstatfs(sf, mp); if (error != 0) { free(sf, M_TEMP); sf = NULL; } } error = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp, rderror, attrbitp, cred, p, isdgram, reterr, supports_nfsv4acls, at_root, mounted_on_fileno, sf); free(sf, M_TEMP); NFSEXITCODE2(0, nd); return (error); } /* Since the Readdir vnode ops vary, put the entire functions in here. */ /* * nfs readdir service * - mallocs what it thinks is enough to read * count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR * - calls VOP_READDIR() * - loops around building the reply * if the output generated exceeds count break out of loop * The NFSM_CLGET macro is used here so that the reply will be packed * tightly in mbuf clusters. * - it trims out records with d_fileno == 0 * this doesn't matter for Unix clients, but they might confuse clients * for other os'. * - it trims out records with d_type == DT_WHT * these cannot be seen through NFS (unless we extend the protocol) * The alternate call nfsrvd_readdirplus() does lookups as well. * PS: The NFS protocol spec. does not clarify what the "count" byte * argument is a count of.. just name strings and file id's or the * entire reply rpc or ... * I tried just file name and id sizes and it confused the Sun client, * so I am using the full rpc size now. The "paranoia.." comment refers * to including the status longwords that are not a part of the dir. * "entry" structures, but are in the rpc. */ int nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram, struct vnode *vp, struct nfsexstuff *exp) { struct dirent *dp; u_int32_t *tl; int dirlen; char *cpos, *cend, *rbuf; struct nfsvattr at; int nlen, error = 0, getret = 1; int siz, cnt, fullsiz, eofflag, ncookies; u_int64_t off, toff, verf __unused; u_long *cookies = NULL, *cookiep; struct uio io; struct iovec iv; int is_ufs; struct thread *p = curthread; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); goto out; } if (nd->nd_flag & ND_NFSV2) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); off = fxdr_unsigned(u_quad_t, *tl++); } else { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); off = fxdr_hyper(tl); tl += 2; verf = fxdr_hyper(tl); tl += 2; } toff = off; cnt = fxdr_unsigned(int, *tl); if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0) cnt = NFS_SRVMAXDATA(nd); siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); fullsiz = siz; if (nd->nd_flag & ND_NFSV3) { nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); #if 0 /* * va_filerev is not sufficient as a cookie verifier, * since it is not supposed to change when entries are * removed/added unless that offset cookies returned to * the client are no longer valid. */ if (!nd->nd_repstat && toff && verf != at.na_filerev) nd->nd_repstat = NFSERR_BAD_COOKIE; #endif } if (!nd->nd_repstat && vp->v_type != VDIR) nd->nd_repstat = NFSERR_NOTDIR; if (nd->nd_repstat == 0 && cnt == 0) { if (nd->nd_flag & ND_NFSV2) /* NFSv2 does not have NFSERR_TOOSMALL */ nd->nd_repstat = EPERM; else nd->nd_repstat = NFSERR_TOOSMALL; } if (!nd->nd_repstat) nd->nd_repstat = nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat) { vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); goto out; } is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0; rbuf = malloc(siz, M_TEMP, M_WAITOK); again: eofflag = 0; if (cookies) { free(cookies, M_TEMP); cookies = NULL; } iv.iov_base = rbuf; iv.iov_len = siz; io.uio_iov = &iv; io.uio_iovcnt = 1; io.uio_offset = (off_t)off; io.uio_resid = siz; io.uio_segflg = UIO_SYSSPACE; io.uio_rw = UIO_READ; io.uio_td = NULL; nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies, &cookies); off = (u_int64_t)io.uio_offset; if (io.uio_resid) siz -= io.uio_resid; if (!cookies && !nd->nd_repstat) nd->nd_repstat = NFSERR_PERM; if (nd->nd_flag & ND_NFSV3) { getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); if (!nd->nd_repstat) nd->nd_repstat = getret; } /* * Handles the failed cases. nd->nd_repstat == 0 past here. */ if (nd->nd_repstat) { vput(vp); free(rbuf, M_TEMP); if (cookies) free(cookies, M_TEMP); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); goto out; } /* * If nothing read, return eof * rpc reply */ if (siz == 0) { vput(vp); if (nd->nd_flag & ND_NFSV2) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); } else { nfsrv_postopattr(nd, getret, &at); NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); txdr_hyper(at.na_filerev, tl); tl += 2; } *tl++ = newnfs_false; *tl = newnfs_true; free(rbuf, M_TEMP); free(cookies, M_TEMP); goto out; } /* * Check for degenerate cases of nothing useful read. * If so go try again */ cpos = rbuf; cend = rbuf + siz; dp = (struct dirent *)cpos; cookiep = cookies; /* * For some reason FreeBSD's ufs_readdir() chooses to back the * directory offset up to a block boundary, so it is necessary to * skip over the records that precede the requested offset. This * requires the assumption that file offset cookies monotonically * increase. */ while (cpos < cend && ncookies > 0 && (dp->d_fileno == 0 || dp->d_type == DT_WHT || (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff))) { cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } if (cpos >= cend || ncookies == 0) { siz = fullsiz; toff = off; goto again; } vput(vp); /* * If cnt > MCLBYTES and the reply will not be saved, use * ext_pgs mbufs for TLS. * For NFSv4.0, we do not know for sure if the reply will * be saved, so do not use ext_pgs mbufs for NFSv4.0. */ if (cnt > MCLBYTES && siz > MCLBYTES && (nd->nd_flag & (ND_TLS | ND_EXTPG | ND_SAVEREPLY)) == ND_TLS && (nd->nd_flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4) nd->nd_flag |= ND_EXTPG; /* * dirlen is the size of the reply, including all XDR and must * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate * if the XDR should be included in "count", but to be safe, we do. * (Include the two booleans at the end of the reply in dirlen now.) */ if (nd->nd_flag & ND_NFSV3) { nfsrv_postopattr(nd, getret, &at); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); txdr_hyper(at.na_filerev, tl); dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED; } else { dirlen = 2 * NFSX_UNSIGNED; } /* Loop through the records and build reply */ while (cpos < cend && ncookies > 0) { nlen = dp->d_namlen; if (dp->d_fileno != 0 && dp->d_type != DT_WHT && nlen <= NFS_MAXNAMLEN) { if (nd->nd_flag & ND_NFSV3) dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen)); else dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen)); if (dirlen > cnt) { eofflag = 0; break; } /* * Build the directory record xdr from * the dirent entry. */ if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl++ = 0; } else { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_true; } *tl = txdr_unsigned(dp->d_fileno); (void) nfsm_strtom(nd, dp->d_name, nlen); if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = 0; } else NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(*cookiep); } cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } if (cpos < cend) eofflag = 0; NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_false; if (eofflag) *tl = newnfs_true; else *tl = newnfs_false; free(rbuf, M_TEMP); free(cookies, M_TEMP); out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * Readdirplus for V3 and Readdir for V4. */ int nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram, struct vnode *vp, struct nfsexstuff *exp) { struct dirent *dp; u_int32_t *tl; int dirlen; char *cpos, *cend, *rbuf; struct vnode *nvp; fhandle_t nfh; struct nfsvattr nva, at, *nvap = &nva; struct mbuf *mb0, *mb1; struct nfsreferral *refp; int nlen, r, error = 0, getret = 1, usevget = 1; int siz, cnt, fullsiz, eofflag, ncookies, entrycnt; caddr_t bpos0, bpos1; u_int64_t off, toff, verf; u_long *cookies = NULL, *cookiep; nfsattrbit_t attrbits, rderrbits, savbits; struct uio io; struct iovec iv; struct componentname cn; int at_root, is_ufs, is_zfs, needs_unbusy, supports_nfsv4acls; struct mount *mp, *new_mp; uint64_t mounted_on_fileno; struct thread *p = curthread; int bextpg0, bextpg1, bextpgsiz0, bextpgsiz1; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); goto out; } NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED); off = fxdr_hyper(tl); toff = off; tl += 2; verf = fxdr_hyper(tl); tl += 2; siz = fxdr_unsigned(int, *tl++); cnt = fxdr_unsigned(int, *tl); /* * Use the server's maximum data transfer size as the upper bound * on reply datalen. */ if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0) cnt = NFS_SRVMAXDATA(nd); /* * siz is a "hint" of how much directory information (name, fileid, * cookie) should be in the reply. At least one client "hints" 0, * so I set it to cnt for that case. I also round it up to the * next multiple of DIRBLKSIZ. * Since the size of a Readdirplus directory entry reply will always * be greater than a directory entry returned by VOP_READDIR(), it * does not make sense to read more than NFS_SRVMAXDATA() via * VOP_READDIR(). */ if (siz <= 0) siz = cnt; else if (siz > NFS_SRVMAXDATA(nd)) siz = NFS_SRVMAXDATA(nd); siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); if (nd->nd_flag & ND_NFSV4) { error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (error) goto nfsmout; NFSSET_ATTRBIT(&savbits, &attrbits); NFSCLRNOTFILLABLE_ATTRBIT(&attrbits, nd); NFSZERO_ATTRBIT(&rderrbits); NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR); } else { NFSZERO_ATTRBIT(&attrbits); } fullsiz = siz; nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); #if 0 if (!nd->nd_repstat) { if (off && verf != at.na_filerev) { /* * va_filerev is not sufficient as a cookie verifier, * since it is not supposed to change when entries are * removed/added unless that offset cookies returned to * the client are no longer valid. */ if (nd->nd_flag & ND_NFSV4) { nd->nd_repstat = NFSERR_NOTSAME; } else { nd->nd_repstat = NFSERR_BAD_COOKIE; } } } #endif if (!nd->nd_repstat && vp->v_type != VDIR) nd->nd_repstat = NFSERR_NOTDIR; if (!nd->nd_repstat && cnt == 0) nd->nd_repstat = NFSERR_TOOSMALL; if (!nd->nd_repstat) nd->nd_repstat = nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat) { vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); goto out; } is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0; is_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs") == 0; rbuf = malloc(siz, M_TEMP, M_WAITOK); again: eofflag = 0; if (cookies) { free(cookies, M_TEMP); cookies = NULL; } iv.iov_base = rbuf; iv.iov_len = siz; io.uio_iov = &iv; io.uio_iovcnt = 1; io.uio_offset = (off_t)off; io.uio_resid = siz; io.uio_segflg = UIO_SYSSPACE; io.uio_rw = UIO_READ; io.uio_td = NULL; nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies, &cookies); off = (u_int64_t)io.uio_offset; if (io.uio_resid) siz -= io.uio_resid; getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); if (!cookies && !nd->nd_repstat) nd->nd_repstat = NFSERR_PERM; if (!nd->nd_repstat) nd->nd_repstat = getret; if (nd->nd_repstat) { vput(vp); if (cookies) free(cookies, M_TEMP); free(rbuf, M_TEMP); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); goto out; } /* * If nothing read, return eof * rpc reply */ if (siz == 0) { vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); txdr_hyper(at.na_filerev, tl); tl += 2; *tl++ = newnfs_false; *tl = newnfs_true; free(cookies, M_TEMP); free(rbuf, M_TEMP); goto out; } /* * Check for degenerate cases of nothing useful read. * If so go try again */ cpos = rbuf; cend = rbuf + siz; dp = (struct dirent *)cpos; cookiep = cookies; /* * For some reason FreeBSD's ufs_readdir() chooses to back the * directory offset up to a block boundary, so it is necessary to * skip over the records that precede the requested offset. This * requires the assumption that file offset cookies monotonically * increase. */ while (cpos < cend && ncookies > 0 && (dp->d_fileno == 0 || dp->d_type == DT_WHT || (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff) || ((nd->nd_flag & ND_NFSV4) && ((dp->d_namlen == 1 && dp->d_name[0] == '.') || (dp->d_namlen==2 && dp->d_name[0]=='.' && dp->d_name[1]=='.'))))) { cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } if (cpos >= cend || ncookies == 0) { siz = fullsiz; toff = off; goto again; } /* * Busy the file system so that the mount point won't go away * and, as such, VFS_VGET() can be used safely. */ mp = vp->v_mount; vfs_ref(mp); NFSVOPUNLOCK(vp); nd->nd_repstat = vfs_busy(mp, 0); vfs_rel(mp); if (nd->nd_repstat != 0) { vrele(vp); free(cookies, M_TEMP); free(rbuf, M_TEMP); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); goto out; } /* * Check to see if entries in this directory can be safely acquired * via VFS_VGET() or if a switch to VOP_LOOKUP() is required. * ZFS snapshot directories need VOP_LOOKUP(), so that any * automount of the snapshot directory that is required will * be done. * This needs to be done here for NFSv4, since NFSv4 never does * a VFS_VGET() for "." or "..". */ if (is_zfs == 1) { r = VFS_VGET(mp, at.na_fileid, LK_SHARED, &nvp); if (r == EOPNOTSUPP) { usevget = 0; cn.cn_nameiop = LOOKUP; cn.cn_lkflags = LK_SHARED | LK_RETRY; cn.cn_cred = nd->nd_cred; cn.cn_thread = p; } else if (r == 0) vput(nvp); } /* * If the reply is likely to exceed MCLBYTES and the reply will * not be saved, use ext_pgs mbufs for TLS. * It is difficult to predict how large each entry will be and * how many entries have been read, so just assume the directory * entries grow by a factor of 4 when attributes are included. * For NFSv4.0, we do not know for sure if the reply will * be saved, so do not use ext_pgs mbufs for NFSv4.0. */ if (cnt > MCLBYTES && siz > MCLBYTES / 4 && (nd->nd_flag & (ND_TLS | ND_EXTPG | ND_SAVEREPLY)) == ND_TLS && (nd->nd_flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4) nd->nd_flag |= ND_EXTPG; /* * Save this position, in case there is an error before one entry * is created. */ mb0 = nd->nd_mb; bpos0 = nd->nd_bpos; bextpg0 = nd->nd_bextpg; bextpgsiz0 = nd->nd_bextpgsiz; /* * Fill in the first part of the reply. * dirlen is the reply length in bytes and cannot exceed cnt. * (Include the two booleans at the end of the reply in dirlen now, * so we recognize when we have exceeded cnt.) */ if (nd->nd_flag & ND_NFSV3) { dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED; nfsrv_postopattr(nd, getret, &at); } else { dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED; } NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); txdr_hyper(at.na_filerev, tl); /* * Save this position, in case there is an empty reply needed. */ mb1 = nd->nd_mb; bpos1 = nd->nd_bpos; bextpg1 = nd->nd_bextpg; bextpgsiz1 = nd->nd_bextpgsiz; /* Loop through the records and build reply */ entrycnt = 0; while (cpos < cend && ncookies > 0 && dirlen < cnt) { nlen = dp->d_namlen; if (dp->d_fileno != 0 && dp->d_type != DT_WHT && nlen <= NFS_MAXNAMLEN && ((nd->nd_flag & ND_NFSV3) || nlen > 2 || (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.')) || (nlen == 1 && dp->d_name[0] != '.'))) { /* * Save the current position in the reply, in case * this entry exceeds cnt. */ mb1 = nd->nd_mb; bpos1 = nd->nd_bpos; bextpg1 = nd->nd_bextpg; bextpgsiz1 = nd->nd_bextpgsiz; /* * For readdir_and_lookup get the vnode using * the file number. */ nvp = NULL; refp = NULL; r = 0; at_root = 0; needs_unbusy = 0; new_mp = mp; mounted_on_fileno = (uint64_t)dp->d_fileno; if ((nd->nd_flag & ND_NFSV3) || NFSNONZERO_ATTRBIT(&savbits)) { if (nd->nd_flag & ND_NFSV4) refp = nfsv4root_getreferral(NULL, vp, dp->d_fileno); if (refp == NULL) { if (usevget) r = VFS_VGET(mp, dp->d_fileno, LK_SHARED, &nvp); else r = EOPNOTSUPP; if (r == EOPNOTSUPP) { if (usevget) { usevget = 0; cn.cn_nameiop = LOOKUP; cn.cn_lkflags = LK_SHARED | LK_RETRY; cn.cn_cred = nd->nd_cred; cn.cn_thread = p; } cn.cn_nameptr = dp->d_name; cn.cn_namelen = nlen; cn.cn_flags = ISLASTCN | NOFOLLOW | LOCKLEAF; if (nlen == 2 && dp->d_name[0] == '.' && dp->d_name[1] == '.') cn.cn_flags |= ISDOTDOT; if (NFSVOPLOCK(vp, LK_SHARED) != 0) { nd->nd_repstat = EPERM; break; } if ((vp->v_vflag & VV_ROOT) != 0 && (cn.cn_flags & ISDOTDOT) != 0) { vref(vp); nvp = vp; r = 0; } else { r = VOP_LOOKUP(vp, &nvp, &cn); if (vp != nvp) NFSVOPUNLOCK(vp); } } /* * For NFSv4, check to see if nvp is * a mount point and get the mount * point vnode, as required. */ if (r == 0 && nfsrv_enable_crossmntpt != 0 && (nd->nd_flag & ND_NFSV4) != 0 && nvp->v_type == VDIR && nvp->v_mountedhere != NULL) { new_mp = nvp->v_mountedhere; r = vfs_busy(new_mp, 0); vput(nvp); nvp = NULL; if (r == 0) { r = VFS_ROOT(new_mp, LK_SHARED, &nvp); needs_unbusy = 1; if (r == 0) at_root = 1; } } } /* * If we failed to look up the entry, then it * has become invalid, most likely removed. */ if (r != 0) { if (needs_unbusy) vfs_unbusy(new_mp); goto invalid; } KASSERT(refp != NULL || nvp != NULL, ("%s: undetected lookup error", __func__)); if (refp == NULL && ((nd->nd_flag & ND_NFSV3) || NFSNONZERO_ATTRBIT(&attrbits))) { r = nfsvno_getfh(nvp, &nfh, p); if (!r) r = nfsvno_getattr(nvp, nvap, nd, p, 1, &attrbits); if (r == 0 && is_zfs == 1 && nfsrv_enable_crossmntpt != 0 && (nd->nd_flag & ND_NFSV4) != 0 && nvp->v_type == VDIR && vp->v_mount != nvp->v_mount) { /* * For a ZFS snapshot, there is a * pseudo mount that does not set * v_mountedhere, so it needs to * be detected via a different * mount structure. */ at_root = 1; if (new_mp == mp) new_mp = nvp->v_mount; } } /* * If we failed to get attributes of the entry, * then just skip it for NFSv3 (the traditional * behavior in the old NFS server). * For NFSv4 the behavior is controlled by * RDATTRERROR: we either ignore the error or * fail the request. * Note that RDATTRERROR is never set for NFSv3. */ if (r != 0) { if (!NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_RDATTRERROR)) { vput(nvp); if (needs_unbusy != 0) vfs_unbusy(new_mp); if ((nd->nd_flag & ND_NFSV3)) goto invalid; nd->nd_repstat = r; break; } } } /* * Build the directory record xdr */ if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl++ = 0; *tl = txdr_unsigned(dp->d_fileno); dirlen += nfsm_strtom(nd, dp->d_name, nlen); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = 0; *tl = txdr_unsigned(*cookiep); nfsrv_postopattr(nd, 0, nvap); dirlen += nfsm_fhtom(nd,(u_int8_t *)&nfh,0,1); dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR); if (nvp != NULL) vput(nvp); } else { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl++ = 0; *tl = txdr_unsigned(*cookiep); dirlen += nfsm_strtom(nd, dp->d_name, nlen); if (nvp != NULL) { supports_nfsv4acls = nfs_supportsnfsv4acls(nvp); NFSVOPUNLOCK(nvp); } else supports_nfsv4acls = 0; if (refp != NULL) { dirlen += nfsrv_putreferralattr(nd, &savbits, refp, 0, &nd->nd_repstat); if (nd->nd_repstat) { if (nvp != NULL) vrele(nvp); if (needs_unbusy != 0) vfs_unbusy(new_mp); break; } } else if (r) { dirlen += nfsvno_fillattr(nd, new_mp, nvp, nvap, &nfh, r, &rderrbits, nd->nd_cred, p, isdgram, 0, supports_nfsv4acls, at_root, mounted_on_fileno); } else { dirlen += nfsvno_fillattr(nd, new_mp, nvp, nvap, &nfh, r, &attrbits, nd->nd_cred, p, isdgram, 0, supports_nfsv4acls, at_root, mounted_on_fileno); } if (nvp != NULL) vrele(nvp); dirlen += (3 * NFSX_UNSIGNED); } if (needs_unbusy != 0) vfs_unbusy(new_mp); if (dirlen <= cnt) entrycnt++; } invalid: cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } vrele(vp); vfs_unbusy(mp); /* * If dirlen > cnt, we must strip off the last entry. If that * results in an empty reply, report NFSERR_TOOSMALL. */ if (dirlen > cnt || nd->nd_repstat) { if (!nd->nd_repstat && entrycnt == 0) nd->nd_repstat = NFSERR_TOOSMALL; if (nd->nd_repstat) { nfsm_trimtrailing(nd, mb0, bpos0, bextpg0, bextpgsiz0); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); } else nfsm_trimtrailing(nd, mb1, bpos1, bextpg1, bextpgsiz1); eofflag = 0; } else if (cpos < cend) eofflag = 0; if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_false; if (eofflag) *tl = newnfs_true; else *tl = newnfs_false; } free(cookies, M_TEMP); free(rbuf, M_TEMP); out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * Get the settable attributes out of the mbuf list. * (Return 0 or EBADRPC) */ int nfsrv_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p) { u_int32_t *tl; struct nfsv2_sattr *sp; int error = 0, toclient = 0; switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) { case ND_NFSV2: NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); /* * Some old clients didn't fill in the high order 16bits. * --> check the low order 2 bytes for 0xffff */ if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff) nvap->na_mode = nfstov_mode(sp->sa_mode); if (sp->sa_uid != newnfs_xdrneg1) nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid); if (sp->sa_gid != newnfs_xdrneg1) nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid); if (sp->sa_size != newnfs_xdrneg1) nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size); if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) { #ifdef notyet fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime); #else nvap->na_atime.tv_sec = fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec); nvap->na_atime.tv_nsec = 0; #endif } if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1) fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime); break; case ND_NFSV3: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nvap->na_mode = nfstov_mode(*tl); } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nvap->na_uid = fxdr_unsigned(uid_t, *tl); } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nvap->na_gid = fxdr_unsigned(gid_t, *tl); } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); nvap->na_size = fxdr_hyper(tl); } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); switch (fxdr_unsigned(int, *tl)) { case NFSV3SATTRTIME_TOCLIENT: NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); fxdr_nfsv3time(tl, &nvap->na_atime); toclient = 1; break; case NFSV3SATTRTIME_TOSERVER: vfs_timestamp(&nvap->na_atime); nvap->na_vaflags |= VA_UTIMES_NULL; break; } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); switch (fxdr_unsigned(int, *tl)) { case NFSV3SATTRTIME_TOCLIENT: NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); fxdr_nfsv3time(tl, &nvap->na_mtime); nvap->na_vaflags &= ~VA_UTIMES_NULL; break; case NFSV3SATTRTIME_TOSERVER: vfs_timestamp(&nvap->na_mtime); if (!toclient) nvap->na_vaflags |= VA_UTIMES_NULL; break; } break; case ND_NFSV4: error = nfsv4_sattr(nd, vp, nvap, attrbitp, aclp, p); } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Handle the setable attributes for V4. * Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise. */ int nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p) { u_int32_t *tl; int attrsum = 0; int i, j; int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0; int moderet, toclient = 0; u_char *cp, namestr[NFSV4_SMALLSTR + 1]; uid_t uid; gid_t gid; u_short mode, mask; /* Same type as va_mode. */ struct vattr va; error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup); if (error) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsize = fxdr_unsigned(int, *tl); /* * Loop around getting the setable attributes. If an unsupported * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return. */ if (retnotsup) { nd->nd_repstat = NFSERR_ATTRNOTSUPP; bitpos = NFSATTRBIT_MAX; } else { bitpos = 0; } moderet = 0; for (; bitpos < NFSATTRBIT_MAX; bitpos++) { if (attrsum > attrsize) { error = NFSERR_BADXDR; goto nfsmout; } if (NFSISSET_ATTRBIT(attrbitp, bitpos)) switch (bitpos) { case NFSATTRBIT_SIZE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (vp != NULL && vp->v_type != VREG) { error = (vp->v_type == VDIR) ? NFSERR_ISDIR : NFSERR_INVAL; goto nfsmout; } nvap->na_size = fxdr_hyper(tl); attrsum += NFSX_HYPER; break; case NFSATTRBIT_ACL: error = nfsrv_dissectacl(nd, aclp, &aceerr, &aclsize, p); if (error) goto nfsmout; if (aceerr && !nd->nd_repstat) nd->nd_repstat = aceerr; attrsum += aclsize; break; case NFSATTRBIT_ARCHIVE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_HIDDEN: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_MIMETYPE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i)); break; case NFSATTRBIT_MODE: moderet = NFSERR_INVAL; /* Can't do MODESETMASKED. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nvap->na_mode = nfstov_mode(*tl); attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_OWNER: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); j = fxdr_unsigned(int, *tl); if (j < 0) { error = NFSERR_BADXDR; goto nfsmout; } if (j > NFSV4_SMALLSTR) cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); else cp = namestr; error = nfsrv_mtostr(nd, cp, j); if (error) { if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); goto nfsmout; } if (!nd->nd_repstat) { nd->nd_repstat = nfsv4_strtouid(nd, cp, j, &uid); if (!nd->nd_repstat) nvap->na_uid = uid; } if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); break; case NFSATTRBIT_OWNERGROUP: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); j = fxdr_unsigned(int, *tl); if (j < 0) { error = NFSERR_BADXDR; goto nfsmout; } if (j > NFSV4_SMALLSTR) cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); else cp = namestr; error = nfsrv_mtostr(nd, cp, j); if (error) { if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); goto nfsmout; } if (!nd->nd_repstat) { nd->nd_repstat = nfsv4_strtogid(nd, cp, j, &gid); if (!nd->nd_repstat) nvap->na_gid = gid; } if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); break; case NFSATTRBIT_SYSTEM: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_TIMEACCESSSET: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsum += NFSX_UNSIGNED; if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); fxdr_nfsv4time(tl, &nvap->na_atime); toclient = 1; attrsum += NFSX_V4TIME; } else { vfs_timestamp(&nvap->na_atime); nvap->na_vaflags |= VA_UTIMES_NULL; } break; case NFSATTRBIT_TIMEBACKUP: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMECREATE: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); fxdr_nfsv4time(tl, &nvap->na_btime); attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMODIFYSET: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsum += NFSX_UNSIGNED; if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); fxdr_nfsv4time(tl, &nvap->na_mtime); nvap->na_vaflags &= ~VA_UTIMES_NULL; attrsum += NFSX_V4TIME; } else { vfs_timestamp(&nvap->na_mtime); if (!toclient) nvap->na_vaflags |= VA_UTIMES_NULL; } break; case NFSATTRBIT_MODESETMASKED: NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); mode = fxdr_unsigned(u_short, *tl++); mask = fxdr_unsigned(u_short, *tl); /* * vp == NULL implies an Open/Create operation. * This attribute can only be used for Setattr and * only for NFSv4.1 or higher. * If moderet != 0, a mode attribute has also been * specified and this attribute cannot be done in the * same Setattr operation. */ if ((nd->nd_flag & ND_NFSV41) == 0) nd->nd_repstat = NFSERR_ATTRNOTSUPP; else if ((mode & ~07777) != 0 || (mask & ~07777) != 0 || vp == NULL) nd->nd_repstat = NFSERR_INVAL; else if (moderet == 0) moderet = VOP_GETATTR(vp, &va, nd->nd_cred); if (moderet == 0) nvap->na_mode = (mode & mask) | (va.va_mode & ~mask); else nd->nd_repstat = moderet; attrsum += 2 * NFSX_UNSIGNED; break; default: nd->nd_repstat = NFSERR_ATTRNOTSUPP; /* * set bitpos so we drop out of the loop. */ bitpos = NFSATTRBIT_MAX; break; } } /* * some clients pad the attrlist, so we need to skip over the * padding. */ if (attrsum > attrsize) { error = NFSERR_BADXDR; } else { attrsize = NFSM_RNDUP(attrsize); if (attrsum < attrsize) error = nfsm_advance(nd, attrsize - attrsum, -1); } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Check/setup export credentials. */ int nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp, struct ucred *credanon) { int error = 0; /* * Check/setup credentials. */ if (nd->nd_flag & ND_GSS) exp->nes_exflag &= ~MNT_EXPORTANON; /* * Check to see if the operation is allowed for this security flavor. * RFC2623 suggests that the NFSv3 Fsinfo RPC be allowed to * AUTH_NONE or AUTH_SYS for file systems requiring RPCSEC_GSS. * Also, allow Secinfo, so that it can acquire the correct flavor(s). */ if (nfsvno_testexp(nd, exp) && nd->nd_procnum != NFSV4OP_SECINFO && nd->nd_procnum != NFSPROC_FSINFO) { if (nd->nd_flag & ND_NFSV4) error = NFSERR_WRONGSEC; else error = (NFSERR_AUTHERR | AUTH_TOOWEAK); goto out; } /* * Check to see if the file system is exported V4 only. */ if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4)) { error = NFSERR_PROGNOTV4; goto out; } /* * Now, map the user credentials. * (Note that ND_AUTHNONE will only be set for an NFSv3 * Fsinfo RPC. If set for anything else, this code might need * to change.) */ if (NFSVNO_EXPORTED(exp)) { if (((nd->nd_flag & ND_GSS) == 0 && nd->nd_cred->cr_uid == 0) || NFSVNO_EXPORTANON(exp) || (nd->nd_flag & ND_AUTHNONE) != 0) { nd->nd_cred->cr_uid = credanon->cr_uid; nd->nd_cred->cr_gid = credanon->cr_gid; crsetgroups(nd->nd_cred, credanon->cr_ngroups, credanon->cr_groups); } else if ((nd->nd_flag & ND_GSS) == 0) { /* * If using AUTH_SYS, call nfsrv_getgrpscred() to see * if there is a replacement credential with a group * list set up by "nfsuserd -manage-gids". * If there is no replacement, nfsrv_getgrpscred() * simply returns its argument. */ nd->nd_cred = nfsrv_getgrpscred(nd->nd_cred); } } out: NFSEXITCODE2(error, nd); return (error); } /* * Check exports. */ int nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp, struct ucred **credp) { int error; error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, &exp->nes_numsecflavor, exp->nes_secflavors); if (error) { if (nfs_rootfhset) { exp->nes_exflag = 0; exp->nes_numsecflavor = 0; error = 0; } } else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor > MAXSECFLAVORS) { printf("nfsvno_checkexp: numsecflavors out of range\n"); exp->nes_numsecflavor = 0; error = EACCES; } NFSEXITCODE(error); return (error); } /* * Get a vnode for a file handle and export stuff. */ int nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam, int lktype, struct vnode **vpp, struct nfsexstuff *exp, struct ucred **credp) { int error; *credp = NULL; exp->nes_numsecflavor = 0; error = VFS_FHTOVP(mp, &fhp->fh_fid, lktype, vpp); if (error != 0) /* Make sure the server replies ESTALE to the client. */ error = ESTALE; if (nam && !error) { error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, &exp->nes_numsecflavor, exp->nes_secflavors); if (error) { if (nfs_rootfhset) { exp->nes_exflag = 0; exp->nes_numsecflavor = 0; error = 0; } else { vput(*vpp); } } else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor > MAXSECFLAVORS) { printf("nfsvno_fhtovp: numsecflavors out of range\n"); exp->nes_numsecflavor = 0; error = EACCES; vput(*vpp); } } NFSEXITCODE(error); return (error); } /* * nfsd_fhtovp() - convert a fh to a vnode ptr * - look up fsid in mount list (if not found ret error) * - get vp and export rights by calling nfsvno_fhtovp() * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon * for AUTH_SYS * - if mpp != NULL, return the mount point so that it can * be used for vn_finished_write() by the caller */ void nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype, struct vnode **vpp, struct nfsexstuff *exp, struct mount **mpp, int startwrite) { struct mount *mp; struct ucred *credanon; fhandle_t *fhp; fhp = (fhandle_t *)nfp->nfsrvfh_data; /* * Check for the special case of the nfsv4root_fh. */ mp = vfs_busyfs(&fhp->fh_fsid); if (mpp != NULL) *mpp = mp; if (mp == NULL) { *vpp = NULL; nd->nd_repstat = ESTALE; goto out; } if (startwrite) { vn_start_write(NULL, mpp, V_WAIT); if (lktype == LK_SHARED && !(MNT_SHARED_WRITES(mp))) lktype = LK_EXCLUSIVE; } nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, lktype, vpp, exp, &credanon); vfs_unbusy(mp); /* * For NFSv4 without a pseudo root fs, unexported file handles * can be returned, so that Lookup works everywhere. */ if (!nd->nd_repstat && exp->nes_exflag == 0 && !(nd->nd_flag & ND_NFSV4)) { vput(*vpp); nd->nd_repstat = EACCES; } /* * If TLS is required by the export, check the flags in nd_flag. */ if (nd->nd_repstat == 0 && ((NFSVNO_EXTLS(exp) && (nd->nd_flag & ND_TLS) == 0) || (NFSVNO_EXTLSCERT(exp) && (nd->nd_flag & ND_TLSCERT) == 0) || (NFSVNO_EXTLSCERTUSER(exp) && (nd->nd_flag & ND_TLSCERTUSER) == 0))) { vput(*vpp); nd->nd_repstat = NFSERR_ACCES; } /* * Personally, I've never seen any point in requiring a * reserved port#, since only in the rare case where the * clients are all boxes with secure system privileges, * does it provide any enhanced security, but... some people * believe it to be useful and keep putting this code back in. * (There is also some "security checker" out there that * complains if the nfs server doesn't enforce this.) * However, note the following: * RFC3530 (NFSv4) specifies that a reserved port# not be * required. * RFC2623 recommends that, if a reserved port# is checked for, * that there be a way to turn that off--> ifdef'd. */ #ifdef NFS_REQRSVPORT if (!nd->nd_repstat) { struct sockaddr_in *saddr; struct sockaddr_in6 *saddr6; saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *); saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *); if (!(nd->nd_flag & ND_NFSV4) && ((saddr->sin_family == AF_INET && ntohs(saddr->sin_port) >= IPPORT_RESERVED) || (saddr6->sin6_family == AF_INET6 && ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) { vput(*vpp); nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK); } } #endif /* NFS_REQRSVPORT */ /* * Check/setup credentials. */ if (!nd->nd_repstat) { nd->nd_saveduid = nd->nd_cred->cr_uid; nd->nd_repstat = nfsd_excred(nd, exp, credanon); if (nd->nd_repstat) vput(*vpp); } if (credanon != NULL) crfree(credanon); if (nd->nd_repstat) { if (startwrite) vn_finished_write(mp); *vpp = NULL; if (mpp != NULL) *mpp = NULL; } out: NFSEXITCODE2(0, nd); } /* * glue for fp. */ static int fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp) { struct filedesc *fdp; struct file *fp; int error = 0; fdp = p->td_proc->p_fd; if (fd < 0 || fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd].fde_file) == NULL) { error = EBADF; goto out; } *fpp = fp; out: NFSEXITCODE(error); return (error); } /* * Called from nfssvc() to update the exports list. Just call * vfs_export(). This has to be done, since the v4 root fake fs isn't * in the mount list. */ int nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p) { struct nfsex_args *nfsexargp = (struct nfsex_args *)argp; int error = 0; struct nameidata nd; fhandle_t fh; error = vfs_export(&nfsv4root_mnt, &nfsexargp->export); if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0) nfs_rootfhset = 0; else if (error == 0) { if (nfsexargp->fspec == NULL) { error = EPERM; goto out; } /* * If fspec != NULL, this is the v4root path. */ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, nfsexargp->fspec, p); if ((error = namei(&nd)) != 0) goto out; error = nfsvno_getfh(nd.ni_vp, &fh, p); vrele(nd.ni_vp); if (!error) { nfs_rootfh.nfsrvfh_len = NFSX_MYFH; NFSBCOPY((caddr_t)&fh, nfs_rootfh.nfsrvfh_data, sizeof (fhandle_t)); nfs_rootfhset = 1; } } out: NFSEXITCODE(error); return (error); } /* * This function needs to test to see if the system is near its limit * for memory allocation via malloc() or mget() and return True iff * either of these resources are near their limit. * XXX (For now, this is just a stub.) */ int nfsrv_testmalloclimit = 0; int nfsrv_mallocmget_limit(void) { static int printmesg = 0; static int testval = 1; if (nfsrv_testmalloclimit && (testval++ % 1000) == 0) { if ((printmesg++ % 100) == 0) printf("nfsd: malloc/mget near limit\n"); return (1); } return (0); } /* * BSD specific initialization of a mount point. */ void nfsd_mntinit(void) { static int inited = 0; if (inited) return; inited = 1; nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED); TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist); TAILQ_INIT(&nfsv4root_mnt.mnt_lazyvnodelist); nfsv4root_mnt.mnt_export = NULL; TAILQ_INIT(&nfsv4root_opt); TAILQ_INIT(&nfsv4root_newopt); nfsv4root_mnt.mnt_opt = &nfsv4root_opt; nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt; nfsv4root_mnt.mnt_nvnodelistsize = 0; nfsv4root_mnt.mnt_lazyvnodelistsize = 0; } /* * Get a vnode for a file handle, without checking exports, etc. */ struct vnode * nfsvno_getvp(fhandle_t *fhp) { struct mount *mp; struct vnode *vp; int error; mp = vfs_busyfs(&fhp->fh_fsid); if (mp == NULL) return (NULL); error = VFS_FHTOVP(mp, &fhp->fh_fid, LK_EXCLUSIVE, &vp); vfs_unbusy(mp); if (error) return (NULL); return (vp); } /* * Do a local VOP_ADVLOCK(). */ int nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first, u_int64_t end, struct thread *td) { int error = 0; struct flock fl; u_int64_t tlen; if (nfsrv_dolocallocks == 0) goto out; ASSERT_VOP_UNLOCKED(vp, "nfsvno_advlock: vp locked"); fl.l_whence = SEEK_SET; fl.l_type = ftype; fl.l_start = (off_t)first; if (end == NFS64BITSSET) { fl.l_len = 0; } else { tlen = end - first; fl.l_len = (off_t)tlen; } /* * For FreeBSD8, the l_pid and l_sysid must be set to the same * values for all calls, so that all locks will be held by the * nfsd server. (The nfsd server handles conflicts between the * various clients.) * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024 * bytes, so it can't be put in l_sysid. */ if (nfsv4_sysid == 0) nfsv4_sysid = nlm_acquire_next_sysid(); fl.l_pid = (pid_t)0; fl.l_sysid = (int)nfsv4_sysid; if (ftype == F_UNLCK) error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_UNLCK, &fl, (F_POSIX | F_REMOTE)); else error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl, (F_POSIX | F_REMOTE)); out: NFSEXITCODE(error); return (error); } /* * Check the nfsv4 root exports. */ int nfsvno_v4rootexport(struct nfsrv_descript *nd) { struct ucred *credanon; int error = 0, numsecflavor, secflavors[MAXSECFLAVORS], i; uint64_t exflags; error = vfs_stdcheckexp(&nfsv4root_mnt, nd->nd_nam, &exflags, &credanon, &numsecflavor, secflavors); if (error) { error = NFSERR_PROGUNAVAIL; goto out; } if (credanon != NULL) crfree(credanon); for (i = 0; i < numsecflavor; i++) { if (secflavors[i] == AUTH_SYS) nd->nd_flag |= ND_EXAUTHSYS; else if (secflavors[i] == RPCSEC_GSS_KRB5) nd->nd_flag |= ND_EXGSS; else if (secflavors[i] == RPCSEC_GSS_KRB5I) nd->nd_flag |= ND_EXGSSINTEGRITY; else if (secflavors[i] == RPCSEC_GSS_KRB5P) nd->nd_flag |= ND_EXGSSPRIVACY; } /* And set ND_EXxx flags for TLS. */ - if ((exflags & MNTEX_TLS) != 0) { + if ((exflags & MNT_EXTLS) != 0) { nd->nd_flag |= ND_EXTLS; - if ((exflags & MNTEX_TLSCERT) != 0) + if ((exflags & MNT_EXTLSCERT) != 0) nd->nd_flag |= ND_EXTLSCERT; - if ((exflags & MNTEX_TLSCERTUSER) != 0) + if ((exflags & MNT_EXTLSCERTUSER) != 0) nd->nd_flag |= ND_EXTLSCERTUSER; } out: NFSEXITCODE(error); return (error); } /* * Nfs server pseudo system call for the nfsd's */ /* * MPSAFE */ static int nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap) { struct file *fp; struct nfsd_addsock_args sockarg; struct nfsd_nfsd_args nfsdarg; struct nfsd_nfsd_oargs onfsdarg; struct nfsd_pnfsd_args pnfsdarg; struct vnode *vp, *nvp, *curdvp; struct pnfsdsfile *pf; struct nfsdevice *ds, *fds; cap_rights_t rights; int buflen, error, ret; char *buf, *cp, *cp2, *cp3; char fname[PNFS_FILENAME_LEN + 1]; if (uap->flag & NFSSVC_NFSDADDSOCK) { error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg)); if (error) goto out; /* * Since we don't know what rights might be required, * pretend that we need them all. It is better to be too * careful than too reckless. */ error = fget(td, sockarg.sock, cap_rights_init(&rights, CAP_SOCK_SERVER), &fp); if (error != 0) goto out; if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, td); error = EPERM; goto out; } error = nfsrvd_addsock(fp); fdrop(fp, td); } else if (uap->flag & NFSSVC_NFSDNFSD) { if (uap->argp == NULL) { error = EINVAL; goto out; } if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) { error = copyin(uap->argp, &onfsdarg, sizeof(onfsdarg)); if (error == 0) { nfsdarg.principal = onfsdarg.principal; nfsdarg.minthreads = onfsdarg.minthreads; nfsdarg.maxthreads = onfsdarg.maxthreads; nfsdarg.version = 1; nfsdarg.addr = NULL; nfsdarg.addrlen = 0; nfsdarg.dnshost = NULL; nfsdarg.dnshostlen = 0; nfsdarg.dspath = NULL; nfsdarg.dspathlen = 0; nfsdarg.mdspath = NULL; nfsdarg.mdspathlen = 0; nfsdarg.mirrorcnt = 1; } } else error = copyin(uap->argp, &nfsdarg, sizeof(nfsdarg)); if (error) goto out; if (nfsdarg.addrlen > 0 && nfsdarg.addrlen < 10000 && nfsdarg.dnshostlen > 0 && nfsdarg.dnshostlen < 10000 && nfsdarg.dspathlen > 0 && nfsdarg.dspathlen < 10000 && nfsdarg.mdspathlen > 0 && nfsdarg.mdspathlen < 10000 && nfsdarg.mirrorcnt >= 1 && nfsdarg.mirrorcnt <= NFSDEV_MAXMIRRORS && nfsdarg.addr != NULL && nfsdarg.dnshost != NULL && nfsdarg.dspath != NULL && nfsdarg.mdspath != NULL) { NFSD_DEBUG(1, "addrlen=%d dspathlen=%d dnslen=%d" " mdspathlen=%d mirrorcnt=%d\n", nfsdarg.addrlen, nfsdarg.dspathlen, nfsdarg.dnshostlen, nfsdarg.mdspathlen, nfsdarg.mirrorcnt); cp = malloc(nfsdarg.addrlen + 1, M_TEMP, M_WAITOK); error = copyin(nfsdarg.addr, cp, nfsdarg.addrlen); if (error != 0) { free(cp, M_TEMP); goto out; } cp[nfsdarg.addrlen] = '\0'; /* Ensure nul term. */ nfsdarg.addr = cp; cp = malloc(nfsdarg.dnshostlen + 1, M_TEMP, M_WAITOK); error = copyin(nfsdarg.dnshost, cp, nfsdarg.dnshostlen); if (error != 0) { free(nfsdarg.addr, M_TEMP); free(cp, M_TEMP); goto out; } cp[nfsdarg.dnshostlen] = '\0'; /* Ensure nul term. */ nfsdarg.dnshost = cp; cp = malloc(nfsdarg.dspathlen + 1, M_TEMP, M_WAITOK); error = copyin(nfsdarg.dspath, cp, nfsdarg.dspathlen); if (error != 0) { free(nfsdarg.addr, M_TEMP); free(nfsdarg.dnshost, M_TEMP); free(cp, M_TEMP); goto out; } cp[nfsdarg.dspathlen] = '\0'; /* Ensure nul term. */ nfsdarg.dspath = cp; cp = malloc(nfsdarg.mdspathlen + 1, M_TEMP, M_WAITOK); error = copyin(nfsdarg.mdspath, cp, nfsdarg.mdspathlen); if (error != 0) { free(nfsdarg.addr, M_TEMP); free(nfsdarg.dnshost, M_TEMP); free(nfsdarg.dspath, M_TEMP); free(cp, M_TEMP); goto out; } cp[nfsdarg.mdspathlen] = '\0'; /* Ensure nul term. */ nfsdarg.mdspath = cp; } else { nfsdarg.addr = NULL; nfsdarg.addrlen = 0; nfsdarg.dnshost = NULL; nfsdarg.dnshostlen = 0; nfsdarg.dspath = NULL; nfsdarg.dspathlen = 0; nfsdarg.mdspath = NULL; nfsdarg.mdspathlen = 0; nfsdarg.mirrorcnt = 1; } error = nfsrvd_nfsd(td, &nfsdarg); free(nfsdarg.addr, M_TEMP); free(nfsdarg.dnshost, M_TEMP); free(nfsdarg.dspath, M_TEMP); free(nfsdarg.mdspath, M_TEMP); } else if (uap->flag & NFSSVC_PNFSDS) { error = copyin(uap->argp, &pnfsdarg, sizeof(pnfsdarg)); if (error == 0 && (pnfsdarg.op == PNFSDOP_DELDSSERVER || pnfsdarg.op == PNFSDOP_FORCEDELDS)) { cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); error = copyinstr(pnfsdarg.dspath, cp, PATH_MAX + 1, NULL); if (error == 0) error = nfsrv_deldsserver(pnfsdarg.op, cp, td); free(cp, M_TEMP); } else if (error == 0 && pnfsdarg.op == PNFSDOP_COPYMR) { cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); buflen = sizeof(*pf) * NFSDEV_MAXMIRRORS; buf = malloc(buflen, M_TEMP, M_WAITOK); error = copyinstr(pnfsdarg.mdspath, cp, PATH_MAX + 1, NULL); NFSD_DEBUG(4, "pnfsdcopymr cp mdspath=%d\n", error); if (error == 0 && pnfsdarg.dspath != NULL) { cp2 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); error = copyinstr(pnfsdarg.dspath, cp2, PATH_MAX + 1, NULL); NFSD_DEBUG(4, "pnfsdcopymr cp dspath=%d\n", error); } else cp2 = NULL; if (error == 0 && pnfsdarg.curdspath != NULL) { cp3 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); error = copyinstr(pnfsdarg.curdspath, cp3, PATH_MAX + 1, NULL); NFSD_DEBUG(4, "pnfsdcopymr cp curdspath=%d\n", error); } else cp3 = NULL; curdvp = NULL; fds = NULL; if (error == 0) error = nfsrv_mdscopymr(cp, cp2, cp3, buf, &buflen, fname, td, &vp, &nvp, &pf, &ds, &fds); NFSD_DEBUG(4, "nfsrv_mdscopymr=%d\n", error); if (error == 0) { if (pf->dsf_dir >= nfsrv_dsdirsize) { printf("copymr: dsdir out of range\n"); pf->dsf_dir = 0; } NFSD_DEBUG(4, "copymr: buflen=%d\n", buflen); error = nfsrv_copymr(vp, nvp, ds->nfsdev_dsdir[pf->dsf_dir], ds, pf, (struct pnfsdsfile *)buf, buflen / sizeof(*pf), td->td_ucred, td); vput(vp); vput(nvp); if (fds != NULL && error == 0) { curdvp = fds->nfsdev_dsdir[pf->dsf_dir]; ret = vn_lock(curdvp, LK_EXCLUSIVE); if (ret == 0) { nfsrv_dsremove(curdvp, fname, td->td_ucred, td); NFSVOPUNLOCK(curdvp); } } NFSD_DEBUG(4, "nfsrv_copymr=%d\n", error); } free(cp, M_TEMP); free(cp2, M_TEMP); free(cp3, M_TEMP); free(buf, M_TEMP); } } else { error = nfssvc_srvcall(td, uap, td->td_ucred); } out: NFSEXITCODE(error); return (error); } static int nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred) { struct nfsex_args export; struct nfsex_oldargs oexp; struct file *fp = NULL; int stablefd, i, len; struct nfsd_clid adminrevoke; struct nfsd_dumplist dumplist; struct nfsd_dumpclients *dumpclients; struct nfsd_dumplocklist dumplocklist; struct nfsd_dumplocks *dumplocks; struct nameidata nd; vnode_t vp; int error = EINVAL, igotlock; struct proc *procp; gid_t *grps; static int suspend_nfsd = 0; if (uap->flag & NFSSVC_PUBLICFH) { NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t)); error = copyin(uap->argp, &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t)); if (!error) nfs_pubfhset = 1; } else if ((uap->flag & (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) == (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) { error = copyin(uap->argp,(caddr_t)&export, sizeof (struct nfsex_args)); if (!error) { grps = NULL; if (export.export.ex_ngroups > NGROUPS_MAX || export.export.ex_ngroups < 0) error = EINVAL; else if (export.export.ex_ngroups > 0) { grps = malloc(export.export.ex_ngroups * sizeof(gid_t), M_TEMP, M_WAITOK); error = copyin(export.export.ex_groups, grps, export.export.ex_ngroups * sizeof(gid_t)); export.export.ex_groups = grps; } else export.export.ex_groups = NULL; if (!error) error = nfsrv_v4rootexport(&export, cred, p); free(grps, M_TEMP); } } else if ((uap->flag & (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) == NFSSVC_V4ROOTEXPORT) { error = copyin(uap->argp,(caddr_t)&oexp, sizeof (struct nfsex_oldargs)); if (!error) { memset(&export.export, 0, sizeof(export.export)); export.export.ex_flags = (uint64_t)oexp.export.ex_flags; export.export.ex_root = oexp.export.ex_root; export.export.ex_uid = oexp.export.ex_anon.cr_uid; export.export.ex_ngroups = oexp.export.ex_anon.cr_ngroups; export.export.ex_groups = NULL; if (export.export.ex_ngroups > XU_NGROUPS || export.export.ex_ngroups < 0) error = EINVAL; else if (export.export.ex_ngroups > 0) { export.export.ex_groups = malloc( export.export.ex_ngroups * sizeof(gid_t), M_TEMP, M_WAITOK); for (i = 0; i < export.export.ex_ngroups; i++) export.export.ex_groups[i] = oexp.export.ex_anon.cr_groups[i]; } export.export.ex_addr = oexp.export.ex_addr; export.export.ex_addrlen = oexp.export.ex_addrlen; export.export.ex_mask = oexp.export.ex_mask; export.export.ex_masklen = oexp.export.ex_masklen; export.export.ex_indexfile = oexp.export.ex_indexfile; export.export.ex_numsecflavors = oexp.export.ex_numsecflavors; if (export.export.ex_numsecflavors >= MAXSECFLAVORS || export.export.ex_numsecflavors < 0) error = EINVAL; else { for (i = 0; i < export.export.ex_numsecflavors; i++) export.export.ex_secflavors[i] = oexp.export.ex_secflavors[i]; } export.fspec = oexp.fspec; if (error == 0) error = nfsrv_v4rootexport(&export, cred, p); free(export.export.ex_groups, M_TEMP); } } else if (uap->flag & NFSSVC_NOPUBLICFH) { nfs_pubfhset = 0; error = 0; } else if (uap->flag & NFSSVC_STABLERESTART) { error = copyin(uap->argp, (caddr_t)&stablefd, sizeof (int)); if (!error) error = fp_getfvp(p, stablefd, &fp, &vp); if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE)) error = EBADF; if (!error && newnfs_numnfsd != 0) error = EPERM; if (!error) { nfsrv_stablefirst.nsf_fp = fp; nfsrv_setupstable(p); } } else if (uap->flag & NFSSVC_ADMINREVOKE) { error = copyin(uap->argp, (caddr_t)&adminrevoke, sizeof (struct nfsd_clid)); if (!error) error = nfsrv_adminrevoke(&adminrevoke, p); } else if (uap->flag & NFSSVC_DUMPCLIENTS) { error = copyin(uap->argp, (caddr_t)&dumplist, sizeof (struct nfsd_dumplist)); if (!error && (dumplist.ndl_size < 1 || dumplist.ndl_size > NFSRV_MAXDUMPLIST)) error = EPERM; if (!error) { len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size; dumpclients = malloc(len, M_TEMP, M_WAITOK | M_ZERO); nfsrv_dumpclients(dumpclients, dumplist.ndl_size); error = copyout(dumpclients, dumplist.ndl_list, len); free(dumpclients, M_TEMP); } } else if (uap->flag & NFSSVC_DUMPLOCKS) { error = copyin(uap->argp, (caddr_t)&dumplocklist, sizeof (struct nfsd_dumplocklist)); if (!error && (dumplocklist.ndllck_size < 1 || dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST)) error = EPERM; if (!error) error = nfsrv_lookupfilename(&nd, dumplocklist.ndllck_fname, p); if (!error) { len = sizeof (struct nfsd_dumplocks) * dumplocklist.ndllck_size; dumplocks = malloc(len, M_TEMP, M_WAITOK | M_ZERO); nfsrv_dumplocks(nd.ni_vp, dumplocks, dumplocklist.ndllck_size, p); vput(nd.ni_vp); error = copyout(dumplocks, dumplocklist.ndllck_list, len); free(dumplocks, M_TEMP); } } else if (uap->flag & NFSSVC_BACKUPSTABLE) { procp = p->td_proc; PROC_LOCK(procp); nfsd_master_pid = procp->p_pid; bcopy(procp->p_comm, nfsd_master_comm, MAXCOMLEN + 1); nfsd_master_start = procp->p_stats->p_start; nfsd_master_proc = procp; PROC_UNLOCK(procp); } else if ((uap->flag & NFSSVC_SUSPENDNFSD) != 0) { NFSLOCKV4ROOTMUTEX(); if (suspend_nfsd == 0) { /* Lock out all nfsd threads */ do { igotlock = nfsv4_lock(&nfsd_suspend_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (igotlock == 0 && suspend_nfsd == 0); suspend_nfsd = 1; } NFSUNLOCKV4ROOTMUTEX(); error = 0; } else if ((uap->flag & NFSSVC_RESUMENFSD) != 0) { NFSLOCKV4ROOTMUTEX(); if (suspend_nfsd != 0) { nfsv4_unlock(&nfsd_suspend_lock, 0); suspend_nfsd = 0; } NFSUNLOCKV4ROOTMUTEX(); error = 0; } NFSEXITCODE(error); return (error); } /* * Check exports. * Returns 0 if ok, 1 otherwise. */ int nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp) { int i; /* * This seems odd, but allow the case where the security flavor * list is empty. This happens when NFSv4 is traversing non-exported * file systems. Exported file systems should always have a non-empty * security flavor list. */ if (exp->nes_numsecflavor == 0) return (0); for (i = 0; i < exp->nes_numsecflavor; i++) { /* * The tests for privacy and integrity must be first, * since ND_GSS is set for everything but AUTH_SYS. */ if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P && (nd->nd_flag & ND_GSSPRIVACY)) return (0); if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I && (nd->nd_flag & ND_GSSINTEGRITY)) return (0); if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 && (nd->nd_flag & ND_GSS)) return (0); if (exp->nes_secflavors[i] == AUTH_SYS && (nd->nd_flag & ND_GSS) == 0) return (0); } return (1); } /* * Calculate a hash value for the fid in a file handle. */ uint32_t nfsrv_hashfh(fhandle_t *fhp) { uint32_t hashval; hashval = hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0); return (hashval); } /* * Calculate a hash value for the sessionid. */ uint32_t nfsrv_hashsessionid(uint8_t *sessionid) { uint32_t hashval; hashval = hash32_buf(sessionid, NFSX_V4SESSIONID, 0); return (hashval); } /* * Signal the userland master nfsd to backup the stable restart file. */ void nfsrv_backupstable(void) { struct proc *procp; if (nfsd_master_proc != NULL) { procp = pfind(nfsd_master_pid); /* Try to make sure it is the correct process. */ if (procp == nfsd_master_proc && procp->p_stats->p_start.tv_sec == nfsd_master_start.tv_sec && procp->p_stats->p_start.tv_usec == nfsd_master_start.tv_usec && strcmp(procp->p_comm, nfsd_master_comm) == 0) kern_psignal(procp, SIGUSR2); else nfsd_master_proc = NULL; if (procp != NULL) PROC_UNLOCK(procp); } } /* * Create a DS data file for nfsrv_pnfscreate(). Called for each mirror. * The arguments are in a structure, so that they can be passed through * taskqueue for a kernel process to execute this function. */ struct nfsrvdscreate { int done; int inprog; struct task tsk; struct ucred *tcred; struct vnode *dvp; NFSPROC_T *p; struct pnfsdsfile *pf; int err; fhandle_t fh; struct vattr va; struct vattr createva; }; int nfsrv_dscreate(struct vnode *dvp, struct vattr *vap, struct vattr *nvap, fhandle_t *fhp, struct pnfsdsfile *pf, struct pnfsdsattr *dsa, char *fnamep, struct ucred *tcred, NFSPROC_T *p, struct vnode **nvpp) { struct vnode *nvp; struct nameidata named; struct vattr va; char *bufp; u_long *hashp; struct nfsnode *np; struct nfsmount *nmp; int error; NFSNAMEICNDSET(&named.ni_cnd, tcred, CREATE, LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE); nfsvno_setpathbuf(&named, &bufp, &hashp); named.ni_cnd.cn_lkflags = LK_EXCLUSIVE; named.ni_cnd.cn_thread = p; named.ni_cnd.cn_nameptr = bufp; if (fnamep != NULL) { strlcpy(bufp, fnamep, PNFS_FILENAME_LEN + 1); named.ni_cnd.cn_namelen = strlen(bufp); } else named.ni_cnd.cn_namelen = nfsrv_putfhname(fhp, bufp); NFSD_DEBUG(4, "nfsrv_dscreate: dvp=%p fname=%s\n", dvp, bufp); /* Create the date file in the DS mount. */ error = NFSVOPLOCK(dvp, LK_EXCLUSIVE); if (error == 0) { error = VOP_CREATE(dvp, &nvp, &named.ni_cnd, vap); NFSVOPUNLOCK(dvp); if (error == 0) { /* Set the ownership of the file. */ error = VOP_SETATTR(nvp, nvap, tcred); NFSD_DEBUG(4, "nfsrv_dscreate:" " setattr-uid=%d\n", error); if (error != 0) vput(nvp); } if (error != 0) printf("pNFS: pnfscreate failed=%d\n", error); } else printf("pNFS: pnfscreate vnlock=%d\n", error); if (error == 0) { np = VTONFS(nvp); nmp = VFSTONFS(nvp->v_mount); if (strcmp(nvp->v_mount->mnt_vfc->vfc_name, "nfs") != 0 || nmp->nm_nam->sa_len > sizeof( struct sockaddr_in6) || np->n_fhp->nfh_len != NFSX_MYFH) { printf("Bad DS file: fstype=%s salen=%d" " fhlen=%d\n", nvp->v_mount->mnt_vfc->vfc_name, nmp->nm_nam->sa_len, np->n_fhp->nfh_len); error = ENOENT; } /* Set extattrs for the DS on the MDS file. */ if (error == 0) { if (dsa != NULL) { error = VOP_GETATTR(nvp, &va, tcred); if (error == 0) { dsa->dsa_filerev = va.va_filerev; dsa->dsa_size = va.va_size; dsa->dsa_atime = va.va_atime; dsa->dsa_mtime = va.va_mtime; dsa->dsa_bytes = va.va_bytes; } } if (error == 0) { NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh, NFSX_MYFH); NFSBCOPY(nmp->nm_nam, &pf->dsf_sin, nmp->nm_nam->sa_len); NFSBCOPY(named.ni_cnd.cn_nameptr, pf->dsf_filename, sizeof(pf->dsf_filename)); } } else printf("pNFS: pnfscreate can't get DS" " attr=%d\n", error); if (nvpp != NULL && error == 0) *nvpp = nvp; else vput(nvp); } nfsvno_relpathbuf(&named); return (error); } /* * Start up the thread that will execute nfsrv_dscreate(). */ static void start_dscreate(void *arg, int pending) { struct nfsrvdscreate *dsc; dsc = (struct nfsrvdscreate *)arg; dsc->err = nfsrv_dscreate(dsc->dvp, &dsc->createva, &dsc->va, &dsc->fh, dsc->pf, NULL, NULL, dsc->tcred, dsc->p, NULL); dsc->done = 1; NFSD_DEBUG(4, "start_dscreate: err=%d\n", dsc->err); } /* * Create a pNFS data file on the Data Server(s). */ static void nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap, struct ucred *cred, NFSPROC_T *p) { struct nfsrvdscreate *dsc, *tdsc = NULL; struct nfsdevice *ds, *tds, *fds; struct mount *mp; struct pnfsdsfile *pf, *tpf; struct pnfsdsattr dsattr; struct vattr va; struct vnode *dvp[NFSDEV_MAXMIRRORS]; struct nfsmount *nmp; fhandle_t fh; uid_t vauid; gid_t vagid; u_short vamode; struct ucred *tcred; int dsdir[NFSDEV_MAXMIRRORS], error, i, mirrorcnt, ret; int failpos, timo; /* Get a DS server directory in a round-robin order. */ mirrorcnt = 1; mp = vp->v_mount; ds = fds = NULL; NFSDDSLOCK(); /* * Search for the first entry that handles this MDS fs, but use the * first entry for all MDS fs's otherwise. */ TAILQ_FOREACH(tds, &nfsrv_devidhead, nfsdev_list) { if (tds->nfsdev_nmp != NULL) { if (tds->nfsdev_mdsisset == 0 && ds == NULL) ds = tds; else if (tds->nfsdev_mdsisset != 0 && fsidcmp( &mp->mnt_stat.f_fsid, &tds->nfsdev_mdsfsid) == 0) { ds = fds = tds; break; } } } if (ds == NULL) { NFSDDSUNLOCK(); NFSD_DEBUG(4, "nfsrv_pnfscreate: no srv\n"); return; } i = dsdir[0] = ds->nfsdev_nextdir; ds->nfsdev_nextdir = (ds->nfsdev_nextdir + 1) % nfsrv_dsdirsize; dvp[0] = ds->nfsdev_dsdir[i]; tds = TAILQ_NEXT(ds, nfsdev_list); if (nfsrv_maxpnfsmirror > 1 && tds != NULL) { TAILQ_FOREACH_FROM(tds, &nfsrv_devidhead, nfsdev_list) { if (tds->nfsdev_nmp != NULL && ((tds->nfsdev_mdsisset == 0 && fds == NULL) || (tds->nfsdev_mdsisset != 0 && fds != NULL && fsidcmp(&mp->mnt_stat.f_fsid, &tds->nfsdev_mdsfsid) == 0))) { dsdir[mirrorcnt] = i; dvp[mirrorcnt] = tds->nfsdev_dsdir[i]; mirrorcnt++; if (mirrorcnt >= nfsrv_maxpnfsmirror) break; } } } /* Put at end of list to implement round-robin usage. */ TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list); TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list); NFSDDSUNLOCK(); dsc = NULL; if (mirrorcnt > 1) tdsc = dsc = malloc(sizeof(*dsc) * (mirrorcnt - 1), M_TEMP, M_WAITOK | M_ZERO); tpf = pf = malloc(sizeof(*pf) * nfsrv_maxpnfsmirror, M_TEMP, M_WAITOK | M_ZERO); error = nfsvno_getfh(vp, &fh, p); if (error == 0) error = VOP_GETATTR(vp, &va, cred); if (error == 0) { /* Set the attributes for "vp" to Setattr the DS vp. */ vauid = va.va_uid; vagid = va.va_gid; vamode = va.va_mode; VATTR_NULL(&va); va.va_uid = vauid; va.va_gid = vagid; va.va_mode = vamode; va.va_size = 0; } else printf("pNFS: pnfscreate getfh+attr=%d\n", error); NFSD_DEBUG(4, "nfsrv_pnfscreate: cruid=%d crgid=%d\n", cred->cr_uid, cred->cr_gid); /* Make data file name based on FH. */ tcred = newnfs_getcred(); /* * Create the file on each DS mirror, using kernel process(es) for the * additional mirrors. */ failpos = -1; for (i = 0; i < mirrorcnt - 1 && error == 0; i++, tpf++, tdsc++) { tpf->dsf_dir = dsdir[i]; tdsc->tcred = tcred; tdsc->p = p; tdsc->pf = tpf; tdsc->createva = *vap; NFSBCOPY(&fh, &tdsc->fh, sizeof(fh)); tdsc->va = va; tdsc->dvp = dvp[i]; tdsc->done = 0; tdsc->inprog = 0; tdsc->err = 0; ret = EIO; if (nfs_pnfsiothreads != 0) { ret = nfs_pnfsio(start_dscreate, tdsc); NFSD_DEBUG(4, "nfsrv_pnfscreate: nfs_pnfsio=%d\n", ret); } if (ret != 0) { ret = nfsrv_dscreate(dvp[i], vap, &va, &fh, tpf, NULL, NULL, tcred, p, NULL); if (ret != 0) { KASSERT(error == 0, ("nfsrv_dscreate err=%d", error)); if (failpos == -1 && nfsds_failerr(ret)) failpos = i; else error = ret; } } } if (error == 0) { tpf->dsf_dir = dsdir[mirrorcnt - 1]; error = nfsrv_dscreate(dvp[mirrorcnt - 1], vap, &va, &fh, tpf, &dsattr, NULL, tcred, p, NULL); if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(error)) { failpos = mirrorcnt - 1; error = 0; } } timo = hz / 50; /* Wait for 20msec. */ if (timo < 1) timo = 1; /* Wait for kernel task(s) to complete. */ for (tdsc = dsc, i = 0; i < mirrorcnt - 1; i++, tdsc++) { while (tdsc->inprog != 0 && tdsc->done == 0) tsleep(&tdsc->tsk, PVFS, "srvdcr", timo); if (tdsc->err != 0) { if (failpos == -1 && nfsds_failerr(tdsc->err)) failpos = i; else if (error == 0) error = tdsc->err; } } /* * If failpos has been set, that mirror has failed, so it needs * to be disabled. */ if (failpos >= 0) { nmp = VFSTONFS(dvp[failpos]->v_mount); NFSLOCKMNT(nmp); if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | NFSMNTP_CANCELRPCS)) == 0) { nmp->nm_privflag |= NFSMNTP_CANCELRPCS; NFSUNLOCKMNT(nmp); ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p); NFSD_DEBUG(4, "dscreatfail fail=%d ds=%p\n", failpos, ds); if (ds != NULL) nfsrv_killrpcs(nmp); NFSLOCKMNT(nmp); nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; wakeup(nmp); } NFSUNLOCKMNT(nmp); } NFSFREECRED(tcred); if (error == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_pnfscreate vp"); NFSD_DEBUG(4, "nfsrv_pnfscreate: mirrorcnt=%d maxmirror=%d\n", mirrorcnt, nfsrv_maxpnfsmirror); /* * For all mirrors that couldn't be created, fill in the * *pf structure, but with an IP address == 0.0.0.0. */ tpf = pf + mirrorcnt; for (i = mirrorcnt; i < nfsrv_maxpnfsmirror; i++, tpf++) { *tpf = *pf; tpf->dsf_sin.sin_family = AF_INET; tpf->dsf_sin.sin_len = sizeof(struct sockaddr_in); tpf->dsf_sin.sin_addr.s_addr = 0; tpf->dsf_sin.sin_port = 0; } error = vn_extattr_set(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", sizeof(*pf) * nfsrv_maxpnfsmirror, (char *)pf, p); if (error == 0) error = vn_extattr_set(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", sizeof(dsattr), (char *)&dsattr, p); if (error != 0) printf("pNFS: pnfscreate setextattr=%d\n", error); } else printf("pNFS: pnfscreate=%d\n", error); free(pf, M_TEMP); free(dsc, M_TEMP); } /* * Get the information needed to remove the pNFS Data Server file from the * Metadata file. Upon success, ddvp is set non-NULL to the locked * DS directory vnode. The caller must unlock *ddvp when done with it. */ static void nfsrv_pnfsremovesetup(struct vnode *vp, NFSPROC_T *p, struct vnode **dvpp, int *mirrorcntp, char *fname, fhandle_t *fhp) { struct vattr va; struct ucred *tcred; char *buf; int buflen, error; dvpp[0] = NULL; /* If not an exported regular file or not a pNFS server, just return. */ if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 || nfsrv_devidcnt == 0) return; /* Check to see if this is the last hard link. */ tcred = newnfs_getcred(); error = VOP_GETATTR(vp, &va, tcred); NFSFREECRED(tcred); if (error != 0) { printf("pNFS: nfsrv_pnfsremovesetup getattr=%d\n", error); return; } if (va.va_nlink > 1) return; error = nfsvno_getfh(vp, fhp, p); if (error != 0) { printf("pNFS: nfsrv_pnfsremovesetup getfh=%d\n", error); return; } buflen = 1024; buf = malloc(buflen, M_TEMP, M_WAITOK); /* Get the directory vnode for the DS mount and the file handle. */ error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, dvpp, NULL, NULL, fname, NULL, NULL, NULL, NULL, NULL); free(buf, M_TEMP); if (error != 0) printf("pNFS: nfsrv_pnfsremovesetup getsockmnt=%d\n", error); } /* * Remove a DS data file for nfsrv_pnfsremove(). Called for each mirror. * The arguments are in a structure, so that they can be passed through * taskqueue for a kernel process to execute this function. */ struct nfsrvdsremove { int done; int inprog; struct task tsk; struct ucred *tcred; struct vnode *dvp; NFSPROC_T *p; int err; char fname[PNFS_FILENAME_LEN + 1]; }; static int nfsrv_dsremove(struct vnode *dvp, char *fname, struct ucred *tcred, NFSPROC_T *p) { struct nameidata named; struct vnode *nvp; char *bufp; u_long *hashp; int error; error = NFSVOPLOCK(dvp, LK_EXCLUSIVE); if (error != 0) return (error); named.ni_cnd.cn_nameiop = DELETE; named.ni_cnd.cn_lkflags = LK_EXCLUSIVE | LK_RETRY; named.ni_cnd.cn_cred = tcred; named.ni_cnd.cn_thread = p; named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF | SAVENAME; nfsvno_setpathbuf(&named, &bufp, &hashp); named.ni_cnd.cn_nameptr = bufp; named.ni_cnd.cn_namelen = strlen(fname); strlcpy(bufp, fname, NAME_MAX); NFSD_DEBUG(4, "nfsrv_pnfsremove: filename=%s\n", bufp); error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd); NFSD_DEBUG(4, "nfsrv_pnfsremove: aft LOOKUP=%d\n", error); if (error == 0) { error = VOP_REMOVE(dvp, nvp, &named.ni_cnd); vput(nvp); } NFSVOPUNLOCK(dvp); nfsvno_relpathbuf(&named); if (error != 0) printf("pNFS: nfsrv_pnfsremove failed=%d\n", error); return (error); } /* * Start up the thread that will execute nfsrv_dsremove(). */ static void start_dsremove(void *arg, int pending) { struct nfsrvdsremove *dsrm; dsrm = (struct nfsrvdsremove *)arg; dsrm->err = nfsrv_dsremove(dsrm->dvp, dsrm->fname, dsrm->tcred, dsrm->p); dsrm->done = 1; NFSD_DEBUG(4, "start_dsremove: err=%d\n", dsrm->err); } /* * Remove a pNFS data file from a Data Server. * nfsrv_pnfsremovesetup() must have been called before the MDS file was * removed to set up the dvp and fill in the FH. */ static void nfsrv_pnfsremove(struct vnode **dvp, int mirrorcnt, char *fname, fhandle_t *fhp, NFSPROC_T *p) { struct ucred *tcred; struct nfsrvdsremove *dsrm, *tdsrm; struct nfsdevice *ds; struct nfsmount *nmp; int failpos, i, ret, timo; tcred = newnfs_getcred(); dsrm = NULL; if (mirrorcnt > 1) dsrm = malloc(sizeof(*dsrm) * mirrorcnt - 1, M_TEMP, M_WAITOK); /* * Remove the file on each DS mirror, using kernel process(es) for the * additional mirrors. */ failpos = -1; for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) { tdsrm->tcred = tcred; tdsrm->p = p; tdsrm->dvp = dvp[i]; strlcpy(tdsrm->fname, fname, PNFS_FILENAME_LEN + 1); tdsrm->inprog = 0; tdsrm->done = 0; tdsrm->err = 0; ret = EIO; if (nfs_pnfsiothreads != 0) { ret = nfs_pnfsio(start_dsremove, tdsrm); NFSD_DEBUG(4, "nfsrv_pnfsremove: nfs_pnfsio=%d\n", ret); } if (ret != 0) { ret = nfsrv_dsremove(dvp[i], fname, tcred, p); if (failpos == -1 && nfsds_failerr(ret)) failpos = i; } } ret = nfsrv_dsremove(dvp[mirrorcnt - 1], fname, tcred, p); if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(ret)) failpos = mirrorcnt - 1; timo = hz / 50; /* Wait for 20msec. */ if (timo < 1) timo = 1; /* Wait for kernel task(s) to complete. */ for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) { while (tdsrm->inprog != 0 && tdsrm->done == 0) tsleep(&tdsrm->tsk, PVFS, "srvdsrm", timo); if (failpos == -1 && nfsds_failerr(tdsrm->err)) failpos = i; } /* * If failpos has been set, that mirror has failed, so it needs * to be disabled. */ if (failpos >= 0) { nmp = VFSTONFS(dvp[failpos]->v_mount); NFSLOCKMNT(nmp); if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | NFSMNTP_CANCELRPCS)) == 0) { nmp->nm_privflag |= NFSMNTP_CANCELRPCS; NFSUNLOCKMNT(nmp); ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p); NFSD_DEBUG(4, "dsremovefail fail=%d ds=%p\n", failpos, ds); if (ds != NULL) nfsrv_killrpcs(nmp); NFSLOCKMNT(nmp); nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; wakeup(nmp); } NFSUNLOCKMNT(nmp); } /* Get rid all layouts for the file. */ nfsrv_freefilelayouts(fhp); NFSFREECRED(tcred); free(dsrm, M_TEMP); } /* * Generate a file name based on the file handle and put it in *bufp. * Return the number of bytes generated. */ static int nfsrv_putfhname(fhandle_t *fhp, char *bufp) { int i; uint8_t *cp; const uint8_t *hexdigits = "0123456789abcdef"; cp = (uint8_t *)fhp; for (i = 0; i < sizeof(*fhp); i++) { bufp[2 * i] = hexdigits[(*cp >> 4) & 0xf]; bufp[2 * i + 1] = hexdigits[*cp++ & 0xf]; } bufp[2 * i] = '\0'; return (2 * i); } /* * Update the Metadata file's attributes from the DS file when a Read/Write * layout is returned. * Basically just call nfsrv_proxyds() with procedure == NFSPROC_LAYOUTRETURN * so that it does a nfsrv_getattrdsrpc() and nfsrv_setextattr() on the DS file. */ int nfsrv_updatemdsattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p) { struct ucred *tcred; int error; /* Do this as root so that it won't fail with EACCES. */ tcred = newnfs_getcred(); error = nfsrv_proxyds(vp, 0, 0, tcred, p, NFSPROC_LAYOUTRETURN, NULL, NULL, NULL, nap, NULL, NULL, 0, NULL); NFSFREECRED(tcred); return (error); } /* * Set the NFSv4 ACL on the DS file to the same ACL as the MDS file. */ static int nfsrv_dssetacl(struct vnode *vp, struct acl *aclp, struct ucred *cred, NFSPROC_T *p) { int error; error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETACL, NULL, NULL, NULL, NULL, aclp, NULL, 0, NULL); return (error); } static int nfsrv_proxyds(struct vnode *vp, off_t off, int cnt, struct ucred *cred, struct thread *p, int ioproc, struct mbuf **mpp, char *cp, struct mbuf **mpp2, struct nfsvattr *nap, struct acl *aclp, off_t *offp, int content, bool *eofp) { struct nfsmount *nmp[NFSDEV_MAXMIRRORS], *failnmp; fhandle_t fh[NFSDEV_MAXMIRRORS]; struct vnode *dvp[NFSDEV_MAXMIRRORS]; struct nfsdevice *ds; struct pnfsdsattr dsattr; struct opnfsdsattr odsattr; char *buf; int buflen, error, failpos, i, mirrorcnt, origmircnt, trycnt; NFSD_DEBUG(4, "in nfsrv_proxyds\n"); /* * If not a regular file, not exported or not a pNFS server, * just return ENOENT. */ if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 || nfsrv_devidcnt == 0) return (ENOENT); buflen = 1024; buf = malloc(buflen, M_TEMP, M_WAITOK); error = 0; /* * For Getattr, get the Change attribute (va_filerev) and size (va_size) * from the MetaData file's extended attribute. */ if (ioproc == NFSPROC_GETATTR) { error = vn_extattr_get(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", &buflen, buf, p); if (error == 0) { if (buflen == sizeof(odsattr)) { NFSBCOPY(buf, &odsattr, buflen); nap->na_filerev = odsattr.dsa_filerev; nap->na_size = odsattr.dsa_size; nap->na_atime = odsattr.dsa_atime; nap->na_mtime = odsattr.dsa_mtime; /* * Fake na_bytes by rounding up na_size. * Since we don't know the block size, just * use BLKDEV_IOSIZE. */ nap->na_bytes = (odsattr.dsa_size + BLKDEV_IOSIZE - 1) & ~(BLKDEV_IOSIZE - 1); } else if (buflen == sizeof(dsattr)) { NFSBCOPY(buf, &dsattr, buflen); nap->na_filerev = dsattr.dsa_filerev; nap->na_size = dsattr.dsa_size; nap->na_atime = dsattr.dsa_atime; nap->na_mtime = dsattr.dsa_mtime; nap->na_bytes = dsattr.dsa_bytes; } else error = ENXIO; } if (error == 0) { /* * If nfsrv_pnfsgetdsattr is 0 or nfsrv_checkdsattr() * returns 0, just return now. nfsrv_checkdsattr() * returns 0 if there is no Read/Write layout * plus either an Open/Write_access or Write * delegation issued to a client for the file. */ if (nfsrv_pnfsgetdsattr == 0 || nfsrv_checkdsattr(vp, p) == 0) { free(buf, M_TEMP); return (error); } } /* * Clear ENOATTR so the code below will attempt to do a * nfsrv_getattrdsrpc() to get the attributes and (re)create * the extended attribute. */ if (error == ENOATTR) error = 0; } origmircnt = -1; trycnt = 0; tryagain: if (error == 0) { buflen = 1024; if (ioproc == NFSPROC_READDS && NFSVOPISLOCKED(vp) == LK_EXCLUSIVE) printf("nfsrv_proxyds: Readds vp exclusively locked\n"); error = nfsrv_dsgetsockmnt(vp, LK_SHARED, buf, &buflen, &mirrorcnt, p, dvp, fh, NULL, NULL, NULL, NULL, NULL, NULL, NULL); if (error == 0) { for (i = 0; i < mirrorcnt; i++) nmp[i] = VFSTONFS(dvp[i]->v_mount); } else printf("pNFS: proxy getextattr sockaddr=%d\n", error); } else printf("pNFS: nfsrv_dsgetsockmnt=%d\n", error); if (error == 0) { failpos = -1; if (origmircnt == -1) origmircnt = mirrorcnt; /* * If failpos is set to a mirror#, then that mirror has * failed and will be disabled. For Read, Getattr and Seek, the * function only tries one mirror, so if that mirror has * failed, it will need to be retried. As such, increment * tryitagain for these cases. * For Write, Setattr and Setacl, the function tries all * mirrors and will not return an error for the case where * one mirror has failed. For these cases, the functioning * mirror(s) will have been modified, so a retry isn't * necessary. These functions will set failpos for the * failed mirror#. */ if (ioproc == NFSPROC_READDS) { error = nfsrv_readdsrpc(fh, off, cnt, cred, p, nmp[0], mpp, mpp2); if (nfsds_failerr(error) && mirrorcnt > 1) { /* * Setting failpos will cause the mirror * to be disabled and then a retry of this * read is required. */ failpos = 0; error = 0; trycnt++; } } else if (ioproc == NFSPROC_WRITEDS) error = nfsrv_writedsrpc(fh, off, cnt, cred, p, vp, &nmp[0], mirrorcnt, mpp, cp, &failpos); else if (ioproc == NFSPROC_SETATTR) error = nfsrv_setattrdsrpc(fh, cred, p, vp, &nmp[0], mirrorcnt, nap, &failpos); else if (ioproc == NFSPROC_SETACL) error = nfsrv_setacldsrpc(fh, cred, p, vp, &nmp[0], mirrorcnt, aclp, &failpos); else if (ioproc == NFSPROC_SEEKDS) { error = nfsrv_seekdsrpc(fh, offp, content, eofp, cred, p, nmp[0]); if (nfsds_failerr(error) && mirrorcnt > 1) { /* * Setting failpos will cause the mirror * to be disabled and then a retry of this * read is required. */ failpos = 0; error = 0; trycnt++; } } else if (ioproc == NFSPROC_ALLOCATE) error = nfsrv_allocatedsrpc(fh, off, *offp, cred, p, vp, &nmp[0], mirrorcnt, &failpos); else { error = nfsrv_getattrdsrpc(&fh[mirrorcnt - 1], cred, p, vp, nmp[mirrorcnt - 1], nap); if (nfsds_failerr(error) && mirrorcnt > 1) { /* * Setting failpos will cause the mirror * to be disabled and then a retry of this * getattr is required. */ failpos = mirrorcnt - 1; error = 0; trycnt++; } } ds = NULL; if (failpos >= 0) { failnmp = nmp[failpos]; NFSLOCKMNT(failnmp); if ((failnmp->nm_privflag & (NFSMNTP_FORCEDISM | NFSMNTP_CANCELRPCS)) == 0) { failnmp->nm_privflag |= NFSMNTP_CANCELRPCS; NFSUNLOCKMNT(failnmp); ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, failnmp, p); NFSD_DEBUG(4, "dsldsnmp fail=%d ds=%p\n", failpos, ds); if (ds != NULL) nfsrv_killrpcs(failnmp); NFSLOCKMNT(failnmp); failnmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; wakeup(failnmp); } NFSUNLOCKMNT(failnmp); } for (i = 0; i < mirrorcnt; i++) NFSVOPUNLOCK(dvp[i]); NFSD_DEBUG(4, "nfsrv_proxyds: aft RPC=%d trya=%d\n", error, trycnt); /* Try the Read/Getattr again if a mirror was deleted. */ if (ds != NULL && trycnt > 0 && trycnt < origmircnt) goto tryagain; } else { /* Return ENOENT for any Extended Attribute error. */ error = ENOENT; } free(buf, M_TEMP); NFSD_DEBUG(4, "nfsrv_proxyds: error=%d\n", error); return (error); } /* * Get the DS mount point, fh and directory from the "pnfsd.dsfile" extended * attribute. * newnmpp - If it points to a non-NULL nmp, that is the destination and needs * to be checked. If it points to a NULL nmp, then it returns * a suitable destination. * curnmp - If non-NULL, it is the source mount for the copy. */ int nfsrv_dsgetsockmnt(struct vnode *vp, int lktype, char *buf, int *buflenp, int *mirrorcntp, NFSPROC_T *p, struct vnode **dvpp, fhandle_t *fhp, char *devid, char *fnamep, struct vnode **nvpp, struct nfsmount **newnmpp, struct nfsmount *curnmp, int *ippos, int *dsdirp) { struct vnode *dvp, *nvp = NULL, **tdvpp; struct mount *mp; struct nfsmount *nmp, *newnmp; struct sockaddr *sad; struct sockaddr_in *sin; struct nfsdevice *ds, *tds, *fndds; struct pnfsdsfile *pf; uint32_t dsdir; int error, fhiszero, fnd, gotone, i, mirrorcnt; ASSERT_VOP_LOCKED(vp, "nfsrv_dsgetsockmnt vp"); *mirrorcntp = 1; tdvpp = dvpp; if (nvpp != NULL) *nvpp = NULL; if (dvpp != NULL) *dvpp = NULL; if (ippos != NULL) *ippos = -1; if (newnmpp != NULL) newnmp = *newnmpp; else newnmp = NULL; mp = vp->v_mount; error = vn_extattr_get(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", buflenp, buf, p); mirrorcnt = *buflenp / sizeof(*pf); if (error == 0 && (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS || *buflenp != sizeof(*pf) * mirrorcnt)) error = ENOATTR; pf = (struct pnfsdsfile *)buf; /* If curnmp != NULL, check for a match in the mirror list. */ if (curnmp != NULL && error == 0) { fnd = 0; for (i = 0; i < mirrorcnt; i++, pf++) { sad = (struct sockaddr *)&pf->dsf_sin; if (nfsaddr2_match(sad, curnmp->nm_nam)) { if (ippos != NULL) *ippos = i; fnd = 1; break; } } if (fnd == 0) error = ENXIO; } gotone = 0; pf = (struct pnfsdsfile *)buf; NFSD_DEBUG(4, "nfsrv_dsgetsockmnt: mirrorcnt=%d err=%d\n", mirrorcnt, error); for (i = 0; i < mirrorcnt && error == 0; i++, pf++) { fhiszero = 0; sad = (struct sockaddr *)&pf->dsf_sin; sin = &pf->dsf_sin; dsdir = pf->dsf_dir; if (dsdir >= nfsrv_dsdirsize) { printf("nfsrv_dsgetsockmnt: dsdir=%d\n", dsdir); error = ENOATTR; } else if (nvpp != NULL && newnmp != NULL && nfsaddr2_match(sad, newnmp->nm_nam)) error = EEXIST; if (error == 0) { if (ippos != NULL && curnmp == NULL && sad->sa_family == AF_INET && sin->sin_addr.s_addr == 0) *ippos = i; if (NFSBCMP(&zerofh, &pf->dsf_fh, sizeof(zerofh)) == 0) fhiszero = 1; /* Use the socket address to find the mount point. */ fndds = NULL; NFSDDSLOCK(); /* Find a match for the IP address. */ TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { if (ds->nfsdev_nmp != NULL) { dvp = ds->nfsdev_dvp; nmp = VFSTONFS(dvp->v_mount); if (nmp != ds->nfsdev_nmp) printf("different2 nmp %p %p\n", nmp, ds->nfsdev_nmp); if (nfsaddr2_match(sad, nmp->nm_nam)) { fndds = ds; break; } } } if (fndds != NULL && newnmpp != NULL && newnmp == NULL) { /* Search for a place to make a mirror copy. */ TAILQ_FOREACH(tds, &nfsrv_devidhead, nfsdev_list) { if (tds->nfsdev_nmp != NULL && fndds != tds && ((tds->nfsdev_mdsisset == 0 && fndds->nfsdev_mdsisset == 0) || (tds->nfsdev_mdsisset != 0 && fndds->nfsdev_mdsisset != 0 && fsidcmp(&tds->nfsdev_mdsfsid, &mp->mnt_stat.f_fsid) == 0))) { *newnmpp = tds->nfsdev_nmp; break; } } if (tds != NULL) { /* * Move this entry to the end of the * list, so it won't be selected as * easily the next time. */ TAILQ_REMOVE(&nfsrv_devidhead, tds, nfsdev_list); TAILQ_INSERT_TAIL(&nfsrv_devidhead, tds, nfsdev_list); } } NFSDDSUNLOCK(); if (fndds != NULL) { dvp = fndds->nfsdev_dsdir[dsdir]; if (lktype != 0 || fhiszero != 0 || (nvpp != NULL && *nvpp == NULL)) { if (fhiszero != 0) error = vn_lock(dvp, LK_EXCLUSIVE); else if (lktype != 0) error = vn_lock(dvp, lktype); else error = vn_lock(dvp, LK_SHARED); /* * If the file handle is all 0's, try to * do a Lookup against the DS to acquire * it. * If dvpp == NULL or the Lookup fails, * unlock dvp after the call. */ if (error == 0 && (fhiszero != 0 || (nvpp != NULL && *nvpp == NULL))) { error = nfsrv_pnfslookupds(vp, dvp, pf, &nvp, p); if (error == 0) { if (fhiszero != 0) nfsrv_pnfssetfh( vp, pf, devid, fnamep, nvp, p); if (nvpp != NULL && *nvpp == NULL) { *nvpp = nvp; *dsdirp = dsdir; } else vput(nvp); } if (error != 0 || lktype == 0) NFSVOPUNLOCK(dvp); } } if (error == 0) { gotone++; NFSD_DEBUG(4, "gotone=%d\n", gotone); if (devid != NULL) { NFSBCOPY(fndds->nfsdev_deviceid, devid, NFSX_V4DEVICEID); devid += NFSX_V4DEVICEID; } if (dvpp != NULL) *tdvpp++ = dvp; if (fhp != NULL) NFSBCOPY(&pf->dsf_fh, fhp++, NFSX_MYFH); if (fnamep != NULL && gotone == 1) strlcpy(fnamep, pf->dsf_filename, sizeof(pf->dsf_filename)); } else NFSD_DEBUG(4, "nfsrv_dsgetsockmnt " "err=%d\n", error); } } } if (error == 0 && gotone == 0) error = ENOENT; NFSD_DEBUG(4, "eo nfsrv_dsgetsockmnt: gotone=%d err=%d\n", gotone, error); if (error == 0) *mirrorcntp = gotone; else { if (gotone > 0 && dvpp != NULL) { /* * If the error didn't occur on the first one and * dvpp != NULL, the one(s) prior to the failure will * have locked dvp's that need to be unlocked. */ for (i = 0; i < gotone; i++) { NFSVOPUNLOCK(*dvpp); *dvpp++ = NULL; } } /* * If it found the vnode to be copied from before a failure, * it needs to be vput()'d. */ if (nvpp != NULL && *nvpp != NULL) { vput(*nvpp); *nvpp = NULL; } } return (error); } /* * Set the extended attribute for the Change attribute. */ static int nfsrv_setextattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p) { struct pnfsdsattr dsattr; int error; ASSERT_VOP_ELOCKED(vp, "nfsrv_setextattr vp"); dsattr.dsa_filerev = nap->na_filerev; dsattr.dsa_size = nap->na_size; dsattr.dsa_atime = nap->na_atime; dsattr.dsa_mtime = nap->na_mtime; dsattr.dsa_bytes = nap->na_bytes; error = vn_extattr_set(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", sizeof(dsattr), (char *)&dsattr, p); if (error != 0) printf("pNFS: setextattr=%d\n", error); return (error); } static int nfsrv_readdsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred, NFSPROC_T *p, struct nfsmount *nmp, struct mbuf **mpp, struct mbuf **mpendp) { uint32_t *tl; struct nfsrv_descript *nd; nfsv4stateid_t st; struct mbuf *m, *m2; int error = 0, retlen, tlen, trimlen; NFSD_DEBUG(4, "in nfsrv_readdsrpc\n"); nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); *mpp = NULL; /* * Use a stateid where other is an alternating 01010 pattern and * seqid is 0xffffffff. This value is not defined as special by * the RFC and is used by the FreeBSD NFS server to indicate an * MDS->DS proxy operation. */ st.other[0] = 0x55555555; st.other[1] = 0x55555555; st.other[2] = 0x55555555; st.seqid = 0xffffffff; nfscl_reqstart(nd, NFSPROC_READDS, nmp, (u_int8_t *)fhp, sizeof(*fhp), NULL, NULL, 0, 0); nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3); txdr_hyper(off, tl); *(tl + 2) = txdr_unsigned(len); error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) { free(nd, M_TEMP); return (error); } if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); NFSM_STRSIZ(retlen, len); if (retlen > 0) { /* Trim off the pre-data XDR from the mbuf chain. */ m = nd->nd_mrep; while (m != NULL && m != nd->nd_md) { if (m->m_next == nd->nd_md) { m->m_next = NULL; m_freem(nd->nd_mrep); nd->nd_mrep = m = nd->nd_md; } else m = m->m_next; } if (m == NULL) { printf("nfsrv_readdsrpc: busted mbuf list\n"); error = ENOENT; goto nfsmout; } /* * Now, adjust first mbuf so that any XDR before the * read data is skipped over. */ trimlen = nd->nd_dpos - mtod(m, char *); if (trimlen > 0) { m->m_len -= trimlen; NFSM_DATAP(m, trimlen); } /* * Truncate the mbuf chain at retlen bytes of data, * plus XDR padding that brings the length up to a * multiple of 4. */ tlen = NFSM_RNDUP(retlen); do { if (m->m_len >= tlen) { m->m_len = tlen; tlen = 0; m2 = m->m_next; m->m_next = NULL; m_freem(m2); break; } tlen -= m->m_len; m = m->m_next; } while (m != NULL); if (tlen > 0) { printf("nfsrv_readdsrpc: busted mbuf list\n"); error = ENOENT; goto nfsmout; } *mpp = nd->nd_mrep; *mpendp = m; nd->nd_mrep = NULL; } } else error = nd->nd_repstat; nfsmout: /* If nd->nd_mrep is already NULL, this is a no-op. */ m_freem(nd->nd_mrep); free(nd, M_TEMP); NFSD_DEBUG(4, "nfsrv_readdsrpc error=%d\n", error); return (error); } /* * Do a write RPC on a DS data file, using this structure for the arguments, * so that this function can be executed by a separate kernel process. */ struct nfsrvwritedsdorpc { int done; int inprog; struct task tsk; fhandle_t fh; off_t off; int len; struct nfsmount *nmp; struct ucred *cred; NFSPROC_T *p; struct mbuf *m; int err; }; static int nfsrv_writedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, int len, struct nfsvattr *nap, struct mbuf *m, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsrv_descript *nd; nfsattrbit_t attrbits; nfsv4stateid_t st; int commit, error, retlen; nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); nfscl_reqstart(nd, NFSPROC_WRITE, nmp, (u_int8_t *)fhp, sizeof(fhandle_t), NULL, NULL, 0, 0); /* * Use a stateid where other is an alternating 01010 pattern and * seqid is 0xffffffff. This value is not defined as special by * the RFC and is used by the FreeBSD NFS server to indicate an * MDS->DS proxy operation. */ st.other[0] = 0x55555555; st.other[1] = 0x55555555; st.other[2] = 0x55555555; st.seqid = 0xffffffff; nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED); txdr_hyper(off, tl); tl += 2; /* * Do all writes FileSync, since the server doesn't hold onto dirty * buffers. Since clients should be accessing the DS servers directly * using the pNFS layouts, this just needs to work correctly as a * fallback. */ *tl++ = txdr_unsigned(NFSWRITE_FILESYNC); *tl = txdr_unsigned(len); NFSD_DEBUG(4, "nfsrv_writedsdorpc: len=%d\n", len); /* Put data in mbuf chain. */ nd->nd_mb->m_next = m; /* Set nd_mb and nd_bpos to end of data. */ while (m->m_next != NULL) m = m->m_next; nd->nd_mb = m; nfsm_set(nd, m->m_len); NFSD_DEBUG(4, "nfsrv_writedsdorpc: lastmb len=%d\n", m->m_len); /* Do a Getattr for the attributes that change upon writing. */ NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) { free(nd, M_TEMP); return (error); } NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft writerpc=%d\n", nd->nd_repstat); /* Get rid of weak cache consistency data for now. */ if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == (ND_NFSV4 | ND_V4WCCATTR)) { error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); NFSD_DEBUG(4, "nfsrv_writedsdorpc: wcc attr=%d\n", error); if (error != 0) goto nfsmout; /* * Get rid of Op# and status for next op. */ NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); if (*++tl != 0) nd->nd_flag |= ND_NOMOREDATA; } if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF); retlen = fxdr_unsigned(int, *tl++); commit = fxdr_unsigned(int, *tl); if (commit != NFSWRITE_FILESYNC) error = NFSERR_IO; NFSD_DEBUG(4, "nfsrv_writedsdorpc:retlen=%d commit=%d err=%d\n", retlen, commit, error); } else error = nd->nd_repstat; /* We have no use for the Write Verifier since we use FileSync. */ /* * Get the Change, Size, Access Time and Modify Time attributes and set * on the Metadata file, so its attributes will be what the file's * would be if it had been written. */ if (error == 0) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); } NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft loadattr=%d\n", error); nfsmout: m_freem(nd->nd_mrep); free(nd, M_TEMP); NFSD_DEBUG(4, "nfsrv_writedsdorpc error=%d\n", error); return (error); } /* * Start up the thread that will execute nfsrv_writedsdorpc(). */ static void start_writedsdorpc(void *arg, int pending) { struct nfsrvwritedsdorpc *drpc; drpc = (struct nfsrvwritedsdorpc *)arg; drpc->err = nfsrv_writedsdorpc(drpc->nmp, &drpc->fh, drpc->off, drpc->len, NULL, drpc->m, drpc->cred, drpc->p); drpc->done = 1; NFSD_DEBUG(4, "start_writedsdorpc: err=%d\n", drpc->err); } static int nfsrv_writedsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred, NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, struct mbuf **mpp, char *cp, int *failposp) { struct nfsrvwritedsdorpc *drpc, *tdrpc = NULL; struct nfsvattr na; struct mbuf *m; int error, i, offs, ret, timo; NFSD_DEBUG(4, "in nfsrv_writedsrpc\n"); KASSERT(*mpp != NULL, ("nfsrv_writedsrpc: NULL mbuf chain")); drpc = NULL; if (mirrorcnt > 1) tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, M_WAITOK); /* Calculate offset in mbuf chain that data starts. */ offs = cp - mtod(*mpp, char *); NFSD_DEBUG(4, "nfsrv_writedsrpc: mcopy offs=%d len=%d\n", offs, len); /* * Do the write RPC for every DS, using a separate kernel process * for every DS except the last one. */ error = 0; for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { tdrpc->done = 0; NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); tdrpc->off = off; tdrpc->len = len; tdrpc->nmp = *nmpp; tdrpc->cred = cred; tdrpc->p = p; tdrpc->inprog = 0; tdrpc->err = 0; tdrpc->m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK); ret = EIO; if (nfs_pnfsiothreads != 0) { ret = nfs_pnfsio(start_writedsdorpc, tdrpc); NFSD_DEBUG(4, "nfsrv_writedsrpc: nfs_pnfsio=%d\n", ret); } if (ret != 0) { ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, NULL, tdrpc->m, cred, p); if (nfsds_failerr(ret) && *failposp == -1) *failposp = i; else if (error == 0 && ret != 0) error = ret; } nmpp++; fhp++; } m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK); ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, &na, m, cred, p); if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) *failposp = mirrorcnt - 1; else if (error == 0 && ret != 0) error = ret; if (error == 0) error = nfsrv_setextattr(vp, &na, p); NFSD_DEBUG(4, "nfsrv_writedsrpc: aft setextat=%d\n", error); tdrpc = drpc; timo = hz / 50; /* Wait for 20msec. */ if (timo < 1) timo = 1; for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { /* Wait for RPCs on separate threads to complete. */ while (tdrpc->inprog != 0 && tdrpc->done == 0) tsleep(&tdrpc->tsk, PVFS, "srvwrds", timo); if (nfsds_failerr(tdrpc->err) && *failposp == -1) *failposp = i; else if (error == 0 && tdrpc->err != 0) error = tdrpc->err; } free(drpc, M_TEMP); return (error); } /* * Do a allocate RPC on a DS data file, using this structure for the arguments, * so that this function can be executed by a separate kernel process. */ struct nfsrvallocatedsdorpc { int done; int inprog; struct task tsk; fhandle_t fh; off_t off; off_t len; struct nfsmount *nmp; struct ucred *cred; NFSPROC_T *p; int err; }; static int nfsrv_allocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, off_t len, struct nfsvattr *nap, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsrv_descript *nd; nfsattrbit_t attrbits; nfsv4stateid_t st; int error; nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); nfscl_reqstart(nd, NFSPROC_ALLOCATE, nmp, (u_int8_t *)fhp, sizeof(fhandle_t), NULL, NULL, 0, 0); /* * Use a stateid where other is an alternating 01010 pattern and * seqid is 0xffffffff. This value is not defined as special by * the RFC and is used by the FreeBSD NFS server to indicate an * MDS->DS proxy operation. */ st.other[0] = 0x55555555; st.other[1] = 0x55555555; st.other[2] = 0x55555555; st.seqid = 0xffffffff; nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED); txdr_hyper(off, tl); tl += 2; txdr_hyper(len, tl); tl += 2; NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: len=%jd\n", (intmax_t)len); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSGETATTR_ATTRBIT(&attrbits); nfsrv_putattrbit(nd, &attrbits); error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) { free(nd, M_TEMP); return (error); } NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft allocaterpc=%d\n", nd->nd_repstat); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); } else error = nd->nd_repstat; NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft loadattr=%d\n", error); nfsmout: m_freem(nd->nd_mrep); free(nd, M_TEMP); NFSD_DEBUG(4, "nfsrv_allocatedsdorpc error=%d\n", error); return (error); } /* * Start up the thread that will execute nfsrv_allocatedsdorpc(). */ static void start_allocatedsdorpc(void *arg, int pending) { struct nfsrvallocatedsdorpc *drpc; drpc = (struct nfsrvallocatedsdorpc *)arg; drpc->err = nfsrv_allocatedsdorpc(drpc->nmp, &drpc->fh, drpc->off, drpc->len, NULL, drpc->cred, drpc->p); drpc->done = 1; NFSD_DEBUG(4, "start_allocatedsdorpc: err=%d\n", drpc->err); } static int nfsrv_allocatedsrpc(fhandle_t *fhp, off_t off, off_t len, struct ucred *cred, NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, int *failposp) { struct nfsrvallocatedsdorpc *drpc, *tdrpc = NULL; struct nfsvattr na; int error, i, ret, timo; NFSD_DEBUG(4, "in nfsrv_allocatedsrpc\n"); drpc = NULL; if (mirrorcnt > 1) tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, M_WAITOK); /* * Do the allocate RPC for every DS, using a separate kernel process * for every DS except the last one. */ error = 0; for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { tdrpc->done = 0; NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); tdrpc->off = off; tdrpc->len = len; tdrpc->nmp = *nmpp; tdrpc->cred = cred; tdrpc->p = p; tdrpc->inprog = 0; tdrpc->err = 0; ret = EIO; if (nfs_pnfsiothreads != 0) { ret = nfs_pnfsio(start_allocatedsdorpc, tdrpc); NFSD_DEBUG(4, "nfsrv_allocatedsrpc: nfs_pnfsio=%d\n", ret); } if (ret != 0) { ret = nfsrv_allocatedsdorpc(*nmpp, fhp, off, len, NULL, cred, p); if (nfsds_failerr(ret) && *failposp == -1) *failposp = i; else if (error == 0 && ret != 0) error = ret; } nmpp++; fhp++; } ret = nfsrv_allocatedsdorpc(*nmpp, fhp, off, len, &na, cred, p); if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) *failposp = mirrorcnt - 1; else if (error == 0 && ret != 0) error = ret; if (error == 0) error = nfsrv_setextattr(vp, &na, p); NFSD_DEBUG(4, "nfsrv_allocatedsrpc: aft setextat=%d\n", error); tdrpc = drpc; timo = hz / 50; /* Wait for 20msec. */ if (timo < 1) timo = 1; for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { /* Wait for RPCs on separate threads to complete. */ while (tdrpc->inprog != 0 && tdrpc->done == 0) tsleep(&tdrpc->tsk, PVFS, "srvalds", timo); if (nfsds_failerr(tdrpc->err) && *failposp == -1) *failposp = i; else if (error == 0 && tdrpc->err != 0) error = tdrpc->err; } free(drpc, M_TEMP); return (error); } static int nfsrv_setattrdsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap, struct nfsvattr *dsnap) { uint32_t *tl; struct nfsrv_descript *nd; nfsv4stateid_t st; nfsattrbit_t attrbits; int error; NFSD_DEBUG(4, "in nfsrv_setattrdsdorpc\n"); nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); /* * Use a stateid where other is an alternating 01010 pattern and * seqid is 0xffffffff. This value is not defined as special by * the RFC and is used by the FreeBSD NFS server to indicate an * MDS->DS proxy operation. */ st.other[0] = 0x55555555; st.other[1] = 0x55555555; st.other[2] = 0x55555555; st.seqid = 0xffffffff; nfscl_reqstart(nd, NFSPROC_SETATTR, nmp, (u_int8_t *)fhp, sizeof(*fhp), NULL, NULL, 0, 0); nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); nfscl_fillsattr(nd, &nap->na_vattr, vp, NFSSATTR_FULL, 0); /* Do a Getattr for the attributes that change due to writing. */ NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) { free(nd, M_TEMP); return (error); } NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattrrpc=%d\n", nd->nd_repstat); /* Get rid of weak cache consistency data for now. */ if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == (ND_NFSV4 | ND_V4WCCATTR)) { error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: wcc attr=%d\n", error); if (error != 0) goto nfsmout; /* * Get rid of Op# and status for next op. */ NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); if (*++tl != 0) nd->nd_flag |= ND_NOMOREDATA; } error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (error != 0) goto nfsmout; if (nd->nd_repstat != 0) error = nd->nd_repstat; /* * Get the Change, Size, Access Time and Modify Time attributes and set * on the Metadata file, so its attributes will be what the file's * would be if it had been written. */ if (error == 0) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); } NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattr loadattr=%d\n", error); nfsmout: m_freem(nd->nd_mrep); free(nd, M_TEMP); NFSD_DEBUG(4, "nfsrv_setattrdsdorpc error=%d\n", error); return (error); } struct nfsrvsetattrdsdorpc { int done; int inprog; struct task tsk; fhandle_t fh; struct nfsmount *nmp; struct vnode *vp; struct ucred *cred; NFSPROC_T *p; struct nfsvattr na; struct nfsvattr dsna; int err; }; /* * Start up the thread that will execute nfsrv_setattrdsdorpc(). */ static void start_setattrdsdorpc(void *arg, int pending) { struct nfsrvsetattrdsdorpc *drpc; drpc = (struct nfsrvsetattrdsdorpc *)arg; drpc->err = nfsrv_setattrdsdorpc(&drpc->fh, drpc->cred, drpc->p, drpc->vp, drpc->nmp, &drpc->na, &drpc->dsna); drpc->done = 1; } static int nfsrv_setattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, struct nfsvattr *nap, int *failposp) { struct nfsrvsetattrdsdorpc *drpc, *tdrpc = NULL; struct nfsvattr na; int error, i, ret, timo; NFSD_DEBUG(4, "in nfsrv_setattrdsrpc\n"); drpc = NULL; if (mirrorcnt > 1) tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, M_WAITOK); /* * Do the setattr RPC for every DS, using a separate kernel process * for every DS except the last one. */ error = 0; for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { tdrpc->done = 0; tdrpc->inprog = 0; NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); tdrpc->nmp = *nmpp; tdrpc->vp = vp; tdrpc->cred = cred; tdrpc->p = p; tdrpc->na = *nap; tdrpc->err = 0; ret = EIO; if (nfs_pnfsiothreads != 0) { ret = nfs_pnfsio(start_setattrdsdorpc, tdrpc); NFSD_DEBUG(4, "nfsrv_setattrdsrpc: nfs_pnfsio=%d\n", ret); } if (ret != 0) { ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap, &na); if (nfsds_failerr(ret) && *failposp == -1) *failposp = i; else if (error == 0 && ret != 0) error = ret; } nmpp++; fhp++; } ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap, &na); if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) *failposp = mirrorcnt - 1; else if (error == 0 && ret != 0) error = ret; if (error == 0) error = nfsrv_setextattr(vp, &na, p); NFSD_DEBUG(4, "nfsrv_setattrdsrpc: aft setextat=%d\n", error); tdrpc = drpc; timo = hz / 50; /* Wait for 20msec. */ if (timo < 1) timo = 1; for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { /* Wait for RPCs on separate threads to complete. */ while (tdrpc->inprog != 0 && tdrpc->done == 0) tsleep(&tdrpc->tsk, PVFS, "srvsads", timo); if (nfsds_failerr(tdrpc->err) && *failposp == -1) *failposp = i; else if (error == 0 && tdrpc->err != 0) error = tdrpc->err; } free(drpc, M_TEMP); return (error); } /* * Do a Setattr of an NFSv4 ACL on the DS file. */ static int nfsrv_setacldsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, struct vnode *vp, struct nfsmount *nmp, struct acl *aclp) { struct nfsrv_descript *nd; nfsv4stateid_t st; nfsattrbit_t attrbits; int error; NFSD_DEBUG(4, "in nfsrv_setacldsdorpc\n"); nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); /* * Use a stateid where other is an alternating 01010 pattern and * seqid is 0xffffffff. This value is not defined as special by * the RFC and is used by the FreeBSD NFS server to indicate an * MDS->DS proxy operation. */ st.other[0] = 0x55555555; st.other[1] = 0x55555555; st.other[2] = 0x55555555; st.seqid = 0xffffffff; nfscl_reqstart(nd, NFSPROC_SETACL, nmp, (u_int8_t *)fhp, sizeof(*fhp), NULL, NULL, 0, 0); nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL); /* * The "vp" argument to nfsv4_fillattr() is only used for vnode_type(), * so passing in the metadata "vp" will be ok, since it is of * the same type (VREG). */ nfsv4_fillattr(nd, NULL, vp, aclp, NULL, NULL, 0, &attrbits, NULL, NULL, 0, 0, 0, 0, 0, NULL); error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) { free(nd, M_TEMP); return (error); } NFSD_DEBUG(4, "nfsrv_setacldsdorpc: aft setaclrpc=%d\n", nd->nd_repstat); error = nd->nd_repstat; m_freem(nd->nd_mrep); free(nd, M_TEMP); return (error); } struct nfsrvsetacldsdorpc { int done; int inprog; struct task tsk; fhandle_t fh; struct nfsmount *nmp; struct vnode *vp; struct ucred *cred; NFSPROC_T *p; struct acl *aclp; int err; }; /* * Start up the thread that will execute nfsrv_setacldsdorpc(). */ static void start_setacldsdorpc(void *arg, int pending) { struct nfsrvsetacldsdorpc *drpc; drpc = (struct nfsrvsetacldsdorpc *)arg; drpc->err = nfsrv_setacldsdorpc(&drpc->fh, drpc->cred, drpc->p, drpc->vp, drpc->nmp, drpc->aclp); drpc->done = 1; } static int nfsrv_setacldsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, struct acl *aclp, int *failposp) { struct nfsrvsetacldsdorpc *drpc, *tdrpc = NULL; int error, i, ret, timo; NFSD_DEBUG(4, "in nfsrv_setacldsrpc\n"); drpc = NULL; if (mirrorcnt > 1) tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, M_WAITOK); /* * Do the setattr RPC for every DS, using a separate kernel process * for every DS except the last one. */ error = 0; for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { tdrpc->done = 0; tdrpc->inprog = 0; NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); tdrpc->nmp = *nmpp; tdrpc->vp = vp; tdrpc->cred = cred; tdrpc->p = p; tdrpc->aclp = aclp; tdrpc->err = 0; ret = EIO; if (nfs_pnfsiothreads != 0) { ret = nfs_pnfsio(start_setacldsdorpc, tdrpc); NFSD_DEBUG(4, "nfsrv_setacldsrpc: nfs_pnfsio=%d\n", ret); } if (ret != 0) { ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, aclp); if (nfsds_failerr(ret) && *failposp == -1) *failposp = i; else if (error == 0 && ret != 0) error = ret; } nmpp++; fhp++; } ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, aclp); if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) *failposp = mirrorcnt - 1; else if (error == 0 && ret != 0) error = ret; NFSD_DEBUG(4, "nfsrv_setacldsrpc: aft setextat=%d\n", error); tdrpc = drpc; timo = hz / 50; /* Wait for 20msec. */ if (timo < 1) timo = 1; for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { /* Wait for RPCs on separate threads to complete. */ while (tdrpc->inprog != 0 && tdrpc->done == 0) tsleep(&tdrpc->tsk, PVFS, "srvacds", timo); if (nfsds_failerr(tdrpc->err) && *failposp == -1) *failposp = i; else if (error == 0 && tdrpc->err != 0) error = tdrpc->err; } free(drpc, M_TEMP); return (error); } /* * Getattr call to the DS for the attributes that change due to writing. */ static int nfsrv_getattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap) { struct nfsrv_descript *nd; int error; nfsattrbit_t attrbits; NFSD_DEBUG(4, "in nfsrv_getattrdsrpc\n"); nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, (u_int8_t *)fhp, sizeof(fhandle_t), NULL, NULL, 0, 0); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED); (void) nfsrv_putattrbit(nd, &attrbits); error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) { free(nd, M_TEMP); return (error); } NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft getattrrpc=%d\n", nd->nd_repstat); if (nd->nd_repstat == 0) { error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); /* * We can only save the updated values in the extended * attribute if the vp is exclusively locked. * This should happen when any of the following operations * occur on the vnode: * Close, Delegreturn, LayoutCommit, LayoutReturn * As such, the updated extended attribute should get saved * before nfsrv_checkdsattr() returns 0 and allows the cached * attributes to be returned without calling this function. */ if (error == 0 && VOP_ISLOCKED(vp) == LK_EXCLUSIVE) { error = nfsrv_setextattr(vp, nap, p); NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft setextat=%d\n", error); } } else error = nd->nd_repstat; m_freem(nd->nd_mrep); free(nd, M_TEMP); NFSD_DEBUG(4, "nfsrv_getattrdsrpc error=%d\n", error); return (error); } /* * Seek call to a DS. */ static int nfsrv_seekdsrpc(fhandle_t *fhp, off_t *offp, int content, bool *eofp, struct ucred *cred, NFSPROC_T *p, struct nfsmount *nmp) { uint32_t *tl; struct nfsrv_descript *nd; nfsv4stateid_t st; int error; NFSD_DEBUG(4, "in nfsrv_seekdsrpc\n"); /* * Use a stateid where other is an alternating 01010 pattern and * seqid is 0xffffffff. This value is not defined as special by * the RFC and is used by the FreeBSD NFS server to indicate an * MDS->DS proxy operation. */ st.other[0] = 0x55555555; st.other[1] = 0x55555555; st.other[2] = 0x55555555; st.seqid = 0xffffffff; nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); nfscl_reqstart(nd, NFSPROC_SEEKDS, nmp, (u_int8_t *)fhp, sizeof(fhandle_t), NULL, NULL, 0, 0); nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED); txdr_hyper(*offp, tl); tl += 2; *tl = txdr_unsigned(content); error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) { free(nd, M_TEMP); return (error); } NFSD_DEBUG(4, "nfsrv_seekdsrpc: aft seekrpc=%d\n", nd->nd_repstat); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED + NFSX_HYPER); if (*tl++ == newnfs_true) *eofp = true; else *eofp = false; *offp = fxdr_hyper(tl); } else error = nd->nd_repstat; nfsmout: m_freem(nd->nd_mrep); free(nd, M_TEMP); NFSD_DEBUG(4, "nfsrv_seekdsrpc error=%d\n", error); return (error); } /* * Get the device id and file handle for a DS file. */ int nfsrv_dsgetdevandfh(struct vnode *vp, NFSPROC_T *p, int *mirrorcntp, fhandle_t *fhp, char *devid) { int buflen, error; char *buf; buflen = 1024; buf = malloc(buflen, M_TEMP, M_WAITOK); error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, NULL, fhp, devid, NULL, NULL, NULL, NULL, NULL, NULL); free(buf, M_TEMP); return (error); } /* * Do a Lookup against the DS for the filename. */ static int nfsrv_pnfslookupds(struct vnode *vp, struct vnode *dvp, struct pnfsdsfile *pf, struct vnode **nvpp, NFSPROC_T *p) { struct nameidata named; struct ucred *tcred; char *bufp; u_long *hashp; struct vnode *nvp; int error; tcred = newnfs_getcred(); named.ni_cnd.cn_nameiop = LOOKUP; named.ni_cnd.cn_lkflags = LK_SHARED | LK_RETRY; named.ni_cnd.cn_cred = tcred; named.ni_cnd.cn_thread = p; named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF | SAVENAME; nfsvno_setpathbuf(&named, &bufp, &hashp); named.ni_cnd.cn_nameptr = bufp; named.ni_cnd.cn_namelen = strlen(pf->dsf_filename); strlcpy(bufp, pf->dsf_filename, NAME_MAX); NFSD_DEBUG(4, "nfsrv_pnfslookupds: filename=%s\n", bufp); error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd); NFSD_DEBUG(4, "nfsrv_pnfslookupds: aft LOOKUP=%d\n", error); NFSFREECRED(tcred); nfsvno_relpathbuf(&named); if (error == 0) *nvpp = nvp; NFSD_DEBUG(4, "eo nfsrv_pnfslookupds=%d\n", error); return (error); } /* * Set the file handle to the correct one. */ static void nfsrv_pnfssetfh(struct vnode *vp, struct pnfsdsfile *pf, char *devid, char *fnamep, struct vnode *nvp, NFSPROC_T *p) { struct nfsnode *np; int ret = 0; np = VTONFS(nvp); NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh, NFSX_MYFH); /* * We can only do a vn_set_extattr() if the vnode is exclusively * locked and vn_start_write() has been done. If devid != NULL or * fnamep != NULL or the vnode is shared locked, vn_start_write() * may not have been done. * If not done now, it will be done on a future call. */ if (devid == NULL && fnamep == NULL && NFSVOPISLOCKED(vp) == LK_EXCLUSIVE) ret = vn_extattr_set(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", sizeof(*pf), (char *)pf, p); NFSD_DEBUG(4, "eo nfsrv_pnfssetfh=%d\n", ret); } /* * Cause RPCs waiting on "nmp" to fail. This is called for a DS mount point * when the DS has failed. */ void nfsrv_killrpcs(struct nfsmount *nmp) { /* * Call newnfs_nmcancelreqs() to cause * any RPCs in progress on the mount point to * fail. * This will cause any process waiting for an * RPC to complete while holding a vnode lock * on the mounted-on vnode (such as "df" or * a non-forced "umount") to fail. * This will unlock the mounted-on vnode so * a forced dismount can succeed. * The NFSMNTP_CANCELRPCS flag should be set when this function is * called. */ newnfs_nmcancelreqs(nmp); } /* * Sum up the statfs info for each of the DSs, so that the client will * receive the total for all DSs. */ static int nfsrv_pnfsstatfs(struct statfs *sf, struct mount *mp) { struct statfs *tsf; struct nfsdevice *ds; struct vnode **dvpp, **tdvpp, *dvp; uint64_t tot; int cnt, error = 0, i; if (nfsrv_devidcnt <= 0) return (ENXIO); dvpp = mallocarray(nfsrv_devidcnt, sizeof(*dvpp), M_TEMP, M_WAITOK); tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK); /* Get an array of the dvps for the DSs. */ tdvpp = dvpp; i = 0; NFSDDSLOCK(); /* First, search for matches for same file system. */ TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { if (ds->nfsdev_nmp != NULL && ds->nfsdev_mdsisset != 0 && fsidcmp(&ds->nfsdev_mdsfsid, &mp->mnt_stat.f_fsid) == 0) { if (++i > nfsrv_devidcnt) break; *tdvpp++ = ds->nfsdev_dvp; } } /* * If no matches for same file system, total all servers not assigned * to a file system. */ if (i == 0) { TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { if (ds->nfsdev_nmp != NULL && ds->nfsdev_mdsisset == 0) { if (++i > nfsrv_devidcnt) break; *tdvpp++ = ds->nfsdev_dvp; } } } NFSDDSUNLOCK(); cnt = i; /* Do a VFS_STATFS() for each of the DSs and sum them up. */ tdvpp = dvpp; for (i = 0; i < cnt && error == 0; i++) { dvp = *tdvpp++; error = VFS_STATFS(dvp->v_mount, tsf); if (error == 0) { if (sf->f_bsize == 0) { if (tsf->f_bsize > 0) sf->f_bsize = tsf->f_bsize; else sf->f_bsize = 8192; } if (tsf->f_blocks > 0) { if (sf->f_bsize != tsf->f_bsize) { tot = tsf->f_blocks * tsf->f_bsize; sf->f_blocks += (tot / sf->f_bsize); } else sf->f_blocks += tsf->f_blocks; } if (tsf->f_bfree > 0) { if (sf->f_bsize != tsf->f_bsize) { tot = tsf->f_bfree * tsf->f_bsize; sf->f_bfree += (tot / sf->f_bsize); } else sf->f_bfree += tsf->f_bfree; } if (tsf->f_bavail > 0) { if (sf->f_bsize != tsf->f_bsize) { tot = tsf->f_bavail * tsf->f_bsize; sf->f_bavail += (tot / sf->f_bsize); } else sf->f_bavail += tsf->f_bavail; } } } free(tsf, M_TEMP); free(dvpp, M_TEMP); return (error); } /* * Set an NFSv4 acl. */ int nfsrv_setacl(struct vnode *vp, NFSACL_T *aclp, struct ucred *cred, NFSPROC_T *p) { int error; if (nfsrv_useacl == 0 || nfs_supportsnfsv4acls(vp) == 0) { error = NFSERR_ATTRNOTSUPP; goto out; } /* * With NFSv4 ACLs, chmod(2) may need to add additional entries. * Make sure it has enough room for that - splitting every entry * into two and appending "canonical six" entries at the end. * Cribbed out of kern/vfs_acl.c - Rick M. */ if (aclp->acl_cnt > (ACL_MAX_ENTRIES - 6) / 2) { error = NFSERR_ATTRNOTSUPP; goto out; } error = VOP_SETACL(vp, ACL_TYPE_NFS4, aclp, cred, p); if (error == 0) { error = nfsrv_dssetacl(vp, aclp, cred, p); if (error == ENOENT) error = 0; } out: NFSEXITCODE(error); return (error); } /* * Seek vnode op call (actually it is a VOP_IOCTL()). * This function is called with the vnode locked, but unlocks and vrele()s * the vp before returning. */ int nfsvno_seek(struct nfsrv_descript *nd, struct vnode *vp, u_long cmd, off_t *offp, int content, bool *eofp, struct ucred *cred, NFSPROC_T *p) { struct nfsvattr at; int error, ret; ASSERT_VOP_LOCKED(vp, "nfsvno_seek vp"); /* * Attempt to seek on a DS file. A return of ENOENT implies * there is no DS file to seek on. */ error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SEEKDS, NULL, NULL, NULL, NULL, NULL, offp, content, eofp); if (error != ENOENT) { vput(vp); return (error); } /* * Do the VOP_IOCTL() call. For the case where *offp == file_size, * VOP_IOCTL() will return ENXIO. However, the correct reply for * NFSv4.2 is *eofp == true and error == 0 for this case. */ NFSVOPUNLOCK(vp); error = VOP_IOCTL(vp, cmd, offp, 0, cred, p); *eofp = false; if (error == ENXIO || (error == 0 && cmd == FIOSEEKHOLE)) { /* Handle the cases where we might be at EOF. */ ret = nfsvno_getattr(vp, &at, nd, p, 0, NULL); if (ret == 0 && *offp == at.na_size) { *eofp = true; error = 0; } if (ret != 0 && error == 0) error = ret; } vrele(vp); NFSEXITCODE(error); return (error); } /* * Allocate vnode op call. */ int nfsvno_allocate(struct vnode *vp, off_t off, off_t len, struct ucred *cred, NFSPROC_T *p) { int error, trycnt; ASSERT_VOP_ELOCKED(vp, "nfsvno_allocate vp"); /* * Attempt to allocate on a DS file. A return of ENOENT implies * there is no DS file to allocate on. */ error = nfsrv_proxyds(vp, off, 0, cred, p, NFSPROC_ALLOCATE, NULL, NULL, NULL, NULL, NULL, &len, 0, NULL); if (error != ENOENT) return (error); error = 0; /* * Do the actual VOP_ALLOCATE(), looping a reasonable number of * times to achieve completion. */ trycnt = 0; while (error == 0 && len > 0 && trycnt++ < 20) error = VOP_ALLOCATE(vp, &off, &len); if (error == 0 && len > 0) error = NFSERR_IO; NFSEXITCODE(error); return (error); } /* * Get Extended Atribute vnode op into an mbuf list. */ int nfsvno_getxattr(struct vnode *vp, char *name, uint32_t maxresp, struct ucred *cred, uint64_t flag, int maxextsiz, struct thread *p, struct mbuf **mpp, struct mbuf **mpendp, int *lenp) { struct iovec *iv; struct uio io, *uiop = &io; struct mbuf *m, *m2; int alen, error, len, tlen; size_t siz; /* First, find out the size of the extended attribute. */ error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL, &siz, cred, p); if (error != 0) return (NFSERR_NOXATTR); if (siz > maxresp - NFS_MAXXDR) return (NFSERR_XATTR2BIG); len = siz; tlen = NFSM_RNDUP(len); if (tlen > 0) { /* * If cnt > MCLBYTES and the reply will not be saved, use * ext_pgs mbufs for TLS. * For NFSv4.0, we do not know for sure if the reply will * be saved, so do not use ext_pgs mbufs for NFSv4.0. * Always use ext_pgs mbufs if ND_EXTPG is set. */ if ((flag & ND_EXTPG) != 0 || (tlen > MCLBYTES && (flag & (ND_TLS | ND_SAVEREPLY)) == ND_TLS && (flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4)) uiop->uio_iovcnt = nfsrv_createiovec_extpgs(tlen, maxextsiz, &m, &m2, &iv); else uiop->uio_iovcnt = nfsrv_createiovec(tlen, &m, &m2, &iv); uiop->uio_iov = iv; } else { uiop->uio_iovcnt = 0; uiop->uio_iov = iv = NULL; m = m2 = NULL; } uiop->uio_offset = 0; uiop->uio_resid = tlen; uiop->uio_rw = UIO_READ; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = p; #ifdef MAC error = mac_vnode_check_getextattr(cred, vp, EXTATTR_NAMESPACE_USER, name); if (error != 0) goto out; #endif if (tlen > 0) error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop, NULL, cred, p); if (error != 0) goto out; if (uiop->uio_resid > 0) { alen = tlen; len = tlen - uiop->uio_resid; tlen = NFSM_RNDUP(len); if (alen != tlen) printf("nfsvno_getxattr: weird size read\n"); if (tlen == 0) { m_freem(m); m = m2 = NULL; } else if (alen != tlen || tlen != len) m2 = nfsrv_adj(m, alen - tlen, tlen - len); } *lenp = len; *mpp = m; *mpendp = m2; out: if (error != 0) { if (m != NULL) m_freem(m); *lenp = 0; } free(iv, M_TEMP); NFSEXITCODE(error); return (error); } /* * Set Extended attribute vnode op from an mbuf list. */ int nfsvno_setxattr(struct vnode *vp, char *name, int len, struct mbuf *m, char *cp, struct ucred *cred, struct thread *p) { struct iovec *iv; struct uio uio, *uiop = &uio; int cnt, error; error = 0; #ifdef MAC error = mac_vnode_check_setextattr(cred, vp, EXTATTR_NAMESPACE_USER, name); #endif if (error != 0) goto out; uiop->uio_rw = UIO_WRITE; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = p; uiop->uio_offset = 0; uiop->uio_resid = len; if (len > 0) { error = nfsrv_createiovecw(len, m, cp, &iv, &cnt); uiop->uio_iov = iv; uiop->uio_iovcnt = cnt; } else { uiop->uio_iov = iv = NULL; uiop->uio_iovcnt = 0; } if (error == 0) { error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop, cred, p); free(iv, M_TEMP); } out: NFSEXITCODE(error); return (error); } /* * Remove Extended attribute vnode op. */ int nfsvno_rmxattr(struct nfsrv_descript *nd, struct vnode *vp, char *name, struct ucred *cred, struct thread *p) { int error; /* * Get rid of any delegations. I am not sure why this is required, * but RFC-8276 says so. */ error = nfsrv_checkremove(vp, 0, nd, nd->nd_clientid, p); if (error != 0) goto out; #ifdef MAC error = mac_vnode_check_deleteextattr(cred, vp, EXTATTR_NAMESPACE_USER, name); if (error != 0) goto out; #endif error = VOP_DELETEEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, cred, p); if (error == EOPNOTSUPP) error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL, cred, p); out: NFSEXITCODE(error); return (error); } /* * List Extended Atribute vnode op into an mbuf list. */ int nfsvno_listxattr(struct vnode *vp, uint64_t cookie, struct ucred *cred, struct thread *p, u_char **bufp, uint32_t *lenp, bool *eofp) { struct iovec iv; struct uio io; int error; size_t siz; *bufp = NULL; /* First, find out the size of the extended attribute. */ error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, NULL, &siz, cred, p); if (error != 0) return (NFSERR_NOXATTR); if (siz <= cookie) { *lenp = 0; *eofp = true; goto out; } if (siz > cookie + *lenp) { siz = cookie + *lenp; *eofp = false; } else *eofp = true; /* Just choose a sanity limit of 10Mbytes for malloc(M_TEMP). */ if (siz > 10 * 1024 * 1024) { error = NFSERR_XATTR2BIG; goto out; } *bufp = malloc(siz, M_TEMP, M_WAITOK); iv.iov_base = *bufp; iv.iov_len = siz; io.uio_iovcnt = 1; io.uio_iov = &iv; io.uio_offset = 0; io.uio_resid = siz; io.uio_rw = UIO_READ; io.uio_segflg = UIO_SYSSPACE; io.uio_td = p; #ifdef MAC error = mac_vnode_check_listextattr(cred, vp, EXTATTR_NAMESPACE_USER); if (error != 0) goto out; #endif error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, &io, NULL, cred, p); if (error != 0) goto out; if (io.uio_resid > 0) siz -= io.uio_resid; *lenp = siz; out: if (error != 0) { free(*bufp, M_TEMP); *bufp = NULL; } NFSEXITCODE(error); return (error); } /* * Trim trailing data off the mbuf list being built. */ static void nfsm_trimtrailing(struct nfsrv_descript *nd, struct mbuf *mb, char *bpos, int bextpg, int bextpgsiz) { vm_page_t pg; int fullpgsiz, i; if (mb->m_next != NULL) { m_freem(mb->m_next); mb->m_next = NULL; } if ((mb->m_flags & M_EXTPG) != 0) { /* First, get rid of any pages after this position. */ for (i = mb->m_epg_npgs - 1; i > bextpg; i--) { pg = PHYS_TO_VM_PAGE(mb->m_epg_pa[i]); vm_page_unwire_noq(pg); vm_page_free(pg); } mb->m_epg_npgs = bextpg + 1; if (bextpg == 0) fullpgsiz = PAGE_SIZE - mb->m_epg_1st_off; else fullpgsiz = PAGE_SIZE; mb->m_epg_last_len = fullpgsiz - bextpgsiz; mb->m_len = m_epg_pagelen(mb, 0, mb->m_epg_1st_off); for (i = 1; i < mb->m_epg_npgs; i++) mb->m_len += m_epg_pagelen(mb, i, 0); nd->nd_bextpgsiz = bextpgsiz; nd->nd_bextpg = bextpg; } else mb->m_len = bpos - mtod(mb, char *); nd->nd_mb = mb; nd->nd_bpos = bpos; } extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *); /* * Called once to initialize data structures... */ static int nfsd_modevent(module_t mod, int type, void *data) { int error = 0, i; static int loaded = 0; switch (type) { case MOD_LOAD: if (loaded) goto out; newnfs_portinit(); for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { mtx_init(&nfsrchash_table[i].mtx, "nfsrtc", NULL, MTX_DEF); mtx_init(&nfsrcahash_table[i].mtx, "nfsrtca", NULL, MTX_DEF); } mtx_init(&nfsrc_udpmtx, "nfsuc", NULL, MTX_DEF); mtx_init(&nfs_v4root_mutex, "nfs4rt", NULL, MTX_DEF); mtx_init(&nfsv4root_mnt.mnt_mtx, "nfs4mnt", NULL, MTX_DEF); mtx_init(&nfsrv_dontlistlock_mtx, "nfs4dnl", NULL, MTX_DEF); mtx_init(&nfsrv_recalllock_mtx, "nfs4rec", NULL, MTX_DEF); lockinit(&nfsv4root_mnt.mnt_explock, PVFS, "explock", 0, 0); nfsrvd_initcache(); nfsd_init(); NFSD_LOCK(); nfsrvd_init(0); NFSD_UNLOCK(); nfsd_mntinit(); #ifdef VV_DISABLEDELEG vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation; vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation; #endif nfsd_call_servertimer = nfsrv_servertimer; nfsd_call_nfsd = nfssvc_nfsd; loaded = 1; break; case MOD_UNLOAD: if (newnfs_numnfsd != 0) { error = EBUSY; break; } #ifdef VV_DISABLEDELEG vn_deleg_ops.vndeleg_recall = NULL; vn_deleg_ops.vndeleg_disable = NULL; #endif nfsd_call_servertimer = NULL; nfsd_call_nfsd = NULL; /* Clean out all NFSv4 state. */ nfsrv_throwawayallstate(curthread); /* Clean the NFS server reply cache */ nfsrvd_cleancache(); /* Free up the krpc server pool. */ if (nfsrvd_pool != NULL) svcpool_destroy(nfsrvd_pool); /* and get rid of the locks */ for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { mtx_destroy(&nfsrchash_table[i].mtx); mtx_destroy(&nfsrcahash_table[i].mtx); } mtx_destroy(&nfsrc_udpmtx); mtx_destroy(&nfs_v4root_mutex); mtx_destroy(&nfsv4root_mnt.mnt_mtx); mtx_destroy(&nfsrv_dontlistlock_mtx); mtx_destroy(&nfsrv_recalllock_mtx); for (i = 0; i < nfsrv_sessionhashsize; i++) mtx_destroy(&nfssessionhash[i].mtx); if (nfslayouthash != NULL) { for (i = 0; i < nfsrv_layouthashsize; i++) mtx_destroy(&nfslayouthash[i].mtx); free(nfslayouthash, M_NFSDSESSION); } lockdestroy(&nfsv4root_mnt.mnt_explock); free(nfsclienthash, M_NFSDCLIENT); free(nfslockhash, M_NFSDLOCKFILE); free(nfssessionhash, M_NFSDSESSION); loaded = 0; break; default: error = EOPNOTSUPP; break; } out: NFSEXITCODE(error); return (error); } static moduledata_t nfsd_mod = { "nfsd", nfsd_modevent, NULL, }; DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY); /* So that loader and kldload(2) can find us, wherever we are.. */ MODULE_VERSION(nfsd, 1); MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1); MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1); MODULE_DEPEND(nfsd, krpc, 1, 1, 1); MODULE_DEPEND(nfsd, nfssvc, 1, 1, 1); Index: projects/nfs-over-tls/sys/sys/mount.h =================================================================== --- projects/nfs-over-tls/sys/sys/mount.h (revision 364495) +++ projects/nfs-over-tls/sys/sys/mount.h (revision 364496) @@ -1,1116 +1,1141 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)mount.h 8.21 (Berkeley) 5/20/95 * $FreeBSD$ */ #ifndef _SYS_MOUNT_H_ #define _SYS_MOUNT_H_ #include #include #ifdef _KERNEL #include #include #include #include #include #endif /* * NOTE: When changing statfs structure, mount structure, MNT_* flags or * MNTK_* flags also update DDB show mount command in vfs_subr.c. */ typedef struct fsid { int32_t val[2]; } fsid_t; /* filesystem id type */ #define fsidcmp(a, b) memcmp((a), (b), sizeof(fsid_t)) /* * File identifier. * These are unique per filesystem on a single machine. * * Note that the offset of fid_data is 4 bytes, so care must be taken to avoid * undefined behavior accessing unaligned fields within an embedded struct. */ #define MAXFIDSZ 16 struct fid { u_short fid_len; /* length of data in bytes */ u_short fid_data0; /* force longword alignment */ char fid_data[MAXFIDSZ]; /* data (variable length) */ }; /* * filesystem statistics */ #define MFSNAMELEN 16 /* length of type name including null */ #define MNAMELEN 1024 /* size of on/from name bufs */ #define STATFS_VERSION 0x20140518 /* current version number */ struct statfs { uint32_t f_version; /* structure version number */ uint32_t f_type; /* type of filesystem */ uint64_t f_flags; /* copy of mount exported flags */ uint64_t f_bsize; /* filesystem fragment size */ uint64_t f_iosize; /* optimal transfer block size */ uint64_t f_blocks; /* total data blocks in filesystem */ uint64_t f_bfree; /* free blocks in filesystem */ int64_t f_bavail; /* free blocks avail to non-superuser */ uint64_t f_files; /* total file nodes in filesystem */ int64_t f_ffree; /* free nodes avail to non-superuser */ uint64_t f_syncwrites; /* count of sync writes since mount */ uint64_t f_asyncwrites; /* count of async writes since mount */ uint64_t f_syncreads; /* count of sync reads since mount */ uint64_t f_asyncreads; /* count of async reads since mount */ uint64_t f_spare[10]; /* unused spare */ uint32_t f_namemax; /* maximum filename length */ uid_t f_owner; /* user that mounted the filesystem */ fsid_t f_fsid; /* filesystem id */ char f_charspare[80]; /* spare string space */ char f_fstypename[MFSNAMELEN]; /* filesystem type name */ char f_mntfromname[MNAMELEN]; /* mounted filesystem */ char f_mntonname[MNAMELEN]; /* directory on which mounted */ }; #if defined(_WANT_FREEBSD11_STATFS) || defined(_KERNEL) #define FREEBSD11_STATFS_VERSION 0x20030518 /* current version number */ struct freebsd11_statfs { uint32_t f_version; /* structure version number */ uint32_t f_type; /* type of filesystem */ uint64_t f_flags; /* copy of mount exported flags */ uint64_t f_bsize; /* filesystem fragment size */ uint64_t f_iosize; /* optimal transfer block size */ uint64_t f_blocks; /* total data blocks in filesystem */ uint64_t f_bfree; /* free blocks in filesystem */ int64_t f_bavail; /* free blocks avail to non-superuser */ uint64_t f_files; /* total file nodes in filesystem */ int64_t f_ffree; /* free nodes avail to non-superuser */ uint64_t f_syncwrites; /* count of sync writes since mount */ uint64_t f_asyncwrites; /* count of async writes since mount */ uint64_t f_syncreads; /* count of sync reads since mount */ uint64_t f_asyncreads; /* count of async reads since mount */ uint64_t f_spare[10]; /* unused spare */ uint32_t f_namemax; /* maximum filename length */ uid_t f_owner; /* user that mounted the filesystem */ fsid_t f_fsid; /* filesystem id */ char f_charspare[80]; /* spare string space */ char f_fstypename[16]; /* filesystem type name */ char f_mntfromname[88]; /* mounted filesystem */ char f_mntonname[88]; /* directory on which mounted */ }; #endif /* _WANT_FREEBSD11_STATFS || _KERNEL */ #ifdef _KERNEL #define OMFSNAMELEN 16 /* length of fs type name, including null */ #define OMNAMELEN (88 - 2 * sizeof(long)) /* size of on/from name bufs */ /* XXX getfsstat.2 is out of date with write and read counter changes here. */ /* XXX statfs.2 is out of date with read counter changes here. */ struct ostatfs { long f_spare2; /* placeholder */ long f_bsize; /* fundamental filesystem block size */ long f_iosize; /* optimal transfer block size */ long f_blocks; /* total data blocks in filesystem */ long f_bfree; /* free blocks in fs */ long f_bavail; /* free blocks avail to non-superuser */ long f_files; /* total file nodes in filesystem */ long f_ffree; /* free file nodes in fs */ fsid_t f_fsid; /* filesystem id */ uid_t f_owner; /* user that mounted the filesystem */ int f_type; /* type of filesystem */ int f_flags; /* copy of mount exported flags */ long f_syncwrites; /* count of sync writes since mount */ long f_asyncwrites; /* count of async writes since mount */ char f_fstypename[OMFSNAMELEN]; /* fs type name */ char f_mntonname[OMNAMELEN]; /* directory on which mounted */ long f_syncreads; /* count of sync reads since mount */ long f_asyncreads; /* count of async reads since mount */ short f_spares1; /* unused spare */ char f_mntfromname[OMNAMELEN];/* mounted filesystem */ short f_spares2; /* unused spare */ /* * XXX on machines where longs are aligned to 8-byte boundaries, there * is an unnamed int32_t here. This spare was after the apparent end * of the struct until we bit off the read counters from f_mntonname. */ long f_spare[2]; /* unused spare */ }; TAILQ_HEAD(vnodelst, vnode); /* Mount options list */ TAILQ_HEAD(vfsoptlist, vfsopt); struct vfsopt { TAILQ_ENTRY(vfsopt) link; char *name; void *value; int len; int pos; int seen; }; /* * Structure per mounted filesystem. Each mounted filesystem has an * array of operations and an instance record. The filesystems are * put on a doubly linked list. * * Lock reference: * l - mnt_listmtx * m - mountlist_mtx * i - interlock * v - vnode freelist mutex * * Unmarked fields are considered stable as long as a ref is held. * */ struct mount { struct mtx mnt_mtx; /* mount structure interlock */ int mnt_gen; /* struct mount generation */ #define mnt_startzero mnt_list TAILQ_ENTRY(mount) mnt_list; /* (m) mount list */ struct vfsops *mnt_op; /* operations on fs */ struct vfsconf *mnt_vfc; /* configuration info */ struct vnode *mnt_vnodecovered; /* vnode we mounted on */ struct vnode *mnt_syncer; /* syncer vnode */ int mnt_ref; /* (i) Reference count */ struct vnodelst mnt_nvnodelist; /* (i) list of vnodes */ int mnt_nvnodelistsize; /* (i) # of vnodes */ int mnt_writeopcount; /* (i) write syscalls pending */ int mnt_kern_flag; /* (i) kernel only flags */ uint64_t mnt_flag; /* (i) flags shared with user */ struct vfsoptlist *mnt_opt; /* current mount options */ struct vfsoptlist *mnt_optnew; /* new options passed to fs */ int mnt_maxsymlinklen; /* max size of short symlink */ struct statfs mnt_stat; /* cache of filesystem stats */ struct ucred *mnt_cred; /* credentials of mounter */ void * mnt_data; /* private data */ time_t mnt_time; /* last time written*/ int mnt_iosize_max; /* max size for clusters, etc */ struct netexport *mnt_export; /* export list */ struct label *mnt_label; /* MAC label for the fs */ u_int mnt_hashseed; /* Random seed for vfs_hash */ int mnt_lockref; /* (i) Lock reference count */ int mnt_secondary_writes; /* (i) # of secondary writes */ int mnt_secondary_accwrites;/* (i) secondary wr. starts */ struct thread *mnt_susp_owner; /* (i) thread owning suspension */ #define mnt_endzero mnt_gjprovider char *mnt_gjprovider; /* gjournal provider name */ struct mtx mnt_listmtx; struct vnodelst mnt_lazyvnodelist; /* (l) list of lazy vnodes */ int mnt_lazyvnodelistsize; /* (l) # of lazy vnodes */ struct lock mnt_explock; /* vfs_export walkers lock */ TAILQ_ENTRY(mount) mnt_upper_link; /* (m) we in the all uppers */ TAILQ_HEAD(, mount) mnt_uppers; /* (m) upper mounts over us*/ int __aligned(CACHE_LINE_SIZE) mnt_vfs_ops;/* (i) pending vfs ops */ int *mnt_thread_in_ops_pcpu; int *mnt_ref_pcpu; int *mnt_lockref_pcpu; int *mnt_writeopcount_pcpu; struct vnode *mnt_rootvnode; }; /* * Definitions for MNT_VNODE_FOREACH_ALL. */ struct vnode *__mnt_vnode_next_all(struct vnode **mvp, struct mount *mp); struct vnode *__mnt_vnode_first_all(struct vnode **mvp, struct mount *mp); void __mnt_vnode_markerfree_all(struct vnode **mvp, struct mount *mp); #define MNT_VNODE_FOREACH_ALL(vp, mp, mvp) \ for (vp = __mnt_vnode_first_all(&(mvp), (mp)); \ (vp) != NULL; vp = __mnt_vnode_next_all(&(mvp), (mp))) #define MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp) \ do { \ MNT_ILOCK(mp); \ __mnt_vnode_markerfree_all(&(mvp), (mp)); \ /* MNT_IUNLOCK(mp); -- done in above function */ \ mtx_assert(MNT_MTX(mp), MA_NOTOWNED); \ } while (0) /* * Definitions for MNT_VNODE_FOREACH_LAZY. */ typedef int mnt_lazy_cb_t(struct vnode *, void *); struct vnode *__mnt_vnode_next_lazy(struct vnode **mvp, struct mount *mp, mnt_lazy_cb_t *cb, void *cbarg); struct vnode *__mnt_vnode_first_lazy(struct vnode **mvp, struct mount *mp, mnt_lazy_cb_t *cb, void *cbarg); void __mnt_vnode_markerfree_lazy(struct vnode **mvp, struct mount *mp); #define MNT_VNODE_FOREACH_LAZY(vp, mp, mvp, cb, cbarg) \ for (vp = __mnt_vnode_first_lazy(&(mvp), (mp), (cb), (cbarg)); \ (vp) != NULL; \ vp = __mnt_vnode_next_lazy(&(mvp), (mp), (cb), (cbarg))) #define MNT_VNODE_FOREACH_LAZY_ABORT(mp, mvp) \ __mnt_vnode_markerfree_lazy(&(mvp), (mp)) #define MNT_ILOCK(mp) mtx_lock(&(mp)->mnt_mtx) #define MNT_ITRYLOCK(mp) mtx_trylock(&(mp)->mnt_mtx) #define MNT_IUNLOCK(mp) mtx_unlock(&(mp)->mnt_mtx) #define MNT_MTX(mp) (&(mp)->mnt_mtx) #define MNT_REF(mp) do { \ mtx_assert(MNT_MTX(mp), MA_OWNED); \ mp->mnt_ref++; \ } while (0) #define MNT_REL(mp) do { \ mtx_assert(MNT_MTX(mp), MA_OWNED); \ (mp)->mnt_ref--; \ if ((mp)->mnt_vfs_ops && (mp)->mnt_ref < 0) \ vfs_dump_mount_counters(mp); \ if ((mp)->mnt_ref == 0 && (mp)->mnt_vfs_ops) \ wakeup((mp)); \ } while (0) #endif /* _KERNEL */ +#if defined(_WANT_MNTOPTNAMES) || defined(_KERNEL) +struct mntoptnames { + uint64_t o_opt; + const char *o_name; +}; +#define MNTOPT_NAMES \ + { MNT_ASYNC, "asynchronous" }, \ + { MNT_EXPORTED, "NFS exported" }, \ + { MNT_LOCAL, "local" }, \ + { MNT_NOATIME, "noatime" }, \ + { MNT_NOEXEC, "noexec" }, \ + { MNT_NOSUID, "nosuid" }, \ + { MNT_NOSYMFOLLOW, "nosymfollow" }, \ + { MNT_QUOTA, "with quotas" }, \ + { MNT_RDONLY, "read-only" }, \ + { MNT_SYNCHRONOUS, "synchronous" }, \ + { MNT_UNION, "union" }, \ + { MNT_NOCLUSTERR, "noclusterr" }, \ + { MNT_NOCLUSTERW, "noclusterw" }, \ + { MNT_SUIDDIR, "suiddir" }, \ + { MNT_SOFTDEP, "soft-updates" }, \ + { MNT_SUJ, "journaled soft-updates" }, \ + { MNT_MULTILABEL, "multilabel" }, \ + { MNT_ACLS, "acls" }, \ + { MNT_NFS4ACLS, "nfsv4acls" }, \ + { MNT_GJOURNAL, "gjournal" }, \ + { MNT_AUTOMOUNTED, "automounted" }, \ + { MNT_VERIFIED, "verified" }, \ + { MNT_UNTRUSTED, "untrusted" }, \ + { MNT_NOCOVER, "nocover" }, \ + { MNT_EMPTYDIR, "emptydir" }, \ + { MNT_UPDATE, "update" }, \ + { MNT_DELEXPORT, "delexport" }, \ + { MNT_RELOAD, "reload" }, \ + { MNT_FORCE, "force" }, \ + { MNT_SNAPSHOT, "snapshot" }, \ + { 0, NULL } +#endif + /* * User specifiable flags, stored in mnt_flag. */ #define MNT_RDONLY 0x0000000000000001ULL /* read only filesystem */ #define MNT_SYNCHRONOUS 0x0000000000000002ULL /* fs written synchronously */ #define MNT_NOEXEC 0x0000000000000004ULL /* can't exec from filesystem */ #define MNT_NOSUID 0x0000000000000008ULL /* don't honor setuid fs bits */ #define MNT_NFS4ACLS 0x0000000000000010ULL /* enable NFS version 4 ACLs */ #define MNT_UNION 0x0000000000000020ULL /* union with underlying fs */ #define MNT_ASYNC 0x0000000000000040ULL /* fs written asynchronously */ #define MNT_SUIDDIR 0x0000000000100000ULL /* special SUID dir handling */ #define MNT_SOFTDEP 0x0000000000200000ULL /* using soft updates */ #define MNT_NOSYMFOLLOW 0x0000000000400000ULL /* do not follow symlinks */ #define MNT_GJOURNAL 0x0000000002000000ULL /* GEOM journal support enabled */ #define MNT_MULTILABEL 0x0000000004000000ULL /* MAC support for objects */ #define MNT_ACLS 0x0000000008000000ULL /* ACL support enabled */ #define MNT_NOATIME 0x0000000010000000ULL /* dont update file access time */ #define MNT_NOCLUSTERR 0x0000000040000000ULL /* disable cluster read */ #define MNT_NOCLUSTERW 0x0000000080000000ULL /* disable cluster write */ #define MNT_SUJ 0x0000000100000000ULL /* using journaled soft updates */ #define MNT_AUTOMOUNTED 0x0000000200000000ULL /* mounted by automountd(8) */ #define MNT_UNTRUSTED 0x0000000800000000ULL /* filesys metadata untrusted */ /* * NFS export related mount flags. */ #define MNT_EXRDONLY 0x0000000000000080ULL /* exported read only */ #define MNT_EXPORTED 0x0000000000000100ULL /* filesystem is exported */ #define MNT_DEFEXPORTED 0x0000000000000200ULL /* exported to the world */ #define MNT_EXPORTANON 0x0000000000000400ULL /* anon uid mapping for all */ #define MNT_EXKERB 0x0000000000000800ULL /* exported with Kerberos */ #define MNT_EXPUBLIC 0x0000000020000000ULL /* public export (WebNFS) */ +#define MNT_EXTLS 0x0000004000000000ULL /* require TLS */ +#define MNT_EXTLSCERT 0x0000008000000000ULL /* require TLS with client cert */ +#define MNT_EXTLSCERTUSER 0x0000010000000000ULL /* require TLS with user cert */ /* * Flags set by internal operations, * but visible to the user. * XXX some of these are not quite right.. (I've never seen the root flag set) */ #define MNT_LOCAL 0x0000000000001000ULL /* filesystem is stored locally */ #define MNT_QUOTA 0x0000000000002000ULL /* quotas are enabled on fs */ #define MNT_ROOTFS 0x0000000000004000ULL /* identifies the root fs */ #define MNT_USER 0x0000000000008000ULL /* mounted by a user */ #define MNT_IGNORE 0x0000000000800000ULL /* do not show entry in df */ #define MNT_VERIFIED 0x0000000400000000ULL /* filesystem is verified */ /* * Mask of flags that are visible to statfs(). * XXX I think that this could now become (~(MNT_CMDFLAGS)) * but the 'mount' program may need changing to handle this. */ #define MNT_VISFLAGMASK (MNT_RDONLY | MNT_SYNCHRONOUS | MNT_NOEXEC | \ MNT_NOSUID | MNT_UNION | MNT_SUJ | \ MNT_ASYNC | MNT_EXRDONLY | MNT_EXPORTED | \ MNT_DEFEXPORTED | MNT_EXPORTANON| MNT_EXKERB | \ MNT_LOCAL | MNT_USER | MNT_QUOTA | \ MNT_ROOTFS | MNT_NOATIME | MNT_NOCLUSTERR| \ MNT_NOCLUSTERW | MNT_SUIDDIR | MNT_SOFTDEP | \ MNT_IGNORE | MNT_EXPUBLIC | MNT_NOSYMFOLLOW | \ MNT_GJOURNAL | MNT_MULTILABEL | MNT_ACLS | \ MNT_NFS4ACLS | MNT_AUTOMOUNTED | MNT_VERIFIED | \ MNT_UNTRUSTED) /* Mask of flags that can be updated. */ #define MNT_UPDATEMASK (MNT_NOSUID | MNT_NOEXEC | \ MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | \ MNT_NOATIME | \ MNT_NOSYMFOLLOW | MNT_IGNORE | \ MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR | \ MNT_ACLS | MNT_USER | MNT_NFS4ACLS | \ MNT_AUTOMOUNTED | MNT_UNTRUSTED) - -/* - * Export flags that are only set/used in the ex_flags field and - * not in mnt_flag. Since some are defined as MNT_EXxxx above, - * these ones are just redefined here. - * MNT_EXPORTED must remain and be set/cleared in mnt_flag. - * The others defined as MNT_xxx should probably remain for - * compatibility with old versions of mountd, etc. - */ -#define MNTEX_TLS 0x0000000000000001ULL /* TLS RPC required */ -#define MNTEX_TLSCERT 0x0000000000000002ULL /* verified cert req */ -#define MNTEX_TLSCERTUSER 0x0000000000000004ULL /* user cert req */ -#define MNTEX_EXPORTED MNT_EXPORTED /* filesystem exported */ -#define MNTEX_RDONLY MNT_EXRDONLY /* exported read only */ -#define MNTEX_EXPORTANON MNT_EXPORTANON /* anon uid mapping for all */ -#define MNTEX_KERB MNT_EXKERB /* exported with Kerberos */ -#define MNTEX_PUBLIC MNT_EXPUBLIC /* public export (WebNFS) */ /* * External filesystem command modifier flags. * Unmount can use the MNT_FORCE flag. * XXX: These are not STATES and really should be somewhere else. * XXX: MNT_BYFSID and MNT_NONBUSY collide with MNT_ACLS and MNT_MULTILABEL, * but because MNT_ACLS and MNT_MULTILABEL are only used for mount(2), * and MNT_BYFSID and MNT_NONBUSY are only used for unmount(2), * it's harmless. */ #define MNT_UPDATE 0x0000000000010000ULL /* not real mount, just update */ #define MNT_DELEXPORT 0x0000000000020000ULL /* delete export host lists */ #define MNT_RELOAD 0x0000000000040000ULL /* reload filesystem data */ #define MNT_FORCE 0x0000000000080000ULL /* force unmount or readonly */ #define MNT_SNAPSHOT 0x0000000001000000ULL /* snapshot the filesystem */ #define MNT_NONBUSY 0x0000000004000000ULL /* check vnode use counts. */ #define MNT_BYFSID 0x0000000008000000ULL /* specify filesystem by ID. */ #define MNT_NOCOVER 0x0000001000000000ULL /* Do not cover a mount point */ #define MNT_EMPTYDIR 0x0000002000000000ULL /* Only mount on empty dir */ #define MNT_CMDFLAGS (MNT_UPDATE | MNT_DELEXPORT | MNT_RELOAD | \ MNT_FORCE | MNT_SNAPSHOT | MNT_NONBUSY | \ MNT_BYFSID | MNT_NOCOVER | MNT_EMPTYDIR) /* * Internal filesystem control flags stored in mnt_kern_flag. * * MNTK_UNMOUNT locks the mount entry so that name lookup cannot * proceed past the mount point. This keeps the subtree stable during * mounts and unmounts. When non-forced unmount flushes all vnodes * from the mp queue, the MNTK_UNMOUNT flag prevents insmntque() from * queueing new vnodes. * * MNTK_UNMOUNTF permits filesystems to detect a forced unmount while * dounmount() is still waiting to lock the mountpoint. This allows * the filesystem to cancel operations that might otherwise deadlock * with the unmount attempt (used by NFS). */ #define MNTK_UNMOUNTF 0x00000001 /* forced unmount in progress */ #define MNTK_ASYNC 0x00000002 /* filtered async flag */ #define MNTK_SOFTDEP 0x00000004 /* async disabled by softdep */ #define MNTK_NOMSYNC 0x00000008 /* don't do msync */ #define MNTK_DRAINING 0x00000010 /* lock draining is happening */ #define MNTK_REFEXPIRE 0x00000020 /* refcount expiring is happening */ #define MNTK_EXTENDED_SHARED 0x00000040 /* Allow shared locking for more ops */ #define MNTK_SHARED_WRITES 0x00000080 /* Allow shared locking for writes */ #define MNTK_NO_IOPF 0x00000100 /* Disallow page faults during reads and writes. Filesystem shall properly handle i/o state on EFAULT. */ #define MNTK_VGONE_UPPER 0x00000200 #define MNTK_VGONE_WAITER 0x00000400 #define MNTK_LOOKUP_EXCL_DOTDOT 0x00000800 #define MNTK_MARKER 0x00001000 #define MNTK_UNMAPPED_BUFS 0x00002000 #define MNTK_USES_BCACHE 0x00004000 /* FS uses the buffer cache. */ #define MNTK_TEXT_REFS 0x00008000 /* Keep use ref for text */ #define MNTK_VMSETSIZE_BUG 0x00010000 #define MNTK_UNIONFS 0x00020000 /* A hack for F_ISUNIONSTACK */ #define MNTK_FPLOOKUP 0x00040000 /* fast path lookup is supported */ #define MNTK_NOASYNC 0x00800000 /* disable async */ #define MNTK_UNMOUNT 0x01000000 /* unmount in progress */ #define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */ #define MNTK_SUSPEND 0x08000000 /* request write suspension */ #define MNTK_SUSPEND2 0x04000000 /* block secondary writes */ #define MNTK_SUSPENDED 0x10000000 /* write operations are suspended */ #define MNTK_NULL_NOCACHE 0x20000000 /* auto disable cache for nullfs mounts over this fs */ #define MNTK_LOOKUP_SHARED 0x40000000 /* FS supports shared lock lookups */ #define MNTK_NOKNOTE 0x80000000 /* Don't send KNOTEs from VOP hooks */ #ifdef _KERNEL static inline int MNT_SHARED_WRITES(struct mount *mp) { return (mp != NULL && (mp->mnt_kern_flag & MNTK_SHARED_WRITES) != 0); } static inline int MNT_EXTENDED_SHARED(struct mount *mp) { return (mp != NULL && (mp->mnt_kern_flag & MNTK_EXTENDED_SHARED) != 0); } #endif /* * Sysctl CTL_VFS definitions. * * Second level identifier specifies which filesystem. Second level * identifier VFS_VFSCONF returns information about all filesystems. * Second level identifier VFS_GENERIC is non-terminal. */ #define VFS_VFSCONF 0 /* get configured filesystems */ #define VFS_GENERIC 0 /* generic filesystem information */ /* * Third level identifiers for VFS_GENERIC are given below; third * level identifiers for specific filesystems are given in their * mount specific header files. */ #define VFS_MAXTYPENUM 1 /* int: highest defined filesystem type */ #define VFS_CONF 2 /* struct: vfsconf for filesystem given as next argument */ /* * Flags for various system call interfaces. * * waitfor flags to vfs_sync() and getfsstat() */ #define MNT_WAIT 1 /* synchronously wait for I/O to complete */ #define MNT_NOWAIT 2 /* start all I/O, but do not wait for it */ #define MNT_LAZY 3 /* push data not written by filesystem syncer */ #define MNT_SUSPEND 4 /* Suspend file system after sync */ /* * Generic file handle */ struct fhandle { fsid_t fh_fsid; /* Filesystem id of mount point */ struct fid fh_fid; /* Filesys specific id */ }; typedef struct fhandle fhandle_t; /* * Old export arguments without security flavor list */ struct oexport_args { int ex_flags; /* export related flags */ uid_t ex_root; /* mapping for root uid */ struct xucred ex_anon; /* mapping for anonymous user */ struct sockaddr *ex_addr; /* net address to which exported */ u_char ex_addrlen; /* and the net address length */ struct sockaddr *ex_mask; /* mask of valid bits in saddr */ u_char ex_masklen; /* and the smask length */ char *ex_indexfile; /* index file for WebNFS URLs */ }; /* * Not quite so old export arguments with 32bit ex_flags and xucred ex_anon. */ #define MAXSECFLAVORS 5 struct o2export_args { int ex_flags; /* export related flags */ uid_t ex_root; /* mapping for root uid */ struct xucred ex_anon; /* mapping for anonymous user */ struct sockaddr *ex_addr; /* net address to which exported */ u_char ex_addrlen; /* and the net address length */ struct sockaddr *ex_mask; /* mask of valid bits in saddr */ u_char ex_masklen; /* and the smask length */ char *ex_indexfile; /* index file for WebNFS URLs */ int ex_numsecflavors; /* security flavor count */ int ex_secflavors[MAXSECFLAVORS]; /* list of security flavors */ }; /* * Export arguments for local filesystem mount calls. */ struct export_args { uint64_t ex_flags; /* export related flags */ uid_t ex_root; /* mapping for root uid */ uid_t ex_uid; /* mapping for anonymous user */ int ex_ngroups; gid_t *ex_groups; struct sockaddr *ex_addr; /* net address to which exported */ u_char ex_addrlen; /* and the net address length */ struct sockaddr *ex_mask; /* mask of valid bits in saddr */ u_char ex_masklen; /* and the smask length */ char *ex_indexfile; /* index file for WebNFS URLs */ int ex_numsecflavors; /* security flavor count */ int ex_secflavors[MAXSECFLAVORS]; /* list of security flavors */ }; /* * Structure holding information for a publicly exported filesystem * (WebNFS). Currently the specs allow just for one such filesystem. */ struct nfs_public { int np_valid; /* Do we hold valid information */ fhandle_t np_handle; /* Filehandle for pub fs (internal) */ struct mount *np_mount; /* Mountpoint of exported fs */ char *np_index; /* Index file */ }; /* * Filesystem configuration information. One of these exists for each * type of filesystem supported by the kernel. These are searched at * mount time to identify the requested filesystem. * * XXX: Never change the first two arguments! */ struct vfsconf { u_int vfc_version; /* ABI version number */ char vfc_name[MFSNAMELEN]; /* filesystem type name */ struct vfsops *vfc_vfsops; /* filesystem operations vector */ struct vfsops *vfc_vfsops_sd; /* ... signal-deferred */ int vfc_typenum; /* historic filesystem type number */ int vfc_refcount; /* number mounted of this type */ int vfc_flags; /* permanent flags */ int vfc_prison_flag; /* prison allow.mount.* flag */ struct vfsoptdecl *vfc_opts; /* mount options */ TAILQ_ENTRY(vfsconf) vfc_list; /* list of vfscons */ }; /* Userland version of the struct vfsconf. */ struct xvfsconf { struct vfsops *vfc_vfsops; /* filesystem operations vector */ char vfc_name[MFSNAMELEN]; /* filesystem type name */ int vfc_typenum; /* historic filesystem type number */ int vfc_refcount; /* number mounted of this type */ int vfc_flags; /* permanent flags */ struct vfsconf *vfc_next; /* next in list */ }; #ifndef BURN_BRIDGES struct ovfsconf { void *vfc_vfsops; char vfc_name[32]; int vfc_index; int vfc_refcount; int vfc_flags; }; #endif /* * NB: these flags refer to IMPLEMENTATION properties, not properties of * any actual mounts; i.e., it does not make sense to change the flags. */ #define VFCF_STATIC 0x00010000 /* statically compiled into kernel */ #define VFCF_NETWORK 0x00020000 /* may get data over the network */ #define VFCF_READONLY 0x00040000 /* writes are not implemented */ #define VFCF_SYNTHETIC 0x00080000 /* data does not represent real files */ #define VFCF_LOOPBACK 0x00100000 /* aliases some other mounted FS */ #define VFCF_UNICODE 0x00200000 /* stores file names as Unicode */ #define VFCF_JAIL 0x00400000 /* can be mounted from within a jail */ #define VFCF_DELEGADMIN 0x00800000 /* supports delegated administration */ #define VFCF_SBDRY 0x01000000 /* Stop at Boundary: defer stop requests to kernel->user (AST) transition */ typedef uint32_t fsctlop_t; struct vfsidctl { int vc_vers; /* should be VFSIDCTL_VERS1 (below) */ fsid_t vc_fsid; /* fsid to operate on */ char vc_fstypename[MFSNAMELEN]; /* type of fs 'nfs' or '*' */ fsctlop_t vc_op; /* operation VFS_CTL_* (below) */ void *vc_ptr; /* pointer to data structure */ size_t vc_len; /* sizeof said structure */ u_int32_t vc_spare[12]; /* spare (must be zero) */ }; /* vfsidctl API version. */ #define VFS_CTL_VERS1 0x01 /* * New style VFS sysctls, do not reuse/conflict with the namespace for * private sysctls. * All "global" sysctl ops have the 33rd bit set: * 0x...1.... * Private sysctl ops should have the 33rd bit unset. */ #define VFS_CTL_QUERY 0x00010001 /* anything wrong? (vfsquery) */ #define VFS_CTL_TIMEO 0x00010002 /* set timeout for vfs notification */ #define VFS_CTL_NOLOCKS 0x00010003 /* disable file locking */ struct vfsquery { u_int32_t vq_flags; u_int32_t vq_spare[31]; }; /* vfsquery flags */ #define VQ_NOTRESP 0x0001 /* server down */ #define VQ_NEEDAUTH 0x0002 /* server bad auth */ #define VQ_LOWDISK 0x0004 /* we're low on space */ #define VQ_MOUNT 0x0008 /* new filesystem arrived */ #define VQ_UNMOUNT 0x0010 /* filesystem has left */ #define VQ_DEAD 0x0020 /* filesystem is dead, needs force unmount */ #define VQ_ASSIST 0x0040 /* filesystem needs assistance from external program */ #define VQ_NOTRESPLOCK 0x0080 /* server lockd down */ #define VQ_FLAG0100 0x0100 /* placeholder */ #define VQ_FLAG0200 0x0200 /* placeholder */ #define VQ_FLAG0400 0x0400 /* placeholder */ #define VQ_FLAG0800 0x0800 /* placeholder */ #define VQ_FLAG1000 0x1000 /* placeholder */ #define VQ_FLAG2000 0x2000 /* placeholder */ #define VQ_FLAG4000 0x4000 /* placeholder */ #define VQ_FLAG8000 0x8000 /* placeholder */ #ifdef _KERNEL /* Point a sysctl request at a vfsidctl's data. */ #define VCTLTOREQ(vc, req) \ do { \ (req)->newptr = (vc)->vc_ptr; \ (req)->newlen = (vc)->vc_len; \ (req)->newidx = 0; \ } while (0) #endif struct iovec; struct uio; #ifdef _KERNEL /* * vfs_busy specific flags and mask. */ #define MBF_NOWAIT 0x01 #define MBF_MNTLSTLOCK 0x02 #define MBF_MASK (MBF_NOWAIT | MBF_MNTLSTLOCK) #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_MOUNT); MALLOC_DECLARE(M_STATFS); #endif extern int maxvfsconf; /* highest defined filesystem type */ TAILQ_HEAD(vfsconfhead, vfsconf); extern struct vfsconfhead vfsconf; /* * Operations supported on mounted filesystem. */ struct mount_args; struct nameidata; struct sysctl_req; struct mntarg; /* * N.B., vfs_cmount is the ancient vfsop invoked by the old mount(2) syscall. * The new way is vfs_mount. * * vfs_cmount implementations typically translate arguments from their * respective old per-FS structures into the key-value list supported by * nmount(2), then use kernel_mount(9) to mimic nmount(2) from kernelspace. * * Filesystems with mounters that use nmount(2) do not need to and should not * implement vfs_cmount. Hopefully a future cleanup can remove vfs_cmount and * mount(2) entirely. */ typedef int vfs_cmount_t(struct mntarg *ma, void *data, uint64_t flags); typedef int vfs_unmount_t(struct mount *mp, int mntflags); typedef int vfs_root_t(struct mount *mp, int flags, struct vnode **vpp); typedef int vfs_quotactl_t(struct mount *mp, int cmds, uid_t uid, void *arg); typedef int vfs_statfs_t(struct mount *mp, struct statfs *sbp); typedef int vfs_sync_t(struct mount *mp, int waitfor); typedef int vfs_vget_t(struct mount *mp, ino_t ino, int flags, struct vnode **vpp); typedef int vfs_fhtovp_t(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp); typedef int vfs_checkexp_t(struct mount *mp, struct sockaddr *nam, uint64_t *extflagsp, struct ucred **credanonp, int *numsecflavors, int *secflavors); typedef int vfs_init_t(struct vfsconf *); typedef int vfs_uninit_t(struct vfsconf *); typedef int vfs_extattrctl_t(struct mount *mp, int cmd, struct vnode *filename_vp, int attrnamespace, const char *attrname); typedef int vfs_mount_t(struct mount *mp); typedef int vfs_sysctl_t(struct mount *mp, fsctlop_t op, struct sysctl_req *req); typedef void vfs_susp_clean_t(struct mount *mp); typedef void vfs_notify_lowervp_t(struct mount *mp, struct vnode *lowervp); typedef void vfs_purge_t(struct mount *mp); struct vfsops { vfs_mount_t *vfs_mount; vfs_cmount_t *vfs_cmount; vfs_unmount_t *vfs_unmount; vfs_root_t *vfs_root; vfs_root_t *vfs_cachedroot; vfs_quotactl_t *vfs_quotactl; vfs_statfs_t *vfs_statfs; vfs_sync_t *vfs_sync; vfs_vget_t *vfs_vget; vfs_fhtovp_t *vfs_fhtovp; vfs_checkexp_t *vfs_checkexp; vfs_init_t *vfs_init; vfs_uninit_t *vfs_uninit; vfs_extattrctl_t *vfs_extattrctl; vfs_sysctl_t *vfs_sysctl; vfs_susp_clean_t *vfs_susp_clean; vfs_notify_lowervp_t *vfs_reclaim_lowervp; vfs_notify_lowervp_t *vfs_unlink_lowervp; vfs_purge_t *vfs_purge; vfs_mount_t *vfs_spare[6]; /* spares for ABI compat */ }; vfs_statfs_t __vfs_statfs; #define VFS_MOUNT(MP) ({ \ int _rc; \ \ TSRAW(curthread, TS_ENTER, "VFS_MOUNT", (MP)->mnt_vfc->vfc_name);\ _rc = (*(MP)->mnt_op->vfs_mount)(MP); \ TSRAW(curthread, TS_EXIT, "VFS_MOUNT", (MP)->mnt_vfc->vfc_name);\ _rc; }) #define VFS_UNMOUNT(MP, FORCE) ({ \ int _rc; \ \ _rc = (*(MP)->mnt_op->vfs_unmount)(MP, FORCE); \ _rc; }) #define VFS_ROOT(MP, FLAGS, VPP) ({ \ int _rc; \ \ _rc = (*(MP)->mnt_op->vfs_root)(MP, FLAGS, VPP); \ _rc; }) #define VFS_CACHEDROOT(MP, FLAGS, VPP) ({ \ int _rc; \ \ _rc = (*(MP)->mnt_op->vfs_cachedroot)(MP, FLAGS, VPP); \ _rc; }) #define VFS_QUOTACTL(MP, C, U, A) ({ \ int _rc; \ \ _rc = (*(MP)->mnt_op->vfs_quotactl)(MP, C, U, A); \ _rc; }) #define VFS_STATFS(MP, SBP) ({ \ int _rc; \ \ _rc = __vfs_statfs((MP), (SBP)); \ _rc; }) #define VFS_SYNC(MP, WAIT) ({ \ int _rc; \ \ _rc = (*(MP)->mnt_op->vfs_sync)(MP, WAIT); \ _rc; }) #define VFS_VGET(MP, INO, FLAGS, VPP) ({ \ int _rc; \ \ _rc = (*(MP)->mnt_op->vfs_vget)(MP, INO, FLAGS, VPP); \ _rc; }) #define VFS_FHTOVP(MP, FIDP, FLAGS, VPP) ({ \ int _rc; \ \ _rc = (*(MP)->mnt_op->vfs_fhtovp)(MP, FIDP, FLAGS, VPP); \ _rc; }) #define VFS_CHECKEXP(MP, NAM, EXFLG, CRED, NUMSEC, SEC) ({ \ int _rc; \ \ _rc = (*(MP)->mnt_op->vfs_checkexp)(MP, NAM, EXFLG, CRED, NUMSEC,\ SEC); \ _rc; }) #define VFS_EXTATTRCTL(MP, C, FN, NS, N) ({ \ int _rc; \ \ _rc = (*(MP)->mnt_op->vfs_extattrctl)(MP, C, FN, NS, N); \ _rc; }) #define VFS_SYSCTL(MP, OP, REQ) ({ \ int _rc; \ \ _rc = (*(MP)->mnt_op->vfs_sysctl)(MP, OP, REQ); \ _rc; }) #define VFS_SUSP_CLEAN(MP) do { \ if (*(MP)->mnt_op->vfs_susp_clean != NULL) { \ (*(MP)->mnt_op->vfs_susp_clean)(MP); \ } \ } while (0) #define VFS_RECLAIM_LOWERVP(MP, VP) do { \ if (*(MP)->mnt_op->vfs_reclaim_lowervp != NULL) { \ (*(MP)->mnt_op->vfs_reclaim_lowervp)((MP), (VP)); \ } \ } while (0) #define VFS_UNLINK_LOWERVP(MP, VP) do { \ if (*(MP)->mnt_op->vfs_unlink_lowervp != NULL) { \ (*(MP)->mnt_op->vfs_unlink_lowervp)((MP), (VP)); \ } \ } while (0) #define VFS_PURGE(MP) do { \ if (*(MP)->mnt_op->vfs_purge != NULL) { \ (*(MP)->mnt_op->vfs_purge)(MP); \ } \ } while (0) #define VFS_KNOTE_LOCKED(vp, hint) do \ { \ if (((vp)->v_vflag & VV_NOKNOTE) == 0) \ VN_KNOTE((vp), (hint), KNF_LISTLOCKED); \ } while (0) #define VFS_KNOTE_UNLOCKED(vp, hint) do \ { \ if (((vp)->v_vflag & VV_NOKNOTE) == 0) \ VN_KNOTE((vp), (hint), 0); \ } while (0) #define VFS_NOTIFY_UPPER_RECLAIM 1 #define VFS_NOTIFY_UPPER_UNLINK 2 #include /* * Version numbers. */ #define VFS_VERSION_00 0x19660120 #define VFS_VERSION_01 0x20121030 #define VFS_VERSION_02 0x20180504 #define VFS_VERSION VFS_VERSION_02 #define VFS_SET(vfsops, fsname, flags) \ static struct vfsconf fsname ## _vfsconf = { \ .vfc_version = VFS_VERSION, \ .vfc_name = #fsname, \ .vfc_vfsops = &vfsops, \ .vfc_typenum = -1, \ .vfc_flags = flags, \ }; \ static moduledata_t fsname ## _mod = { \ #fsname, \ vfs_modevent, \ & fsname ## _vfsconf \ }; \ DECLARE_MODULE(fsname, fsname ## _mod, SI_SUB_VFS, SI_ORDER_MIDDLE) /* * exported vnode operations */ int dounmount(struct mount *, int, struct thread *); int kernel_mount(struct mntarg *ma, uint64_t flags); int kernel_vmount(int flags, ...); struct mntarg *mount_arg(struct mntarg *ma, const char *name, const void *val, int len); struct mntarg *mount_argb(struct mntarg *ma, int flag, const char *name); struct mntarg *mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...); struct mntarg *mount_argsu(struct mntarg *ma, const char *name, const void *val, int len); void statfs_scale_blocks(struct statfs *sf, long max_size); struct vfsconf *vfs_byname(const char *); struct vfsconf *vfs_byname_kld(const char *, struct thread *td, int *); void vfs_mount_destroy(struct mount *); void vfs_event_signal(fsid_t *, u_int32_t, intptr_t); void vfs_freeopts(struct vfsoptlist *opts); void vfs_deleteopt(struct vfsoptlist *opts, const char *name); int vfs_buildopts(struct uio *auio, struct vfsoptlist **options); int vfs_flagopt(struct vfsoptlist *opts, const char *name, uint64_t *w, uint64_t val); int vfs_getopt(struct vfsoptlist *, const char *, void **, int *); int vfs_getopt_pos(struct vfsoptlist *opts, const char *name); int vfs_getopt_size(struct vfsoptlist *opts, const char *name, off_t *value); char *vfs_getopts(struct vfsoptlist *, const char *, int *error); int vfs_copyopt(struct vfsoptlist *, const char *, void *, int); int vfs_filteropt(struct vfsoptlist *, const char **legal); void vfs_opterror(struct vfsoptlist *opts, const char *fmt, ...); int vfs_scanopt(struct vfsoptlist *opts, const char *name, const char *fmt, ...); int vfs_setopt(struct vfsoptlist *opts, const char *name, void *value, int len); int vfs_setopt_part(struct vfsoptlist *opts, const char *name, void *value, int len); int vfs_setopts(struct vfsoptlist *opts, const char *name, const char *value); int vfs_setpublicfs /* set publicly exported fs */ (struct mount *, struct netexport *, struct export_args *); void vfs_periodic(struct mount *, int); int vfs_busy(struct mount *, int); int vfs_export /* process mount export info */ (struct mount *, struct export_args *); void vfs_allocate_syncvnode(struct mount *); void vfs_deallocate_syncvnode(struct mount *); int vfs_donmount(struct thread *td, uint64_t fsflags, struct uio *fsoptions); void vfs_getnewfsid(struct mount *); struct cdev *vfs_getrootfsid(struct mount *); struct mount *vfs_getvfs(fsid_t *); /* return vfs given fsid */ struct mount *vfs_busyfs(fsid_t *); int vfs_modevent(module_t, int, void *); void vfs_mount_error(struct mount *, const char *, ...); void vfs_mountroot(void); /* mount our root filesystem */ void vfs_mountedfrom(struct mount *, const char *from); void vfs_notify_upper(struct vnode *, int); void vfs_ref(struct mount *); void vfs_rel(struct mount *); struct mount *vfs_mount_alloc(struct vnode *, struct vfsconf *, const char *, struct ucred *); int vfs_suser(struct mount *, struct thread *); void vfs_unbusy(struct mount *); void vfs_unmountall(void); extern TAILQ_HEAD(mntlist, mount) mountlist; /* mounted filesystem list */ extern struct mtx mountlist_mtx; extern struct nfs_public nfs_pub; extern struct sx vfsconf_sx; #define vfsconf_lock() sx_xlock(&vfsconf_sx) #define vfsconf_unlock() sx_xunlock(&vfsconf_sx) #define vfsconf_slock() sx_slock(&vfsconf_sx) #define vfsconf_sunlock() sx_sunlock(&vfsconf_sx) struct vnode *mntfs_allocvp(struct mount *, struct vnode *); void mntfs_freevp(struct vnode *); /* * Declarations for these vfs default operations are located in * kern/vfs_default.c. They will be automatically used to replace * null entries in VFS ops tables when registering a new filesystem * type in the global table. */ vfs_root_t vfs_stdroot; vfs_quotactl_t vfs_stdquotactl; vfs_statfs_t vfs_stdstatfs; vfs_sync_t vfs_stdsync; vfs_sync_t vfs_stdnosync; vfs_vget_t vfs_stdvget; vfs_fhtovp_t vfs_stdfhtovp; vfs_checkexp_t vfs_stdcheckexp; vfs_init_t vfs_stdinit; vfs_uninit_t vfs_stduninit; vfs_extattrctl_t vfs_stdextattrctl; vfs_sysctl_t vfs_stdsysctl; void syncer_suspend(void); void syncer_resume(void); struct vnode *vfs_cache_root_clear(struct mount *); void vfs_cache_root_set(struct mount *, struct vnode *); void vfs_op_barrier_wait(struct mount *); void vfs_op_enter(struct mount *); void vfs_op_exit_locked(struct mount *); void vfs_op_exit(struct mount *); #ifdef DIAGNOSTIC void vfs_assert_mount_counters(struct mount *); void vfs_dump_mount_counters(struct mount *); #else #define vfs_assert_mount_counters(mp) do { } while (0) #define vfs_dump_mount_counters(mp) do { } while (0) #endif enum mount_counter { MNT_COUNT_REF, MNT_COUNT_LOCKREF, MNT_COUNT_WRITEOPCOUNT }; int vfs_mount_fetch_counter(struct mount *, enum mount_counter); /* * Code transitioning mnt_vfs_ops to > 0 issues IPIs until it observes * all CPUs not executing code enclosed by mnt_thread_in_ops_pcpu. * * This provides an invariant that by the time the last CPU is observed not * executing, everyone else entering will see the counter > 0 and exit. * * Note there is no barrier between vfs_ops and the rest of the code in the * section. It is not necessary as the writer has to wait for everyone to drain * before making any changes or only make changes safe while the section is * executed. */ #define vfs_op_thread_entered(mp) ({ \ MPASS(curthread->td_critnest > 0); \ *zpcpu_get(mp->mnt_thread_in_ops_pcpu) == 1; \ }) #define vfs_op_thread_enter_crit(mp) ({ \ bool _retval_crit = true; \ MPASS(curthread->td_critnest > 0); \ MPASS(!vfs_op_thread_entered(mp)); \ zpcpu_set_protected(mp->mnt_thread_in_ops_pcpu, 1); \ __compiler_membar(); \ if (__predict_false(mp->mnt_vfs_ops > 0)) { \ vfs_op_thread_exit_crit(mp); \ _retval_crit = false; \ } \ _retval_crit; \ }) #define vfs_op_thread_enter(mp) ({ \ bool _retval; \ critical_enter(); \ _retval = vfs_op_thread_enter_crit(mp); \ if (__predict_false(!_retval)) \ critical_exit(); \ _retval; \ }) #define vfs_op_thread_exit_crit(mp) do { \ MPASS(vfs_op_thread_entered(mp)); \ __compiler_membar(); \ zpcpu_set_protected(mp->mnt_thread_in_ops_pcpu, 0); \ } while (0) #define vfs_op_thread_exit(mp) do { \ vfs_op_thread_exit_crit(mp); \ critical_exit(); \ } while (0) #define vfs_mp_count_add_pcpu(mp, count, val) do { \ MPASS(vfs_op_thread_entered(mp)); \ zpcpu_add_protected(mp->mnt_##count##_pcpu, val); \ } while (0) #define vfs_mp_count_sub_pcpu(mp, count, val) do { \ MPASS(vfs_op_thread_entered(mp)); \ zpcpu_sub_protected(mp->mnt_##count##_pcpu, val); \ } while (0) #else /* !_KERNEL */ #include struct stat; __BEGIN_DECLS int fhlink(struct fhandle *, const char *); int fhlinkat(struct fhandle *, int, const char *); int fhopen(const struct fhandle *, int); int fhreadlink(struct fhandle *, char *, size_t); int fhstat(const struct fhandle *, struct stat *); int fhstatfs(const struct fhandle *, struct statfs *); int fstatfs(int, struct statfs *); int getfh(const char *, fhandle_t *); int getfhat(int, char *, struct fhandle *, int); int getfsstat(struct statfs *, long, int); int getmntinfo(struct statfs **, int); int lgetfh(const char *, fhandle_t *); int mount(const char *, const char *, int, void *); int nmount(struct iovec *, unsigned int, int); int statfs(const char *, struct statfs *); int unmount(const char *, int); /* C library stuff */ int getvfsbyname(const char *, struct xvfsconf *); __END_DECLS #endif /* _KERNEL */ #endif /* !_SYS_MOUNT_H_ */ Index: projects/nfs-over-tls/usr.sbin/mountd/mountd.c =================================================================== --- projects/nfs-over-tls/usr.sbin/mountd/mountd.c (revision 364495) +++ projects/nfs-over-tls/usr.sbin/mountd/mountd.c (revision 364496) @@ -1,3799 +1,3865 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Herb Hasler and Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1989, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /*not lint*/ #if 0 #ifndef lint static char sccsid[] = "@(#)mountd.c 8.15 (Berkeley) 5/1/95"; #endif /*not lint*/ #endif #include __FBSDID("$FreeBSD$"); #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pathnames.h" #include "mntopts.h" #ifdef DEBUG #include #endif /* * Structures for keeping the mount list and export list */ struct mountlist { char ml_host[MNTNAMLEN+1]; char ml_dirp[MNTPATHLEN+1]; SLIST_ENTRY(mountlist) next; }; struct dirlist { struct dirlist *dp_left; struct dirlist *dp_right; int dp_flag; struct hostlist *dp_hosts; /* List of hosts this dir exported to */ char *dp_dirp; }; /* dp_flag bits */ #define DP_DEFSET 0x1 #define DP_HOSTSET 0x2 +/* + * maproot/mapall credentials. + */ +struct expcred { + uid_t cr_uid; + int cr_ngroups; + gid_t cr_groups[NGROUPS_MAX + 1]; +}; + struct exportlist { struct dirlist *ex_dirl; struct dirlist *ex_defdir; struct grouplist *ex_grphead; int ex_flag; fsid_t ex_fs; char *ex_fsdir; char *ex_indexfile; - struct xucred ex_defanon; - int ex_defexflags; + struct expcred ex_defanon; + uint64_t ex_defexflags; int ex_numsecflavors; int ex_secflavors[MAXSECFLAVORS]; int ex_defnumsecflavors; int ex_defsecflavors[MAXSECFLAVORS]; SLIST_ENTRY(exportlist) entries; }; /* ex_flag bits */ #define EX_LINKED 0x1 #define EX_DONE 0x2 #define EX_DEFSET 0x4 #define EX_PUBLICFH 0x8 SLIST_HEAD(exportlisthead, exportlist); struct netmsk { struct sockaddr_storage nt_net; struct sockaddr_storage nt_mask; char *nt_name; }; union grouptypes { struct addrinfo *gt_addrinfo; struct netmsk gt_net; }; struct grouplist { int gr_type; union grouptypes gr_ptr; struct grouplist *gr_next; - struct xucred gr_anon; - int gr_exflags; + struct expcred gr_anon; + uint64_t gr_exflags; int gr_flag; int gr_numsecflavors; int gr_secflavors[MAXSECFLAVORS]; }; /* Group types */ #define GT_NULL 0x0 #define GT_HOST 0x1 #define GT_NET 0x2 #define GT_DEFAULT 0x3 #define GT_IGNORE 0x5 /* Group flags */ #define GR_FND 0x1 struct hostlist { int ht_flag; /* Uses DP_xx bits */ struct grouplist *ht_grp; struct hostlist *ht_next; }; struct fhreturn { int fhr_flag; int fhr_vers; nfsfh_t fhr_fh; int fhr_numsecflavors; int *fhr_secflavors; }; #define GETPORT_MAXTRY 20 /* Max tries to get a port # */ +/* + * How long to delay a reload of exports when there are RPC request(s) + * to process, in usec. Must be less than 1second. + */ +#define RELOADDELAY 250000 + /* Global defs */ static char *add_expdir(struct dirlist **, char *, int); static void add_dlist(struct dirlist **, struct dirlist *, struct grouplist *, int, struct exportlist *, - struct xucred *, int); + struct expcred *, uint64_t); static void add_mlist(char *, char *); static int check_dirpath(char *); static int check_options(struct dirlist *); static int checkmask(struct sockaddr *sa); static int chk_host(struct dirlist *, struct sockaddr *, int *, int *, int *, int **); static char *strsep_quote(char **stringp, const char *delim); static int create_service(struct netconfig *nconf); static void complete_service(struct netconfig *nconf, char *port_str); static void clearout_service(void); static void del_mlist(char *hostp, char *dirp); static struct dirlist *dirp_search(struct dirlist *, char *); static int do_export_mount(struct exportlist *, struct statfs *); -static int do_mount(struct exportlist *, struct grouplist *, int, - struct xucred *, char *, int, struct statfs *, int, int *); +static int do_mount(struct exportlist *, struct grouplist *, uint64_t, + struct expcred *, char *, int, struct statfs *, int, int *); static int do_opt(char **, char **, struct exportlist *, - struct grouplist *, int *, int *, struct xucred *); + struct grouplist *, int *, uint64_t *, struct expcred *); static struct exportlist *ex_search(fsid_t *, struct exportlisthead *); static struct exportlist *get_exp(void); static void free_dir(struct dirlist *); static void free_exp(struct exportlist *); static void free_grp(struct grouplist *); static void free_host(struct hostlist *); static void free_v4rootexp(void); static void get_exportlist_one(int); static void get_exportlist(int); static void insert_exports(struct exportlist *, struct exportlisthead *); static void free_exports(struct exportlisthead *); static void read_exportfile(int); static int compare_nmount_exportlist(struct iovec *, int, char *); static int compare_export(struct exportlist *, struct exportlist *); -static int compare_cred(struct xucred *, struct xucred *); +static int compare_cred(struct expcred *, struct expcred *); static int compare_secflavor(int *, int *, int); static void delete_export(struct iovec *, int, struct statfs *, char *); static int get_host(char *, struct grouplist *, struct grouplist *); static struct hostlist *get_ht(void); static int get_line(void); static void get_mountlist(void); static int get_net(char *, struct netmsk *, int); static void getexp_err(struct exportlist *, struct grouplist *, const char *); static struct grouplist *get_grp(void); static void hang_dirp(struct dirlist *, struct grouplist *, - struct exportlist *, int, struct xucred *, int); + struct exportlist *, int, struct expcred *, uint64_t); static void huphandler(int sig); static int makemask(struct sockaddr_storage *ssp, int bitlen); static void mntsrv(struct svc_req *, SVCXPRT *); static void nextfield(char **, char **); static void out_of_mem(void); -static void parsecred(char *, struct xucred *); +static void parsecred(char *, struct expcred *); static int parsesec(char *, struct exportlist *); static int put_exlist(struct dirlist *, XDR *, struct dirlist *, int *, int); static void *sa_rawaddr(struct sockaddr *sa, int *nbytes); static int sacmp(struct sockaddr *sa1, struct sockaddr *sa2, struct sockaddr *samask); static int scan_tree(struct dirlist *, struct sockaddr *); static void usage(void); static int xdr_dir(XDR *, char *); static int xdr_explist(XDR *, caddr_t); static int xdr_explist_brief(XDR *, caddr_t); static int xdr_explist_common(XDR *, caddr_t, int); static int xdr_fhs(XDR *, caddr_t); static int xdr_mlist(XDR *, caddr_t); static void terminate(int); +static void cp_cred(struct expcred *, struct expcred *); #define EXPHASH(f) (fnv_32_buf((f), sizeof(fsid_t), 0) % exphashsize) static struct exportlisthead *exphead = NULL; static struct exportlisthead *oldexphead = NULL; static int exphashsize = 0; static SLIST_HEAD(, mountlist) mlhead = SLIST_HEAD_INITIALIZER(&mlhead); static char *exnames_default[2] = { _PATH_EXPORTS, NULL }; static char **exnames; static char **hosts = NULL; -static struct xucred def_anon = { - XUCRED_VERSION, - (uid_t)65534, - 1, - { (gid_t)65533 }, - { NULL } -}; static int force_v2 = 0; static int resvport_only = 1; static int nhosts = 0; static int dir_only = 1; static int dolog = 0; static int got_sighup = 0; static int xcreated = 0; static char *svcport_str = NULL; static int mallocd_svcport = 0; static int *sock_fd; static int sock_fdcnt; static int sock_fdpos; static int suspend_nfsd = 0; static int opt_flags; static int have_v6 = 1; static int v4root_phase = 0; static char v4root_dirpath[PATH_MAX + 1]; static struct exportlist *v4root_ep = NULL; static int has_publicfh = 0; static int has_set_publicfh = 0; static struct pidfh *pfh = NULL; /* Bits for opt_flags above */ #define OP_MAPROOT 0x01 #define OP_MAPALL 0x02 /* 0x4 free */ #define OP_MASK 0x08 #define OP_NET 0x10 #define OP_ALLDIRS 0x40 #define OP_HAVEMASK 0x80 /* A mask was specified or inferred. */ #define OP_QUIET 0x100 #define OP_MASKLEN 0x200 #define OP_SEC 0x400 #ifdef DEBUG static int debug = 1; static void SYSLOG(int, const char *, ...) __printflike(2, 3); #define syslog SYSLOG #else static int debug = 0; #endif /* * The LOGDEBUG() syslog() calls are always compiled into the daemon. * To enable them, create a file at _PATH_MOUNTDDEBUG. This file can be empty. * To disable the logging, just delete the file at _PATH_MOUNTDDEBUG. */ static int logdebug = 0; #define LOGDEBUG(format, ...) \ (logdebug ? syslog(LOG_DEBUG, format, ## __VA_ARGS__) : 0) /* * Similar to strsep(), but it allows for quoted strings * and escaped characters. * * It returns the string (or NULL, if *stringp is NULL), * which is a de-quoted version of the string if necessary. * * It modifies *stringp in place. */ static char * strsep_quote(char **stringp, const char *delim) { char *srcptr, *dstptr, *retval; char quot = 0; if (stringp == NULL || *stringp == NULL) return (NULL); srcptr = dstptr = retval = *stringp; while (*srcptr) { /* * We're looking for several edge cases here. * First: if we're in quote state (quot != 0), * then we ignore the delim characters, but otherwise * process as normal, unless it is the quote character. * Second: if the current character is a backslash, * we take the next character as-is, without checking * for delim, quote, or backslash. Exception: if the * next character is a NUL, that's the end of the string. * Third: if the character is a quote character, we toggle * quote state. * Otherwise: check the current character for NUL, or * being in delim, and end the string if either is true. */ if (*srcptr == '\\') { srcptr++; /* * The edge case here is if the next character * is NUL, we want to stop processing. But if * it's not NUL, then we simply want to copy it. */ if (*srcptr) { *dstptr++ = *srcptr++; } continue; } if (quot == 0 && (*srcptr == '\'' || *srcptr == '"')) { quot = *srcptr++; continue; } if (quot && *srcptr == quot) { /* End of the quoted part */ quot = 0; srcptr++; continue; } if (!quot && strchr(delim, *srcptr)) break; *dstptr++ = *srcptr++; } *stringp = (*srcptr == '\0') ? NULL : srcptr + 1; *dstptr = 0; /* Terminate the string */ return (retval); } /* * Mountd server for NFS mount protocol as described in: * NFS: Network File System Protocol Specification, RFC1094, Appendix A * The optional arguments are the exports file name * default: _PATH_EXPORTS * and "-n" to allow nonroot mount. */ int main(int argc, char **argv) { fd_set readfds; struct netconfig *nconf; char *endptr, **hosts_bak; void *nc_handle; pid_t otherpid; in_port_t svcport; int c, k, s; int maxrec = RPC_MAXDATASIZE; int attempt_cnt, port_len, port_pos, ret; char **port_list; + uint64_t curtime, nexttime; + struct timeval tv; + struct timespec tp; + sigset_t sighup_mask; /* Check that another mountd isn't already running. */ pfh = pidfile_open(_PATH_MOUNTDPID, 0600, &otherpid); if (pfh == NULL) { if (errno == EEXIST) errx(1, "mountd already running, pid: %d.", otherpid); warn("cannot open or create pidfile"); } s = socket(AF_INET6, SOCK_DGRAM, IPPROTO_UDP); if (s < 0) have_v6 = 0; else close(s); while ((c = getopt(argc, argv, "2deh:lnp:rS")) != -1) switch (c) { case '2': force_v2 = 1; break; case 'e': /* now a no-op, since this is the default */ break; case 'n': resvport_only = 0; break; case 'r': dir_only = 0; break; case 'd': debug = debug ? 0 : 1; break; case 'l': dolog = 1; break; case 'p': endptr = NULL; svcport = (in_port_t)strtoul(optarg, &endptr, 10); if (endptr == NULL || *endptr != '\0' || svcport == 0 || svcport >= IPPORT_MAX) usage(); svcport_str = strdup(optarg); break; case 'h': ++nhosts; hosts_bak = hosts; hosts_bak = realloc(hosts, nhosts * sizeof(char *)); if (hosts_bak == NULL) { if (hosts != NULL) { for (k = 0; k < nhosts; k++) free(hosts[k]); free(hosts); out_of_mem(); } } hosts = hosts_bak; hosts[nhosts - 1] = strdup(optarg); if (hosts[nhosts - 1] == NULL) { for (k = 0; k < (nhosts - 1); k++) free(hosts[k]); free(hosts); out_of_mem(); } break; case 'S': suspend_nfsd = 1; break; default: usage(); } if (modfind("nfsd") < 0) { /* Not present in kernel, try loading it */ if (kldload("nfsd") < 0 || modfind("nfsd") < 0) errx(1, "NFS server is not available"); } argc -= optind; argv += optind; if (argc > 0) exnames = argv; else exnames = exnames_default; openlog("mountd", LOG_PID, LOG_DAEMON); if (debug) warnx("getting export list"); get_exportlist(0); if (debug) warnx("getting mount list"); get_mountlist(); if (debug) warnx("here we go"); if (debug == 0) { daemon(0, 0); signal(SIGINT, SIG_IGN); signal(SIGQUIT, SIG_IGN); } signal(SIGHUP, huphandler); signal(SIGTERM, terminate); signal(SIGPIPE, SIG_IGN); pidfile_write(pfh); rpcb_unset(MOUNTPROG, MOUNTVERS, NULL); rpcb_unset(MOUNTPROG, MOUNTVERS3, NULL); rpc_control(RPC_SVC_CONNMAXREC_SET, &maxrec); if (!resvport_only) { if (sysctlbyname("vfs.nfsd.nfs_privport", NULL, NULL, &resvport_only, sizeof(resvport_only)) != 0 && errno != ENOENT) { syslog(LOG_ERR, "sysctl: %m"); exit(1); } } /* * If no hosts were specified, add a wildcard entry to bind to * INADDR_ANY. Otherwise make sure 127.0.0.1 and ::1 are added to the * list. */ if (nhosts == 0) { hosts = malloc(sizeof(char *)); if (hosts == NULL) out_of_mem(); hosts[0] = "*"; nhosts = 1; } else { hosts_bak = hosts; if (have_v6) { hosts_bak = realloc(hosts, (nhosts + 2) * sizeof(char *)); if (hosts_bak == NULL) { for (k = 0; k < nhosts; k++) free(hosts[k]); free(hosts); out_of_mem(); } else hosts = hosts_bak; nhosts += 2; hosts[nhosts - 2] = "::1"; } else { hosts_bak = realloc(hosts, (nhosts + 1) * sizeof(char *)); if (hosts_bak == NULL) { for (k = 0; k < nhosts; k++) free(hosts[k]); free(hosts); out_of_mem(); } else { nhosts += 1; hosts = hosts_bak; } } hosts[nhosts - 1] = "127.0.0.1"; } attempt_cnt = 1; sock_fdcnt = 0; sock_fd = NULL; port_list = NULL; port_len = 0; nc_handle = setnetconfig(); while ((nconf = getnetconfig(nc_handle))) { if (nconf->nc_flag & NC_VISIBLE) { if (have_v6 == 0 && strcmp(nconf->nc_protofmly, "inet6") == 0) { /* DO NOTHING */ } else { ret = create_service(nconf); if (ret == 1) /* Ignore this call */ continue; if (ret < 0) { /* * Failed to bind port, so close off * all sockets created and try again * if the port# was dynamically * assigned via bind(2). */ clearout_service(); if (mallocd_svcport != 0 && attempt_cnt < GETPORT_MAXTRY) { free(svcport_str); svcport_str = NULL; mallocd_svcport = 0; } else { errno = EADDRINUSE; syslog(LOG_ERR, "bindresvport_sa: %m"); exit(1); } /* Start over at the first service. */ free(sock_fd); sock_fdcnt = 0; sock_fd = NULL; nc_handle = setnetconfig(); attempt_cnt++; } else if (mallocd_svcport != 0 && attempt_cnt == GETPORT_MAXTRY) { /* * For the last attempt, allow * different port #s for each nconf * by saving the svcport_str and * setting it back to NULL. */ port_list = realloc(port_list, (port_len + 1) * sizeof(char *)); if (port_list == NULL) out_of_mem(); port_list[port_len++] = svcport_str; svcport_str = NULL; mallocd_svcport = 0; } } } } /* * Successfully bound the ports, so call complete_service() to * do the rest of the setup on the service(s). */ sock_fdpos = 0; port_pos = 0; nc_handle = setnetconfig(); while ((nconf = getnetconfig(nc_handle))) { if (nconf->nc_flag & NC_VISIBLE) { if (have_v6 == 0 && strcmp(nconf->nc_protofmly, "inet6") == 0) { /* DO NOTHING */ } else if (port_list != NULL) { if (port_pos >= port_len) { syslog(LOG_ERR, "too many port#s"); exit(1); } complete_service(nconf, port_list[port_pos++]); } else complete_service(nconf, svcport_str); } } endnetconfig(nc_handle); free(sock_fd); if (port_list != NULL) { for (port_pos = 0; port_pos < port_len; port_pos++) free(port_list[port_pos]); free(port_list); } if (xcreated == 0) { syslog(LOG_ERR, "could not create any services"); exit(1); } /* Expand svc_run() here so that we can call get_exportlist(). */ + curtime = nexttime = 0; + sigemptyset(&sighup_mask); + sigaddset(&sighup_mask, SIGHUP); for (;;) { - if (got_sighup) { - get_exportlist(1); + clock_gettime(CLOCK_MONOTONIC, &tp); + curtime = tp.tv_sec; + curtime = curtime * 1000000 + tp.tv_nsec / 1000; + sigprocmask(SIG_BLOCK, &sighup_mask, NULL); + if (got_sighup && curtime >= nexttime) { got_sighup = 0; - } + sigprocmask(SIG_UNBLOCK, &sighup_mask, NULL); + get_exportlist(1); + clock_gettime(CLOCK_MONOTONIC, &tp); + nexttime = tp.tv_sec; + nexttime = nexttime * 1000000 + tp.tv_nsec / 1000 + + RELOADDELAY; + } else + sigprocmask(SIG_UNBLOCK, &sighup_mask, NULL); + + /* + * If a reload is pending, poll for received request(s), + * otherwise set a RELOADDELAY timeout, since a SIGHUP + * could be processed between the got_sighup test and + * the select() system call. + */ + tv.tv_sec = 0; + if (got_sighup) + tv.tv_usec = 0; + else + tv.tv_usec = RELOADDELAY; readfds = svc_fdset; - switch (select(svc_maxfd + 1, &readfds, NULL, NULL, NULL)) { + switch (select(svc_maxfd + 1, &readfds, NULL, NULL, &tv)) { case -1: - if (errno == EINTR) - continue; + if (errno == EINTR) { + /* Allow a reload now. */ + nexttime = 0; + continue; + } syslog(LOG_ERR, "mountd died: select: %m"); exit(1); case 0: + /* Allow a reload now. */ + nexttime = 0; continue; default: svc_getreqset(&readfds); } } } /* * This routine creates and binds sockets on the appropriate * addresses. It gets called one time for each transport. * It returns 0 upon success, 1 for ingore the call and -1 to indicate * bind failed with EADDRINUSE. * Any file descriptors that have been created are stored in sock_fd and * the total count of them is maintained in sock_fdcnt. */ static int create_service(struct netconfig *nconf) { struct addrinfo hints, *res = NULL; struct sockaddr_in *sin; struct sockaddr_in6 *sin6; struct __rpc_sockinfo si; int aicode; int fd; int nhostsbak; int one = 1; int r; u_int32_t host_addr[4]; /* IPv4 or IPv6 */ int mallocd_res; if ((nconf->nc_semantics != NC_TPI_CLTS) && (nconf->nc_semantics != NC_TPI_COTS) && (nconf->nc_semantics != NC_TPI_COTS_ORD)) return (1); /* not my type */ /* * XXX - using RPC library internal functions. */ if (!__rpc_nconf2sockinfo(nconf, &si)) { syslog(LOG_ERR, "cannot get information for %s", nconf->nc_netid); return (1); } /* Get mountd's address on this transport */ memset(&hints, 0, sizeof hints); hints.ai_family = si.si_af; hints.ai_socktype = si.si_socktype; hints.ai_protocol = si.si_proto; /* * Bind to specific IPs if asked to */ nhostsbak = nhosts; while (nhostsbak > 0) { --nhostsbak; sock_fd = realloc(sock_fd, (sock_fdcnt + 1) * sizeof(int)); if (sock_fd == NULL) out_of_mem(); sock_fd[sock_fdcnt++] = -1; /* Set invalid for now. */ mallocd_res = 0; hints.ai_flags = AI_PASSIVE; /* * XXX - using RPC library internal functions. */ if ((fd = __rpc_nconf2fd(nconf)) < 0) { int non_fatal = 0; if (errno == EAFNOSUPPORT && nconf->nc_semantics != NC_TPI_CLTS) non_fatal = 1; syslog(non_fatal ? LOG_DEBUG : LOG_ERR, "cannot create socket for %s", nconf->nc_netid); if (non_fatal != 0) continue; exit(1); } switch (hints.ai_family) { case AF_INET: if (inet_pton(AF_INET, hosts[nhostsbak], host_addr) == 1) { hints.ai_flags |= AI_NUMERICHOST; } else { /* * Skip if we have an AF_INET6 address. */ if (inet_pton(AF_INET6, hosts[nhostsbak], host_addr) == 1) { close(fd); continue; } } break; case AF_INET6: if (inet_pton(AF_INET6, hosts[nhostsbak], host_addr) == 1) { hints.ai_flags |= AI_NUMERICHOST; } else { /* * Skip if we have an AF_INET address. */ if (inet_pton(AF_INET, hosts[nhostsbak], host_addr) == 1) { close(fd); continue; } } /* * We're doing host-based access checks here, so don't * allow v4-in-v6 to confuse things. The kernel will * disable it by default on NFS sockets too. */ if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof one) < 0) { syslog(LOG_ERR, "can't disable v4-in-v6 on IPv6 socket"); exit(1); } break; default: break; } /* * If no hosts were specified, just bind to INADDR_ANY */ if (strcmp("*", hosts[nhostsbak]) == 0) { if (svcport_str == NULL) { res = malloc(sizeof(struct addrinfo)); if (res == NULL) out_of_mem(); mallocd_res = 1; res->ai_flags = hints.ai_flags; res->ai_family = hints.ai_family; res->ai_protocol = hints.ai_protocol; switch (res->ai_family) { case AF_INET: sin = malloc(sizeof(struct sockaddr_in)); if (sin == NULL) out_of_mem(); sin->sin_family = AF_INET; sin->sin_port = htons(0); sin->sin_addr.s_addr = htonl(INADDR_ANY); res->ai_addr = (struct sockaddr*) sin; res->ai_addrlen = (socklen_t) sizeof(struct sockaddr_in); break; case AF_INET6: sin6 = malloc(sizeof(struct sockaddr_in6)); if (sin6 == NULL) out_of_mem(); sin6->sin6_family = AF_INET6; sin6->sin6_port = htons(0); sin6->sin6_addr = in6addr_any; res->ai_addr = (struct sockaddr*) sin6; res->ai_addrlen = (socklen_t) sizeof(struct sockaddr_in6); break; default: syslog(LOG_ERR, "bad addr fam %d", res->ai_family); exit(1); } } else { if ((aicode = getaddrinfo(NULL, svcport_str, &hints, &res)) != 0) { syslog(LOG_ERR, "cannot get local address for %s: %s", nconf->nc_netid, gai_strerror(aicode)); close(fd); continue; } } } else { if ((aicode = getaddrinfo(hosts[nhostsbak], svcport_str, &hints, &res)) != 0) { syslog(LOG_ERR, "cannot get local address for %s: %s", nconf->nc_netid, gai_strerror(aicode)); close(fd); continue; } } /* Store the fd. */ sock_fd[sock_fdcnt - 1] = fd; /* Now, attempt the bind. */ r = bindresvport_sa(fd, res->ai_addr); if (r != 0) { if (errno == EADDRINUSE && mallocd_svcport != 0) { if (mallocd_res != 0) { free(res->ai_addr); free(res); } else freeaddrinfo(res); return (-1); } syslog(LOG_ERR, "bindresvport_sa: %m"); exit(1); } if (svcport_str == NULL) { svcport_str = malloc(NI_MAXSERV * sizeof(char)); if (svcport_str == NULL) out_of_mem(); mallocd_svcport = 1; if (getnameinfo(res->ai_addr, res->ai_addr->sa_len, NULL, NI_MAXHOST, svcport_str, NI_MAXSERV * sizeof(char), NI_NUMERICHOST | NI_NUMERICSERV)) errx(1, "Cannot get port number"); } if (mallocd_res != 0) { free(res->ai_addr); free(res); } else freeaddrinfo(res); res = NULL; } return (0); } /* * Called after all the create_service() calls have succeeded, to complete * the setup and registration. */ static void complete_service(struct netconfig *nconf, char *port_str) { struct addrinfo hints, *res = NULL; struct __rpc_sockinfo si; struct netbuf servaddr; SVCXPRT *transp = NULL; int aicode, fd, nhostsbak; int registered = 0; if ((nconf->nc_semantics != NC_TPI_CLTS) && (nconf->nc_semantics != NC_TPI_COTS) && (nconf->nc_semantics != NC_TPI_COTS_ORD)) return; /* not my type */ /* * XXX - using RPC library internal functions. */ if (!__rpc_nconf2sockinfo(nconf, &si)) { syslog(LOG_ERR, "cannot get information for %s", nconf->nc_netid); return; } nhostsbak = nhosts; while (nhostsbak > 0) { --nhostsbak; if (sock_fdpos >= sock_fdcnt) { /* Should never happen. */ syslog(LOG_ERR, "Ran out of socket fd's"); return; } fd = sock_fd[sock_fdpos++]; if (fd < 0) continue; /* * Using -1 tells listen(2) to use * kern.ipc.soacceptqueue for the backlog. */ if (nconf->nc_semantics != NC_TPI_CLTS) listen(fd, -1); if (nconf->nc_semantics == NC_TPI_CLTS ) transp = svc_dg_create(fd, 0, 0); else transp = svc_vc_create(fd, RPC_MAXDATASIZE, RPC_MAXDATASIZE); if (transp != (SVCXPRT *) NULL) { if (!svc_reg(transp, MOUNTPROG, MOUNTVERS, mntsrv, NULL)) syslog(LOG_ERR, "can't register %s MOUNTVERS service", nconf->nc_netid); if (!force_v2) { if (!svc_reg(transp, MOUNTPROG, MOUNTVERS3, mntsrv, NULL)) syslog(LOG_ERR, "can't register %s MOUNTVERS3 service", nconf->nc_netid); } } else syslog(LOG_WARNING, "can't create %s services", nconf->nc_netid); if (registered == 0) { registered = 1; memset(&hints, 0, sizeof hints); hints.ai_flags = AI_PASSIVE; hints.ai_family = si.si_af; hints.ai_socktype = si.si_socktype; hints.ai_protocol = si.si_proto; if ((aicode = getaddrinfo(NULL, port_str, &hints, &res)) != 0) { syslog(LOG_ERR, "cannot get local address: %s", gai_strerror(aicode)); exit(1); } servaddr.buf = malloc(res->ai_addrlen); memcpy(servaddr.buf, res->ai_addr, res->ai_addrlen); servaddr.len = res->ai_addrlen; rpcb_set(MOUNTPROG, MOUNTVERS, nconf, &servaddr); rpcb_set(MOUNTPROG, MOUNTVERS3, nconf, &servaddr); xcreated++; freeaddrinfo(res); } } /* end while */ } /* * Clear out sockets after a failure to bind one of them, so that the * cycle of socket creation/binding can start anew. */ static void clearout_service(void) { int i; for (i = 0; i < sock_fdcnt; i++) { if (sock_fd[i] >= 0) { shutdown(sock_fd[i], SHUT_RDWR); close(sock_fd[i]); } } } static void usage(void) { fprintf(stderr, "usage: mountd [-2] [-d] [-e] [-l] [-n] [-p ] [-r] " "[-S] [-h ] [export_file ...]\n"); exit(1); } /* * The mount rpc service */ void mntsrv(struct svc_req *rqstp, SVCXPRT *transp) { struct exportlist *ep; struct dirlist *dp; struct fhreturn fhr; struct stat stb; struct statfs fsb; char host[NI_MAXHOST], numerichost[NI_MAXHOST]; int lookup_failed = 1; struct sockaddr *saddr; u_short sport; char rpcpath[MNTPATHLEN + 1], dirpath[MAXPATHLEN]; - int bad = 0, defset, hostset; + int defset, hostset; + long bad = 0; sigset_t sighup_mask; int numsecflavors, *secflavorsp; sigemptyset(&sighup_mask); sigaddset(&sighup_mask, SIGHUP); saddr = svc_getrpccaller(transp)->buf; switch (saddr->sa_family) { case AF_INET6: sport = ntohs(((struct sockaddr_in6 *)saddr)->sin6_port); break; case AF_INET: sport = ntohs(((struct sockaddr_in *)saddr)->sin_port); break; default: syslog(LOG_ERR, "request from unknown address family"); return; } switch (rqstp->rq_proc) { case MOUNTPROC_MNT: case MOUNTPROC_UMNT: case MOUNTPROC_UMNTALL: lookup_failed = getnameinfo(saddr, saddr->sa_len, host, sizeof host, NULL, 0, 0); } getnameinfo(saddr, saddr->sa_len, numerichost, sizeof numerichost, NULL, 0, NI_NUMERICHOST); switch (rqstp->rq_proc) { case NULLPROC: if (!svc_sendreply(transp, (xdrproc_t)xdr_void, NULL)) syslog(LOG_ERR, "can't send reply"); return; case MOUNTPROC_MNT: if (sport >= IPPORT_RESERVED && resvport_only) { syslog(LOG_NOTICE, "mount request from %s from unprivileged port", numerichost); svcerr_weakauth(transp); return; } if (!svc_getargs(transp, (xdrproc_t)xdr_dir, rpcpath)) { syslog(LOG_NOTICE, "undecodable mount request from %s", numerichost); svcerr_decode(transp); return; } /* * Get the real pathname and make sure it is a directory * or a regular file if the -r option was specified * and it exists. */ if (realpath(rpcpath, dirpath) == NULL || stat(dirpath, &stb) < 0 || statfs(dirpath, &fsb) < 0) { chdir("/"); /* Just in case realpath doesn't */ syslog(LOG_NOTICE, "mount request from %s for non existent path %s", numerichost, dirpath); if (debug) warnx("stat failed on %s", dirpath); bad = ENOENT; /* We will send error reply later */ } if (!bad && !S_ISDIR(stb.st_mode) && (dir_only || !S_ISREG(stb.st_mode))) { syslog(LOG_NOTICE, "mount request from %s for non-directory path %s", numerichost, dirpath); if (debug) warnx("mounting non-directory %s", dirpath); bad = ENOTDIR; /* We will send error reply later */ } /* Check in the exports list */ sigprocmask(SIG_BLOCK, &sighup_mask, NULL); if (bad) ep = NULL; else ep = ex_search(&fsb.f_fsid, exphead); hostset = defset = 0; if (ep && (chk_host(ep->ex_defdir, saddr, &defset, &hostset, &numsecflavors, &secflavorsp) || ((dp = dirp_search(ep->ex_dirl, dirpath)) && chk_host(dp, saddr, &defset, &hostset, &numsecflavors, &secflavorsp)) || (defset && scan_tree(ep->ex_defdir, saddr) == 0 && scan_tree(ep->ex_dirl, saddr) == 0))) { if (bad) { if (!svc_sendreply(transp, (xdrproc_t)xdr_long, (caddr_t)&bad)) syslog(LOG_ERR, "can't send reply"); sigprocmask(SIG_UNBLOCK, &sighup_mask, NULL); return; } if (hostset & DP_HOSTSET) { fhr.fhr_flag = hostset; fhr.fhr_numsecflavors = numsecflavors; fhr.fhr_secflavors = secflavorsp; } else { fhr.fhr_flag = defset; fhr.fhr_numsecflavors = ep->ex_defnumsecflavors; fhr.fhr_secflavors = ep->ex_defsecflavors; } fhr.fhr_vers = rqstp->rq_vers; /* Get the file handle */ memset(&fhr.fhr_fh, 0, sizeof(nfsfh_t)); if (getfh(dirpath, (fhandle_t *)&fhr.fhr_fh) < 0) { bad = errno; syslog(LOG_ERR, "can't get fh for %s", dirpath); if (!svc_sendreply(transp, (xdrproc_t)xdr_long, (caddr_t)&bad)) syslog(LOG_ERR, "can't send reply"); sigprocmask(SIG_UNBLOCK, &sighup_mask, NULL); return; } if (!svc_sendreply(transp, (xdrproc_t)xdr_fhs, (caddr_t)&fhr)) syslog(LOG_ERR, "can't send reply"); if (!lookup_failed) add_mlist(host, dirpath); else add_mlist(numerichost, dirpath); if (debug) warnx("mount successful"); if (dolog) syslog(LOG_NOTICE, "mount request succeeded from %s for %s", numerichost, dirpath); } else { if (!bad) bad = EACCES; syslog(LOG_NOTICE, "mount request denied from %s for %s", numerichost, dirpath); } if (bad && !svc_sendreply(transp, (xdrproc_t)xdr_long, (caddr_t)&bad)) syslog(LOG_ERR, "can't send reply"); sigprocmask(SIG_UNBLOCK, &sighup_mask, NULL); return; case MOUNTPROC_DUMP: if (!svc_sendreply(transp, (xdrproc_t)xdr_mlist, (caddr_t)NULL)) syslog(LOG_ERR, "can't send reply"); else if (dolog) syslog(LOG_NOTICE, "dump request succeeded from %s", numerichost); return; case MOUNTPROC_UMNT: if (sport >= IPPORT_RESERVED && resvport_only) { syslog(LOG_NOTICE, "umount request from %s from unprivileged port", numerichost); svcerr_weakauth(transp); return; } if (!svc_getargs(transp, (xdrproc_t)xdr_dir, rpcpath)) { syslog(LOG_NOTICE, "undecodable umount request from %s", numerichost); svcerr_decode(transp); return; } if (realpath(rpcpath, dirpath) == NULL) { syslog(LOG_NOTICE, "umount request from %s " "for non existent path %s", numerichost, dirpath); } if (!svc_sendreply(transp, (xdrproc_t)xdr_void, (caddr_t)NULL)) syslog(LOG_ERR, "can't send reply"); if (!lookup_failed) del_mlist(host, dirpath); del_mlist(numerichost, dirpath); if (dolog) syslog(LOG_NOTICE, "umount request succeeded from %s for %s", numerichost, dirpath); return; case MOUNTPROC_UMNTALL: if (sport >= IPPORT_RESERVED && resvport_only) { syslog(LOG_NOTICE, "umountall request from %s from unprivileged port", numerichost); svcerr_weakauth(transp); return; } if (!svc_sendreply(transp, (xdrproc_t)xdr_void, (caddr_t)NULL)) syslog(LOG_ERR, "can't send reply"); if (!lookup_failed) del_mlist(host, NULL); del_mlist(numerichost, NULL); if (dolog) syslog(LOG_NOTICE, "umountall request succeeded from %s", numerichost); return; case MOUNTPROC_EXPORT: if (!svc_sendreply(transp, (xdrproc_t)xdr_explist, (caddr_t)NULL)) if (!svc_sendreply(transp, (xdrproc_t)xdr_explist_brief, (caddr_t)NULL)) syslog(LOG_ERR, "can't send reply"); if (dolog) syslog(LOG_NOTICE, "export request succeeded from %s", numerichost); return; default: svcerr_noproc(transp); return; } } /* * Xdr conversion for a dirpath string */ static int xdr_dir(XDR *xdrsp, char *dirp) { return (xdr_string(xdrsp, &dirp, MNTPATHLEN)); } /* * Xdr routine to generate file handle reply */ static int xdr_fhs(XDR *xdrsp, caddr_t cp) { struct fhreturn *fhrp = (struct fhreturn *)cp; u_long ok = 0, len, auth; int i; if (!xdr_long(xdrsp, &ok)) return (0); switch (fhrp->fhr_vers) { case 1: return (xdr_opaque(xdrsp, (caddr_t)&fhrp->fhr_fh, NFSX_V2FH)); case 3: len = NFSX_V3FH; if (!xdr_long(xdrsp, &len)) return (0); if (!xdr_opaque(xdrsp, (caddr_t)&fhrp->fhr_fh, len)) return (0); if (fhrp->fhr_numsecflavors) { if (!xdr_int(xdrsp, &fhrp->fhr_numsecflavors)) return (0); for (i = 0; i < fhrp->fhr_numsecflavors; i++) if (!xdr_int(xdrsp, &fhrp->fhr_secflavors[i])) return (0); return (1); } else { auth = AUTH_SYS; len = 1; if (!xdr_long(xdrsp, &len)) return (0); return (xdr_long(xdrsp, &auth)); } } return (0); } static int xdr_mlist(XDR *xdrsp, caddr_t cp __unused) { struct mountlist *mlp; int true = 1; int false = 0; char *strp; SLIST_FOREACH(mlp, &mlhead, next) { if (!xdr_bool(xdrsp, &true)) return (0); strp = &mlp->ml_host[0]; if (!xdr_string(xdrsp, &strp, MNTNAMLEN)) return (0); strp = &mlp->ml_dirp[0]; if (!xdr_string(xdrsp, &strp, MNTPATHLEN)) return (0); } if (!xdr_bool(xdrsp, &false)) return (0); return (1); } /* * Xdr conversion for export list */ static int xdr_explist_common(XDR *xdrsp, caddr_t cp __unused, int brief) { struct exportlist *ep; int false = 0; int putdef; sigset_t sighup_mask; int i; sigemptyset(&sighup_mask); sigaddset(&sighup_mask, SIGHUP); sigprocmask(SIG_BLOCK, &sighup_mask, NULL); for (i = 0; i < exphashsize; i++) SLIST_FOREACH(ep, &exphead[i], entries) { putdef = 0; if (put_exlist(ep->ex_dirl, xdrsp, ep->ex_defdir, &putdef, brief)) goto errout; if (ep->ex_defdir && putdef == 0 && put_exlist(ep->ex_defdir, xdrsp, NULL, &putdef, brief)) goto errout; } sigprocmask(SIG_UNBLOCK, &sighup_mask, NULL); if (!xdr_bool(xdrsp, &false)) return (0); return (1); errout: sigprocmask(SIG_UNBLOCK, &sighup_mask, NULL); return (0); } /* * Called from xdr_explist() to traverse the tree and export the * directory paths. */ static int put_exlist(struct dirlist *dp, XDR *xdrsp, struct dirlist *adp, int *putdefp, int brief) { struct grouplist *grp; struct hostlist *hp; int true = 1; int false = 0; int gotalldir = 0; char *strp; if (dp) { if (put_exlist(dp->dp_left, xdrsp, adp, putdefp, brief)) return (1); if (!xdr_bool(xdrsp, &true)) return (1); strp = dp->dp_dirp; if (!xdr_string(xdrsp, &strp, MNTPATHLEN)) return (1); if (adp && !strcmp(dp->dp_dirp, adp->dp_dirp)) { gotalldir = 1; *putdefp = 1; } if (brief) { if (!xdr_bool(xdrsp, &true)) return (1); strp = "(...)"; if (!xdr_string(xdrsp, &strp, MNTPATHLEN)) return (1); } else if ((dp->dp_flag & DP_DEFSET) == 0 && (gotalldir == 0 || (adp->dp_flag & DP_DEFSET) == 0)) { hp = dp->dp_hosts; while (hp) { grp = hp->ht_grp; if (grp->gr_type == GT_HOST) { if (!xdr_bool(xdrsp, &true)) return (1); strp = grp->gr_ptr.gt_addrinfo->ai_canonname; if (!xdr_string(xdrsp, &strp, MNTNAMLEN)) return (1); } else if (grp->gr_type == GT_NET) { if (!xdr_bool(xdrsp, &true)) return (1); strp = grp->gr_ptr.gt_net.nt_name; if (!xdr_string(xdrsp, &strp, MNTNAMLEN)) return (1); } hp = hp->ht_next; if (gotalldir && hp == (struct hostlist *)NULL) { hp = adp->dp_hosts; gotalldir = 0; } } } if (!xdr_bool(xdrsp, &false)) return (1); if (put_exlist(dp->dp_right, xdrsp, adp, putdefp, brief)) return (1); } return (0); } static int xdr_explist(XDR *xdrsp, caddr_t cp) { return xdr_explist_common(xdrsp, cp, 0); } static int xdr_explist_brief(XDR *xdrsp, caddr_t cp) { return xdr_explist_common(xdrsp, cp, 1); } static char *line; static size_t linesize; static FILE *exp_file; /* * Get the export list from one, currently open file */ static void get_exportlist_one(int passno) { struct exportlist *ep; struct grouplist *grp, *tgrp, *savgrp; struct dirlist *dirhead; struct statfs fsb; - struct xucred anon; + struct expcred anon; char *cp, *endcp, *dirp, *hst, *usr, *dom, savedc; - int len, has_host, exflags, got_nondir, dirplen, netgrp; + int len, has_host, got_nondir, dirplen, netgrp; + uint64_t exflags; v4root_phase = 0; dirhead = (struct dirlist *)NULL; while (get_line()) { if (debug) warnx("got line %s", line); cp = line; nextfield(&cp, &endcp); if (*cp == '#') goto nextline; /* * Set defaults. */ has_host = FALSE; - anon = def_anon; - exflags = MNTEX_EXPORTED; + anon.cr_uid = UID_NOBODY; + anon.cr_ngroups = 1; + anon.cr_groups[0] = GID_NOGROUP; + exflags = MNT_EXPORTED; got_nondir = 0; opt_flags = 0; ep = (struct exportlist *)NULL; dirp = NULL; /* * Handle the V4 root dir. */ if (*cp == 'V' && *(cp + 1) == '4' && *(cp + 2) == ':') { /* * V4: just indicates that it is the v4 root point, * so skip over that and set v4root_phase. */ if (v4root_phase > 0) { syslog(LOG_ERR, "V4:duplicate line, ignored"); goto nextline; } v4root_phase = 1; cp += 3; nextfield(&cp, &endcp); } /* * Create new exports list entry */ len = endcp-cp; tgrp = grp = get_grp(); while (len > 0) { if (len > MNTNAMLEN) { getexp_err(ep, tgrp, "mountpoint too long"); goto nextline; } if (*cp == '-') { if (ep == (struct exportlist *)NULL) { getexp_err(ep, tgrp, "flag before export path definition"); goto nextline; } if (debug) warnx("doing opt %s", cp); got_nondir = 1; if (do_opt(&cp, &endcp, ep, grp, &has_host, &exflags, &anon)) { getexp_err(ep, tgrp, NULL); goto nextline; } } else if (*cp == '/') { savedc = *endcp; *endcp = '\0'; if (v4root_phase > 1) { if (dirp != NULL) { getexp_err(ep, tgrp, "Multiple V4 dirs"); goto nextline; } } if (check_dirpath(cp) && statfs(cp, &fsb) >= 0) { if ((fsb.f_flags & MNT_AUTOMOUNTED) != 0) syslog(LOG_ERR, "Warning: exporting of " "automounted fs %s not supported", cp); if (got_nondir) { getexp_err(ep, tgrp, "dirs must be first"); goto nextline; } if (v4root_phase == 1) { if (dirp != NULL) { getexp_err(ep, tgrp, "Multiple V4 dirs"); goto nextline; } if (strlen(v4root_dirpath) == 0) { strlcpy(v4root_dirpath, cp, sizeof (v4root_dirpath)); } else if (strcmp(v4root_dirpath, cp) != 0) { syslog(LOG_ERR, "different V4 dirpath %s", cp); getexp_err(ep, tgrp, NULL); goto nextline; } dirp = cp; v4root_phase = 2; got_nondir = 1; ep = get_exp(); } else { if (ep) { - if (ep->ex_fs.val[0] != - fsb.f_fsid.val[0] || - ep->ex_fs.val[1] != - fsb.f_fsid.val[1]) { + if (fsidcmp(&ep->ex_fs, &fsb.f_fsid) + != 0) { getexp_err(ep, tgrp, "fsid mismatch"); goto nextline; } } else { /* * See if this directory is already * in the list. */ ep = ex_search(&fsb.f_fsid, exphead); if (ep == (struct exportlist *)NULL) { ep = get_exp(); ep->ex_fs = fsb.f_fsid; ep->ex_fsdir = strdup(fsb.f_mntonname); if (ep->ex_fsdir == NULL) out_of_mem(); if (debug) warnx( "making new ep fs=0x%x,0x%x", fsb.f_fsid.val[0], fsb.f_fsid.val[1]); } else if (debug) warnx("found ep fs=0x%x,0x%x", fsb.f_fsid.val[0], fsb.f_fsid.val[1]); } /* * Add dirpath to export mount point. */ dirp = add_expdir(&dirhead, cp, len); dirplen = len; } } else { getexp_err(ep, tgrp, "symbolic link in export path or statfs failed"); goto nextline; } *endcp = savedc; } else { savedc = *endcp; *endcp = '\0'; got_nondir = 1; if (ep == (struct exportlist *)NULL) { getexp_err(ep, tgrp, "host(s) before export path definition"); goto nextline; } /* * Get the host or netgroup. */ setnetgrent(cp); netgrp = getnetgrent(&hst, &usr, &dom); do { if (has_host) { grp->gr_next = get_grp(); grp = grp->gr_next; } if (netgrp) { if (hst == 0) { syslog(LOG_ERR, "null hostname in netgroup %s, skipping", cp); grp->gr_type = GT_IGNORE; } else if (get_host(hst, grp, tgrp)) { syslog(LOG_ERR, "bad host %s in netgroup %s, skipping", hst, cp); grp->gr_type = GT_IGNORE; } } else if (get_host(cp, grp, tgrp)) { syslog(LOG_ERR, "bad host %s, skipping", cp); grp->gr_type = GT_IGNORE; } has_host = TRUE; } while (netgrp && getnetgrent(&hst, &usr, &dom)); endnetgrent(); *endcp = savedc; } cp = endcp; nextfield(&cp, &endcp); len = endcp - cp; } if (check_options(dirhead)) { getexp_err(ep, tgrp, NULL); goto nextline; } if (!has_host) { grp->gr_type = GT_DEFAULT; if (debug) warnx("adding a default entry"); /* * Don't allow a network export coincide with a list of * host(s) on the same line. */ } else if ((opt_flags & OP_NET) && tgrp->gr_next) { getexp_err(ep, tgrp, "network/host conflict"); goto nextline; /* * If an export list was specified on this line, make sure * that we have at least one valid entry, otherwise skip it. */ } else { grp = tgrp; while (grp && grp->gr_type == GT_IGNORE) grp = grp->gr_next; if (! grp) { getexp_err(ep, tgrp, "no valid entries"); goto nextline; } } if (v4root_phase == 1) { getexp_err(ep, tgrp, "V4:root, no dirp, ignored"); goto nextline; } /* * Loop through hosts, pushing the exports into the kernel. * After loop, tgrp points to the start of the list and * grp points to the last entry in the list. * Do not do the do_mount() for passno == 1, since the * second pass will do it, as required. */ grp = tgrp; do { grp->gr_exflags = exflags; - grp->gr_anon = anon; + cp_cred(&grp->gr_anon, &anon); if (v4root_phase == 2 && passno == 0) LOGDEBUG("do_mount v4root"); if (passno == 0 && do_mount(ep, grp, exflags, &anon, dirp, dirplen, &fsb, ep->ex_numsecflavors, ep->ex_secflavors)) { getexp_err(ep, tgrp, NULL); goto nextline; } } while (grp->gr_next && (grp = grp->gr_next)); /* * For V4: don't enter in mount lists. */ if (v4root_phase > 0 && v4root_phase <= 2) { /* * These structures are used for the reload, * so save them for that case. Otherwise, just * free them up now. */ if (passno == 1 && ep != NULL) { savgrp = tgrp; while (tgrp != NULL) { /* * Save the security flavors and exflags * for this host set in the groups. */ tgrp->gr_numsecflavors = ep->ex_numsecflavors; if (ep->ex_numsecflavors > 0) memcpy(tgrp->gr_secflavors, ep->ex_secflavors, sizeof(ep->ex_secflavors)); tgrp = tgrp->gr_next; } if (v4root_ep == NULL) { v4root_ep = ep; ep = NULL; /* Don't free below. */ } grp->gr_next = v4root_ep->ex_grphead; v4root_ep->ex_grphead = savgrp; } if (ep != NULL) free_exp(ep); while (tgrp != NULL) { grp = tgrp; tgrp = tgrp->gr_next; free_grp(grp); } goto nextline; } /* * Success. Update the data structures. */ if (has_host) { hang_dirp(dirhead, tgrp, ep, opt_flags, &anon, exflags); grp->gr_next = ep->ex_grphead; ep->ex_grphead = tgrp; } else { hang_dirp(dirhead, (struct grouplist *)NULL, ep, opt_flags, &anon, exflags); free_grp(grp); } dirhead = (struct dirlist *)NULL; if ((ep->ex_flag & EX_LINKED) == 0) { insert_exports(ep, exphead); ep->ex_flag |= EX_LINKED; } nextline: v4root_phase = 0; if (dirhead) { free_dir(dirhead); dirhead = (struct dirlist *)NULL; } } } /* * Get the export list from all specified files */ static void get_exportlist(int passno) { struct export_args export; struct iovec *iov; struct statfs *mntbufp; char errmsg[255]; int num, i; int iovlen; struct nfsex_args eargs; FILE *debug_file; if ((debug_file = fopen(_PATH_MOUNTDDEBUG, "r")) != NULL) { fclose(debug_file); logdebug = 1; } else logdebug = 0; LOGDEBUG("passno=%d", passno); v4root_dirpath[0] = '\0'; free_v4rootexp(); if (passno == 1) { /* * Save the current lists as old ones, so that the new lists * can be compared with the old ones in the 2nd pass. */ for (i = 0; i < exphashsize; i++) { SLIST_FIRST(&oldexphead[i]) = SLIST_FIRST(&exphead[i]); SLIST_INIT(&exphead[i]); } /* Note that the public fh has not yet been set. */ has_set_publicfh = 0; /* Read the export file(s) and process them */ read_exportfile(passno); } else { /* * Just make the old lists empty. * exphashsize == 0 for the first call, before oldexphead * has been initialized-->loop won't be executed. */ for (i = 0; i < exphashsize; i++) SLIST_INIT(&oldexphead[i]); } bzero(&export, sizeof(export)); export.ex_flags = MNT_DELEXPORT; iov = NULL; iovlen = 0; bzero(errmsg, sizeof(errmsg)); if (suspend_nfsd != 0) (void)nfssvc(NFSSVC_SUSPENDNFSD, NULL); /* * Delete the old V4 root dir. */ bzero(&eargs, sizeof (eargs)); eargs.export.ex_flags = MNT_DELEXPORT; - if (nfssvc(NFSSVC_V4ROOTEXPORT, (caddr_t)&eargs) < 0 && + if (nfssvc(NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT, (caddr_t)&eargs) < 0 && errno != ENOENT) syslog(LOG_ERR, "Can't delete exports for V4:"); build_iovec(&iov, &iovlen, "fstype", NULL, 0); build_iovec(&iov, &iovlen, "fspath", NULL, 0); build_iovec(&iov, &iovlen, "from", NULL, 0); build_iovec(&iov, &iovlen, "update", NULL, 0); build_iovec(&iov, &iovlen, "export", &export, sizeof(export)); build_iovec(&iov, &iovlen, "errmsg", errmsg, sizeof(errmsg)); /* * For passno == 1, compare the old and new lists updating the kernel * exports for any cases that have changed. * This call is doing the second pass through the lists. * If it fails, fall back on the bulk reload. */ if (passno == 1 && compare_nmount_exportlist(iov, iovlen, errmsg) == 0) { LOGDEBUG("compareok"); /* Free up the old lists. */ free_exports(oldexphead); } else { LOGDEBUG("doing passno=0"); /* * Clear flag that notes if a public fh has been exported. - * It is set by do_mount() if MNTEX_PUBLIC is set for the entry. + * It is set by do_mount() if MNT_EXPUBLIC is set for the entry. */ has_publicfh = 0; /* exphead == NULL if not yet allocated (first call). */ if (exphead != NULL) { /* * First, get rid of the old lists. */ free_exports(exphead); free_exports(oldexphead); } /* * And delete exports that are in the kernel for all local * filesystems. * XXX: Should know how to handle all local exportable * filesystems. */ num = getmntinfo(&mntbufp, MNT_NOWAIT); /* Allocate hash tables, for first call. */ if (exphead == NULL) { /* Target an average linked list length of 10. */ exphashsize = num / 10; if (exphashsize < 1) exphashsize = 1; else if (exphashsize > 100000) exphashsize = 100000; exphead = malloc(exphashsize * sizeof(*exphead)); oldexphead = malloc(exphashsize * sizeof(*oldexphead)); if (exphead == NULL || oldexphead == NULL) errx(1, "Can't malloc hash tables"); for (i = 0; i < exphashsize; i++) { SLIST_INIT(&exphead[i]); SLIST_INIT(&oldexphead[i]); } } for (i = 0; i < num; i++) delete_export(iov, iovlen, &mntbufp[i], errmsg); /* Read the export file(s) and process them */ read_exportfile(0); } if (iov != NULL) { /* Free strings allocated by strdup() in getmntopts.c */ free(iov[0].iov_base); /* fstype */ free(iov[2].iov_base); /* fspath */ free(iov[4].iov_base); /* from */ free(iov[6].iov_base); /* update */ free(iov[8].iov_base); /* export */ free(iov[10].iov_base); /* errmsg */ /* free iov, allocated by realloc() */ free(iov); iovlen = 0; } /* * If there was no public fh, clear any previous one set. */ if (has_publicfh == 0) { LOGDEBUG("clear public fh"); (void) nfssvc(NFSSVC_NOPUBLICFH, NULL); } /* Resume the nfsd. If they weren't suspended, this is harmless. */ (void)nfssvc(NFSSVC_RESUMENFSD, NULL); LOGDEBUG("eo get_exportlist"); } /* * Insert an export entry in the appropriate list. */ static void insert_exports(struct exportlist *ep, struct exportlisthead *exhp) { uint32_t i; i = EXPHASH(&ep->ex_fs); LOGDEBUG("fs=%s hash=%i", ep->ex_fsdir, i); SLIST_INSERT_HEAD(&exhp[i], ep, entries); } /* * Free up the exports lists passed in as arguments. */ static void free_exports(struct exportlisthead *exhp) { struct exportlist *ep, *ep2; int i; for (i = 0; i < exphashsize; i++) { SLIST_FOREACH_SAFE(ep, &exhp[i], entries, ep2) { SLIST_REMOVE(&exhp[i], ep, exportlist, entries); free_exp(ep); } SLIST_INIT(&exhp[i]); } } /* * Read the exports file(s) and call get_exportlist_one() for each line. */ static void read_exportfile(int passno) { int done, i; /* * Read in the exports file and build the list, calling * nmount() as we go along to push the export rules into the kernel. */ done = 0; for (i = 0; exnames[i] != NULL; i++) { if (debug) warnx("reading exports from %s", exnames[i]); if ((exp_file = fopen(exnames[i], "r")) == NULL) { syslog(LOG_WARNING, "can't open %s", exnames[i]); continue; } get_exportlist_one(passno); fclose(exp_file); done++; } if (done == 0) { syslog(LOG_ERR, "can't open any exports file"); exit(2); } } /* * Compare the export lists against the old ones and do nmount() operations * for any cases that have changed. This avoids doing nmount() for entries * that have not changed. * Return 0 upon success, 1 otherwise. */ static int compare_nmount_exportlist(struct iovec *iov, int iovlen, char *errmsg) { struct exportlist *ep, *oep; struct grouplist *grp; struct statfs fs, ofs; int i, ret; /* * Loop through the current list and look for an entry in the old * list. * If found, check to see if it the same. * If it is not the same, delete and re-export. * Then mark it done on the old list. * else (not found) * export it. * Any entries left in the old list after processing must have their * exports deleted. */ for (i = 0; i < exphashsize; i++) SLIST_FOREACH(ep, &exphead[i], entries) { LOGDEBUG("foreach ep=%s", ep->ex_fsdir); oep = ex_search(&ep->ex_fs, oldexphead); if (oep != NULL) { /* * Check the mount paths are the same. * If not, return 1 so that the reload of the * exports will be done in bulk, the * passno == 0 way. */ LOGDEBUG("found old exp"); if (strcmp(ep->ex_fsdir, oep->ex_fsdir) != 0) return (1); LOGDEBUG("same fsdir"); /* * Test to see if the entry is the same. * If not the same delete exports and * re-export. */ if (compare_export(ep, oep) != 0) { /* * Clear has_publicfh if if was set * in the old exports, but only if it * has not been set during processing of * the exports for this pass, as * indicated by has_set_publicfh. */ if (has_set_publicfh == 0 && (oep->ex_flag & EX_PUBLICFH) != 0) has_publicfh = 0; /* Delete and re-export. */ if (statfs(ep->ex_fsdir, &fs) < 0) return (1); delete_export(iov, iovlen, &fs, errmsg); ret = do_export_mount(ep, &fs); if (ret != 0) return (ret); } oep->ex_flag |= EX_DONE; LOGDEBUG("exdone"); } else { LOGDEBUG("not found so export"); /* Not found, so do export. */ if (statfs(ep->ex_fsdir, &fs) < 0) return (1); ret = do_export_mount(ep, &fs); if (ret != 0) return (ret); } } /* Delete exports not done. */ for (i = 0; i < exphashsize; i++) SLIST_FOREACH(oep, &oldexphead[i], entries) { if ((oep->ex_flag & EX_DONE) == 0) { LOGDEBUG("not done delete=%s", oep->ex_fsdir); if (statfs(oep->ex_fsdir, &ofs) >= 0 && - oep->ex_fs.val[0] == ofs.f_fsid.val[0] && - oep->ex_fs.val[1] == ofs.f_fsid.val[1]) { + fsidcmp(&oep->ex_fs, &ofs.f_fsid) == 0) { LOGDEBUG("do delete"); /* * Clear has_publicfh if if was set * in the old exports, but only if it * has not been set during processing of * the exports for this pass, as * indicated by has_set_publicfh. */ if (has_set_publicfh == 0 && (oep->ex_flag & EX_PUBLICFH) != 0) has_publicfh = 0; delete_export(iov, iovlen, &ofs, errmsg); } } } /* Do the V4 root exports, as required. */ grp = NULL; if (v4root_ep != NULL) grp = v4root_ep->ex_grphead; v4root_phase = 2; while (v4root_ep != NULL && grp != NULL) { LOGDEBUG("v4root expath=%s", v4root_dirpath); ret = do_mount(v4root_ep, grp, grp->gr_exflags, &grp->gr_anon, v4root_dirpath, strlen(v4root_dirpath), &fs, grp->gr_numsecflavors, grp->gr_secflavors); if (ret != 0) { v4root_phase = 0; return (ret); } grp = grp->gr_next; } v4root_phase = 0; free_v4rootexp(); return (0); } /* * Compare old and current exportlist entries for the fsid and return 0 * if they are the same, 1 otherwise. */ static int compare_export(struct exportlist *ep, struct exportlist *oep) { struct grouplist *grp, *ogrp; if (strcmp(ep->ex_fsdir, oep->ex_fsdir) != 0) return (1); if ((ep->ex_flag & EX_DEFSET) != (oep->ex_flag & EX_DEFSET)) return (1); if ((ep->ex_defdir != NULL && oep->ex_defdir == NULL) || (ep->ex_defdir == NULL && oep->ex_defdir != NULL)) return (1); if (ep->ex_defdir != NULL && (ep->ex_defdir->dp_flag & DP_DEFSET) != (oep->ex_defdir->dp_flag & DP_DEFSET)) return (1); if ((ep->ex_flag & EX_DEFSET) != 0 && (ep->ex_defnumsecflavors != oep->ex_defnumsecflavors || ep->ex_defexflags != oep->ex_defexflags || compare_cred(&ep->ex_defanon, &oep->ex_defanon) != 0 || compare_secflavor(ep->ex_defsecflavors, oep->ex_defsecflavors, ep->ex_defnumsecflavors) != 0)) return (1); /* Now, check all the groups. */ for (ogrp = oep->ex_grphead; ogrp != NULL; ogrp = ogrp->gr_next) ogrp->gr_flag = 0; for (grp = ep->ex_grphead; grp != NULL; grp = grp->gr_next) { for (ogrp = oep->ex_grphead; ogrp != NULL; ogrp = ogrp->gr_next) if ((ogrp->gr_flag & GR_FND) == 0 && grp->gr_numsecflavors == ogrp->gr_numsecflavors && grp->gr_exflags == ogrp->gr_exflags && compare_cred(&grp->gr_anon, &ogrp->gr_anon) == 0 && compare_secflavor(grp->gr_secflavors, ogrp->gr_secflavors, grp->gr_numsecflavors) == 0) break; if (ogrp != NULL) ogrp->gr_flag |= GR_FND; else return (1); } for (ogrp = oep->ex_grphead; ogrp != NULL; ogrp = ogrp->gr_next) if ((ogrp->gr_flag & GR_FND) == 0) return (1); return (0); } /* * This algorithm compares two arrays of "n" items. It returns 0 if they are * the "same" and 1 otherwise. Although suboptimal, it is always safe to * return 1, which makes compare_nmount_export() reload the exports entry. * "same" refers to having the same set of values in the two arrays. * The arrays are in no particular order and duplicates (multiple entries * in an array with the same value) is allowed. * The algorithm is inefficient, but the common case of indentical arrays is * handled first and "n" is normally fairly small. * Since the two functions need the same algorithm but for arrays of * different types (gid_t vs int), this is done as a macro. */ #define COMPARE_ARRAYS(a1, a2, n) \ do { \ int fnd, fndarray[(n)], i, j; \ /* Handle common case of identical arrays. */ \ for (i = 0; i < (n); i++) \ if ((a1)[i] != (a2)[i]) \ break; \ if (i == (n)) \ return (0); \ for (i = 0; i < (n); i++) \ fndarray[i] = 0; \ for (i = 0; i < (n); i++) { \ fnd = 0; \ for (j = 0; j < (n); j++) { \ if ((a1)[i] == (a2)[j]) { \ fndarray[j] = 1; \ fnd = 1; \ } \ } \ if (fnd == 0) \ return (1); \ } \ for (i = 0; i < (n); i++) \ if (fndarray[i] == 0) \ return (1); \ return (0); \ } while (0) /* - * Compare to struct xucred's. Return 0 if the same and 1 otherwise. + * Compare two struct expcred's. Return 0 if the same and 1 otherwise. */ static int -compare_cred(struct xucred *cr0, struct xucred *cr1) +compare_cred(struct expcred *cr0, struct expcred *cr1) { if (cr0->cr_uid != cr1->cr_uid || cr0->cr_ngroups != cr1->cr_ngroups) return (1); COMPARE_ARRAYS(cr0->cr_groups, cr1->cr_groups, cr0->cr_ngroups); } /* * Compare two lists of security flavors. Return 0 if the same and 1 otherwise. */ static int compare_secflavor(int *sec1, int *sec2, int nsec) { COMPARE_ARRAYS(sec1, sec2, nsec); } /* * Delete an exports entry. */ static void delete_export(struct iovec *iov, int iovlen, struct statfs *fsp, char *errmsg) { struct xvfsconf vfc; if (getvfsbyname(fsp->f_fstypename, &vfc) != 0) { syslog(LOG_ERR, "getvfsbyname() failed for %s", fsp->f_fstypename); return; } /* * We do not need to delete "export" flag from * filesystems that do not have it set. */ if (!(fsp->f_flags & MNT_EXPORTED)) return; /* * Do not delete export for network filesystem by * passing "export" arg to nmount(). * It only makes sense to do this for local filesystems. */ if (vfc.vfc_flags & VFCF_NETWORK) return; iov[1].iov_base = fsp->f_fstypename; iov[1].iov_len = strlen(fsp->f_fstypename) + 1; iov[3].iov_base = fsp->f_mntonname; iov[3].iov_len = strlen(fsp->f_mntonname) + 1; iov[5].iov_base = fsp->f_mntfromname; iov[5].iov_len = strlen(fsp->f_mntfromname) + 1; errmsg[0] = '\0'; /* * EXDEV is returned when path exists but is not a * mount point. May happens if raced with unmount. */ if (nmount(iov, iovlen, fsp->f_flags) < 0 && errno != ENOENT && errno != ENOTSUP && errno != EXDEV) { syslog(LOG_ERR, "can't delete exports for %s: %m %s", fsp->f_mntonname, errmsg); } } /* * Allocate an export list element */ static struct exportlist * get_exp(void) { struct exportlist *ep; ep = (struct exportlist *)calloc(1, sizeof (struct exportlist)); if (ep == (struct exportlist *)NULL) out_of_mem(); return (ep); } /* * Allocate a group list element */ static struct grouplist * get_grp(void) { struct grouplist *gp; gp = (struct grouplist *)calloc(1, sizeof (struct grouplist)); if (gp == (struct grouplist *)NULL) out_of_mem(); return (gp); } /* * Clean up upon an error in get_exportlist(). */ static void getexp_err(struct exportlist *ep, struct grouplist *grp, const char *reason) { struct grouplist *tgrp; if (!(opt_flags & OP_QUIET)) { if (reason != NULL) syslog(LOG_ERR, "bad exports list line '%s': %s", line, reason); else syslog(LOG_ERR, "bad exports list line '%s'", line); } if (ep && (ep->ex_flag & EX_LINKED) == 0) free_exp(ep); while (grp) { tgrp = grp; grp = grp->gr_next; free_grp(tgrp); } } /* * Search the export list for a matching fs. */ static struct exportlist * ex_search(fsid_t *fsid, struct exportlisthead *exhp) { struct exportlist *ep; uint32_t i; i = EXPHASH(fsid); SLIST_FOREACH(ep, &exhp[i], entries) { - if (ep->ex_fs.val[0] == fsid->val[0] && - ep->ex_fs.val[1] == fsid->val[1]) + if (fsidcmp(&ep->ex_fs, fsid) == 0) return (ep); } return (ep); } /* * Add a directory path to the list. */ static char * add_expdir(struct dirlist **dpp, char *cp, int len) { struct dirlist *dp; dp = malloc(sizeof (struct dirlist)); if (dp == (struct dirlist *)NULL) out_of_mem(); dp->dp_left = *dpp; dp->dp_right = (struct dirlist *)NULL; dp->dp_flag = 0; dp->dp_hosts = (struct hostlist *)NULL; dp->dp_dirp = strndup(cp, len); if (dp->dp_dirp == NULL) out_of_mem(); *dpp = dp; return (dp->dp_dirp); } /* * Hang the dir list element off the dirpath binary tree as required * and update the entry for host. */ static void hang_dirp(struct dirlist *dp, struct grouplist *grp, struct exportlist *ep, - int flags, struct xucred *anoncrp, int exflags) + int flags, struct expcred *anoncrp, uint64_t exflags) { struct hostlist *hp; struct dirlist *dp2; if (flags & OP_ALLDIRS) { if (ep->ex_defdir) free((caddr_t)dp); else ep->ex_defdir = dp; if (grp == (struct grouplist *)NULL) { ep->ex_flag |= EX_DEFSET; ep->ex_defdir->dp_flag |= DP_DEFSET; /* Save the default security flavors list. */ ep->ex_defnumsecflavors = ep->ex_numsecflavors; if (ep->ex_numsecflavors > 0) memcpy(ep->ex_defsecflavors, ep->ex_secflavors, sizeof(ep->ex_secflavors)); - ep->ex_defanon = *anoncrp; + cp_cred(&ep->ex_defanon, anoncrp); ep->ex_defexflags = exflags; } else while (grp) { hp = get_ht(); hp->ht_grp = grp; hp->ht_next = ep->ex_defdir->dp_hosts; ep->ex_defdir->dp_hosts = hp; /* Save the security flavors list for this host set. */ grp->gr_numsecflavors = ep->ex_numsecflavors; if (ep->ex_numsecflavors > 0) memcpy(grp->gr_secflavors, ep->ex_secflavors, sizeof(ep->ex_secflavors)); grp = grp->gr_next; } } else { /* * Loop through the directories adding them to the tree. */ while (dp) { dp2 = dp->dp_left; add_dlist(&ep->ex_dirl, dp, grp, flags, ep, anoncrp, exflags); dp = dp2; } } } /* * Traverse the binary tree either updating a node that is already there * for the new directory or adding the new node. */ static void add_dlist(struct dirlist **dpp, struct dirlist *newdp, struct grouplist *grp, - int flags, struct exportlist *ep, struct xucred *anoncrp, int exflags) + int flags, struct exportlist *ep, struct expcred *anoncrp, + uint64_t exflags) { struct dirlist *dp; struct hostlist *hp; int cmp; dp = *dpp; if (dp) { cmp = strcmp(dp->dp_dirp, newdp->dp_dirp); if (cmp > 0) { add_dlist(&dp->dp_left, newdp, grp, flags, ep, anoncrp, exflags); return; } else if (cmp < 0) { add_dlist(&dp->dp_right, newdp, grp, flags, ep, anoncrp, exflags); return; } else free((caddr_t)newdp); } else { dp = newdp; dp->dp_left = (struct dirlist *)NULL; *dpp = dp; } if (grp) { /* * Hang all of the host(s) off of the directory point. */ do { hp = get_ht(); hp->ht_grp = grp; hp->ht_next = dp->dp_hosts; dp->dp_hosts = hp; /* Save the security flavors list for this host set. */ grp->gr_numsecflavors = ep->ex_numsecflavors; if (ep->ex_numsecflavors > 0) memcpy(grp->gr_secflavors, ep->ex_secflavors, sizeof(ep->ex_secflavors)); grp = grp->gr_next; } while (grp); } else { ep->ex_flag |= EX_DEFSET; dp->dp_flag |= DP_DEFSET; /* Save the default security flavors list. */ ep->ex_defnumsecflavors = ep->ex_numsecflavors; if (ep->ex_numsecflavors > 0) memcpy(ep->ex_defsecflavors, ep->ex_secflavors, sizeof(ep->ex_secflavors)); - ep->ex_defanon = *anoncrp; + cp_cred(&ep->ex_defanon, anoncrp); ep->ex_defexflags = exflags; } } /* * Search for a dirpath on the export point. */ static struct dirlist * dirp_search(struct dirlist *dp, char *dirp) { int cmp; if (dp) { cmp = strcmp(dp->dp_dirp, dirp); if (cmp > 0) return (dirp_search(dp->dp_left, dirp)); else if (cmp < 0) return (dirp_search(dp->dp_right, dirp)); else return (dp); } return (dp); } /* * Scan for a host match in a directory tree. */ static int chk_host(struct dirlist *dp, struct sockaddr *saddr, int *defsetp, int *hostsetp, int *numsecflavors, int **secflavorsp) { struct hostlist *hp; struct grouplist *grp; struct addrinfo *ai; if (dp) { if (dp->dp_flag & DP_DEFSET) *defsetp = dp->dp_flag; hp = dp->dp_hosts; while (hp) { grp = hp->ht_grp; switch (grp->gr_type) { case GT_HOST: ai = grp->gr_ptr.gt_addrinfo; for (; ai; ai = ai->ai_next) { if (!sacmp(ai->ai_addr, saddr, NULL)) { *hostsetp = (hp->ht_flag | DP_HOSTSET); if (numsecflavors != NULL) { *numsecflavors = grp->gr_numsecflavors; *secflavorsp = grp->gr_secflavors; } return (1); } } break; case GT_NET: if (!sacmp(saddr, (struct sockaddr *) &grp->gr_ptr.gt_net.nt_net, (struct sockaddr *) &grp->gr_ptr.gt_net.nt_mask)) { *hostsetp = (hp->ht_flag | DP_HOSTSET); if (numsecflavors != NULL) { *numsecflavors = grp->gr_numsecflavors; *secflavorsp = grp->gr_secflavors; } return (1); } break; } hp = hp->ht_next; } } return (0); } /* * Scan tree for a host that matches the address. */ static int scan_tree(struct dirlist *dp, struct sockaddr *saddr) { int defset, hostset; if (dp) { if (scan_tree(dp->dp_left, saddr)) return (1); if (chk_host(dp, saddr, &defset, &hostset, NULL, NULL)) return (1); if (scan_tree(dp->dp_right, saddr)) return (1); } return (0); } /* * Traverse the dirlist tree and free it up. */ static void free_dir(struct dirlist *dp) { if (dp) { free_dir(dp->dp_left); free_dir(dp->dp_right); free_host(dp->dp_hosts); free(dp->dp_dirp); free(dp); } } /* * Parse a colon separated list of security flavors */ static int parsesec(char *seclist, struct exportlist *ep) { char *cp, savedc; int flavor; ep->ex_numsecflavors = 0; for (;;) { cp = strchr(seclist, ':'); if (cp) { savedc = *cp; *cp = '\0'; } if (!strcmp(seclist, "sys")) flavor = AUTH_SYS; else if (!strcmp(seclist, "krb5")) flavor = RPCSEC_GSS_KRB5; else if (!strcmp(seclist, "krb5i")) flavor = RPCSEC_GSS_KRB5I; else if (!strcmp(seclist, "krb5p")) flavor = RPCSEC_GSS_KRB5P; else { if (cp) *cp = savedc; syslog(LOG_ERR, "bad sec flavor: %s", seclist); return (1); } if (ep->ex_numsecflavors == MAXSECFLAVORS) { if (cp) *cp = savedc; syslog(LOG_ERR, "too many sec flavors: %s", seclist); return (1); } ep->ex_secflavors[ep->ex_numsecflavors] = flavor; ep->ex_numsecflavors++; if (cp) { *cp = savedc; seclist = cp + 1; } else { break; } } return (0); } /* * Parse the option string and update fields. * Option arguments may either be -