diff --git a/sys/fs/nfs/nfs.h b/sys/fs/nfs/nfs.h index 272b8dbfee22..1a29a7e1d6ec 100644 --- a/sys/fs/nfs/nfs.h +++ b/sys/fs/nfs/nfs.h @@ -1,826 +1,826 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NFS_NFS_H_ #define _NFS_NFS_H_ /* * Tunable constants for nfs */ #define NFS_MAXIOVEC 34 #define NFS_TICKINTVL 500 /* Desired time for a tick (msec) */ #define NFS_HZ (hz / nfscl_ticks) /* Ticks/sec */ #define NFS_TIMEO (1 * NFS_HZ) /* Default timeout = 1 second */ #define NFS_MINTIMEO (1 * NFS_HZ) /* Min timeout to use */ #define NFS_MAXTIMEO (60 * NFS_HZ) /* Max timeout to backoff to */ #define NFS_TCPTIMEO 300 /* TCP timeout */ #define NFS_MAXRCVTIMEO 60 /* 1 minute in seconds */ #define NFS_MINIDEMTIMEO (5 * NFS_HZ) /* Min timeout for non-idempotent ops*/ #define NFS_MAXREXMIT 100 /* Stop counting after this many */ #define NFSV4_CALLBACKTIMEO 800 /* Timeout in msec */ #define NFSV4_CALLBACKRETRY 5 /* Number of retries before failure */ #define NFSV4_SLOTS 64 /* Number of slots, fore channel */ #define NFSV4_CBSLOTS 8 /* Number of slots, back channel */ #define NFSV4_CBRETRYCNT 4 /* # of CBRecall retries upon err */ #define NFSV4_UPCALLTIMEO (15 * NFS_HZ) /* Timeout in ticks for upcalls */ /* to gssd or nfsuserd */ #define NFSV4_UPCALLRETRY 4 /* Number of retries before failure */ #define NFS_MAXWINDOW 1024 /* Max number of outstanding requests */ #define NFS_RETRANS 10 /* Num of retrans for soft mounts */ #define NFS_RETRANS_TCP 2 /* Num of retrans for TCP soft mounts */ #define NFS_MAXGRPS 16 /* Max. size of groups list */ #define NFS_TRYLATERDEL 15 /* Maximum delay timeout (sec) */ #ifndef NFS_REMOVETIMEO #define NFS_REMOVETIMEO 15 /* # sec to wait for delegret in local syscall */ #endif #ifndef NFS_MINATTRTIMO #define NFS_MINATTRTIMO 5 /* Attribute cache timeout in sec */ #endif #ifndef NFS_MAXATTRTIMO #define NFS_MAXATTRTIMO 60 #endif #define NFS_WSIZE 8192 /* Def. write data size <= 8192 */ #define NFS_RSIZE 8192 /* Def. read data size <= 8192 */ #define NFS_READDIRSIZE 8192 /* Def. readdir size */ #define NFS_DEFRAHEAD 1 /* Def. read ahead # blocks */ #define NFS_MAXRAHEAD 16 /* Max. read ahead # blocks */ #define NFS_MAXASYNCDAEMON 64 /* Max. number async_daemons runnable */ #define NFS_MAXUIDHASH 64 /* Max. # of hashed uid entries/mp */ #ifndef NFSRV_LEASE #define NFSRV_LEASE 120 /* Lease time in seconds for V4 */ #endif /* assigned to nfsrv_lease */ #ifndef NFSRV_STALELEASE #define NFSRV_STALELEASE (5 * nfsrv_lease) #endif #ifndef NFSRV_MOULDYLEASE #define NFSRV_MOULDYLEASE 604800 /* One week (in sec) */ #endif #ifndef NFSCLIENTHASHSIZE #define NFSCLIENTHASHSIZE 20 /* Size of server client hash table */ #endif #ifndef NFSLOCKHASHSIZE #define NFSLOCKHASHSIZE 20 /* Size of server nfslock hash table */ #endif #ifndef NFSSESSIONHASHSIZE #define NFSSESSIONHASHSIZE 20 /* Size of server session hash table */ #endif #define NFSSTATEHASHSIZE 10 /* Size of server stateid hash table */ #define NFSLAYOUTHIGHWATER 1000000 /* Upper limit for # of layouts */ #ifndef NFSCLDELEGHIGHWATER #define NFSCLDELEGHIGHWATER 10000 /* limit for client delegations */ #endif #ifndef NFSCLLAYOUTHIGHWATER #define NFSCLLAYOUTHIGHWATER 10000 /* limit for client pNFS layouts */ #endif #ifndef NFSNOOPEN /* Inactive open owner (sec) */ #define NFSNOOPEN 120 #endif #define NFSRV_LEASEDELTA 15 /* # of seconds to delay beyond lease */ #define NFS_IDMAXSIZE 4 /* max sizeof (in_addr_t) */ #ifndef NFSRVCACHE_UDPTIMEOUT #define NFSRVCACHE_UDPTIMEOUT 30 /* # of sec to hold cached rpcs(udp) */ #endif #ifndef NFSRVCACHE_UDPHIGHWATER #define NFSRVCACHE_UDPHIGHWATER 500 /* Max # of udp cache entries */ #endif #ifndef NFSRVCACHE_TCPTIMEOUT #define NFSRVCACHE_TCPTIMEOUT (3600*12) /*#of sec to hold cached rpcs(tcp) */ #endif #ifndef NFSRVCACHE_FLOODLEVEL #define NFSRVCACHE_FLOODLEVEL 16384 /* Very high water mark for cache */ #endif #ifndef NFSRV_CLIENTHIGHWATER #define NFSRV_CLIENTHIGHWATER 1000 #endif #ifndef NFSRV_MAXDUMPLIST #define NFSRV_MAXDUMPLIST 10000 #endif #ifndef NFS_ACCESSCACHESIZE #define NFS_ACCESSCACHESIZE 8 #endif #define NFSV4_CBPORT 7745 /* Callback port for testing */ /* * This macro defines the high water mark for issuing V4 delegations. * (It is currently set at a conservative 20% of nfsrv_v4statelimit. This * may want to increase when clients can make more effective use of * delegations.) */ #define NFSRV_V4DELEGLIMIT(c) (((c) * 5) > nfsrv_v4statelimit) #define NFS_READDIRBLKSIZ DIRBLKSIZ /* Minimal nm_readdirsize */ /* * Oddballs */ #define NFS_CMPFH(n, f, s) \ ((n)->n_fhp->nfh_len == (s) && !NFSBCMP((n)->n_fhp->nfh_fh, (caddr_t)(f), (s))) #define NFSRV_CMPFH(nf, ns, f, s) \ ((ns) == (s) && !NFSBCMP((caddr_t)(nf), (caddr_t)(f), (s))) #define NFS_CMPTIME(t1, t2) \ ((t1).tv_sec == (t2).tv_sec && (t1).tv_nsec == (t2).tv_nsec) #define NFS_SETTIME(t) do { \ (t).tv_sec = time.tv_sec; (t).tv_nsec = 1000 * time.tv_usec; } while (0) #define NFS_SRVMAXDATA(n) \ (((n)->nd_flag & (ND_NFSV3 | ND_NFSV4)) ? \ - NFS_SRVMAXIO : NFS_V2MAXDATA) + nfs_srvmaxio : NFS_V2MAXDATA) #define NFS64BITSSET 0xffffffffffffffffull #define NFS64BITSMINUS1 0xfffffffffffffffeull /* * Structures for the nfssvc(2) syscall. Not that anyone but nfsd, mount_nfs * and nfsloaduser should ever try and use it. */ struct nfsd_addsock_args { int sock; /* Socket to serve */ caddr_t name; /* Client addr for connection based sockets */ int namelen; /* Length of name */ }; /* * nfsd argument for new krpc. * (New version supports pNFS, indicated by NFSSVC_NEWSTRUCT flag.) */ struct nfsd_nfsd_args { const char *principal; /* GSS-API service principal name */ int minthreads; /* minimum service thread count */ int maxthreads; /* maximum service thread count */ int version; /* Allow multiple variants */ char *addr; /* pNFS DS addresses */ int addrlen; /* Length of addrs */ char *dnshost; /* DNS names for DS addresses */ int dnshostlen; /* Length of DNS names */ char *dspath; /* DS Mount path on MDS */ int dspathlen; /* Length of DS Mount path on MDS */ char *mdspath; /* MDS mount for DS path on MDS */ int mdspathlen; /* Length of MDS mount for DS path on MDS */ int mirrorcnt; /* Number of mirrors to create on DSs */ }; /* * NFSDEV_MAXMIRRORS - Maximum level of mirroring for a DS. * (Most will only put files on two DSs, but this setting allows up to 4.) * NFSDEV_MAXVERS - maximum number of NFS versions supported by Flex File. */ #define NFSDEV_MAXMIRRORS 4 #define NFSDEV_MAXVERS 4 struct nfsd_pnfsd_args { int op; /* Which pNFSd op to perform. */ char *mdspath; /* Path of MDS file. */ char *dspath; /* Path of recovered DS mounted on dir. */ char *curdspath; /* Path of current DS mounted on dir. */ }; #define PNFSDOP_DELDSSERVER 1 #define PNFSDOP_COPYMR 2 #define PNFSDOP_FORCEDELDS 3 /* Old version. */ struct nfsd_nfsd_oargs { const char *principal; /* GSS-API service principal name */ int minthreads; /* minimum service thread count */ int maxthreads; /* maximum service thread count */ }; /* * Arguments for use by the callback daemon. */ struct nfsd_nfscbd_args { const char *principal; /* GSS-API service principal name */ }; struct nfscbd_args { int sock; /* Socket to serve */ caddr_t name; /* Client addr for connection based sockets */ int namelen; /* Length of name */ u_short port; /* Port# for callbacks */ }; struct nfsd_idargs { int nid_flag; /* Flags (see below) */ uid_t nid_uid; /* user/group id */ gid_t nid_gid; int nid_usermax; /* Upper bound on user name cache */ int nid_usertimeout;/* User name timeout (minutes) */ u_char *nid_name; /* Name */ int nid_namelen; /* and its length */ gid_t *nid_grps; /* and the list */ int nid_ngroup; /* Size of groups list */ }; struct nfsd_oidargs { int nid_flag; /* Flags (see below) */ uid_t nid_uid; /* user/group id */ gid_t nid_gid; int nid_usermax; /* Upper bound on user name cache */ int nid_usertimeout;/* User name timeout (minutes) */ u_char *nid_name; /* Name */ int nid_namelen; /* and its length */ }; struct nfsuserd_args { sa_family_t nuserd_family; /* Address family to use */ u_short nuserd_port; /* Port# */ }; struct nfsd_clid { int nclid_idlen; /* Length of client id */ u_char nclid_id[NFSV4_OPAQUELIMIT]; /* and name */ }; struct nfsd_dumplist { int ndl_size; /* Number of elements */ void *ndl_list; /* and the list of elements */ }; struct nfsd_dumpclients { u_int32_t ndcl_flags; /* LCL_xxx flags */ u_int32_t ndcl_nopenowners; /* Number of openowners */ u_int32_t ndcl_nopens; /* and opens */ u_int32_t ndcl_nlockowners; /* and of lockowners */ u_int32_t ndcl_nlocks; /* and of locks */ u_int32_t ndcl_ndelegs; /* and of delegations */ u_int32_t ndcl_nolddelegs; /* and old delegations */ sa_family_t ndcl_addrfam; /* Callback address */ union { struct in_addr sin_addr; struct in6_addr sin6_addr; } ndcl_cbaddr; struct nfsd_clid ndcl_clid; /* and client id */ }; struct nfsd_dumplocklist { char *ndllck_fname; /* File Name */ int ndllck_size; /* Number of elements */ void *ndllck_list; /* and the list of elements */ }; struct nfsd_dumplocks { u_int32_t ndlck_flags; /* state flags NFSLCK_xxx */ nfsv4stateid_t ndlck_stateid; /* stateid */ u_int64_t ndlck_first; /* lock byte range */ u_int64_t ndlck_end; struct nfsd_clid ndlck_owner; /* Owner of open/lock */ sa_family_t ndlck_addrfam; /* Callback address */ union { struct in_addr sin_addr; struct in6_addr sin6_addr; } ndlck_cbaddr; struct nfsd_clid ndlck_clid; /* and client id */ }; /* * Structure for referral information. */ struct nfsreferral { u_char *nfr_srvlist; /* List of servers */ int nfr_srvcnt; /* number of servers */ vnode_t nfr_vp; /* vnode for referral */ uint64_t nfr_dfileno; /* assigned dir inode# */ }; /* * Flags for lc_flags and opsflags for nfsrv_getclient(). */ #define LCL_NEEDSCONFIRM 0x00000001 #define LCL_DONTCLEAN 0x00000002 #define LCL_WAKEUPWANTED 0x00000004 #define LCL_TCPCALLBACK 0x00000008 #define LCL_CALLBACKSON 0x00000010 #define LCL_INDEXNOTOK 0x00000020 #define LCL_STAMPEDSTABLE 0x00000040 #define LCL_EXPIREIT 0x00000080 #define LCL_CBDOWN 0x00000100 #define LCL_KERBV 0x00000400 #define LCL_NAME 0x00000800 #define LCL_NEEDSCBNULL 0x00001000 #define LCL_GSSINTEGRITY 0x00002000 #define LCL_GSSPRIVACY 0x00004000 #define LCL_ADMINREVOKED 0x00008000 #define LCL_RECLAIMCOMPLETE 0x00010000 #define LCL_NFSV41 0x00020000 #define LCL_DONEBINDCONN 0x00040000 #define LCL_RECLAIMONEFS 0x00080000 #define LCL_NFSV42 0x00100000 #define LCL_TLSCB 0x00200000 #define LCL_GSS LCL_KERBV /* Or of all mechs */ /* * Bits for flags in nfslock and nfsstate. * The access, deny, NFSLCK_READ and NFSLCK_WRITE bits must be defined as * below, in the correct order, so the shifts work for tests. */ #define NFSLCK_READACCESS 0x00000001 #define NFSLCK_WRITEACCESS 0x00000002 #define NFSLCK_ACCESSBITS (NFSLCK_READACCESS | NFSLCK_WRITEACCESS) #define NFSLCK_SHIFT 2 #define NFSLCK_READDENY 0x00000004 #define NFSLCK_WRITEDENY 0x00000008 #define NFSLCK_DENYBITS (NFSLCK_READDENY | NFSLCK_WRITEDENY) #define NFSLCK_SHAREBITS \ (NFSLCK_READACCESS|NFSLCK_WRITEACCESS|NFSLCK_READDENY|NFSLCK_WRITEDENY) #define NFSLCK_LOCKSHIFT 4 #define NFSLCK_READ 0x00000010 #define NFSLCK_WRITE 0x00000020 #define NFSLCK_BLOCKING 0x00000040 #define NFSLCK_RECLAIM 0x00000080 #define NFSLCK_OPENTOLOCK 0x00000100 #define NFSLCK_TEST 0x00000200 #define NFSLCK_LOCK 0x00000400 #define NFSLCK_UNLOCK 0x00000800 #define NFSLCK_OPEN 0x00001000 #define NFSLCK_CLOSE 0x00002000 #define NFSLCK_CHECK 0x00004000 #define NFSLCK_RELEASE 0x00008000 #define NFSLCK_NEEDSCONFIRM 0x00010000 #define NFSLCK_CONFIRM 0x00020000 #define NFSLCK_DOWNGRADE 0x00040000 #define NFSLCK_DELEGREAD 0x00080000 #define NFSLCK_DELEGWRITE 0x00100000 #define NFSLCK_DELEGCUR 0x00200000 #define NFSLCK_DELEGPREV 0x00400000 #define NFSLCK_OLDDELEG 0x00800000 #define NFSLCK_DELEGRECALL 0x01000000 #define NFSLCK_SETATTR 0x02000000 #define NFSLCK_DELEGPURGE 0x04000000 #define NFSLCK_DELEGRETURN 0x08000000 #define NFSLCK_WANTWDELEG 0x10000000 #define NFSLCK_WANTRDELEG 0x20000000 #define NFSLCK_WANTNODELEG 0x40000000 #define NFSLCK_WANTBITS \ (NFSLCK_WANTWDELEG | NFSLCK_WANTRDELEG | NFSLCK_WANTNODELEG) /* And bits for nid_flag */ #define NFSID_INITIALIZE 0x0001 #define NFSID_ADDUID 0x0002 #define NFSID_DELUID 0x0004 #define NFSID_ADDUSERNAME 0x0008 #define NFSID_DELUSERNAME 0x0010 #define NFSID_ADDGID 0x0020 #define NFSID_DELGID 0x0040 #define NFSID_ADDGROUPNAME 0x0080 #define NFSID_DELGROUPNAME 0x0100 /* * fs.nfs sysctl(3) identifiers */ #define NFS_NFSSTATS 1 /* struct: struct nfsstats */ /* * Here is the definition of the attribute bits array and macros that * manipulate it. * THE MACROS MUST BE MANUALLY MODIFIED IF NFSATTRBIT_MAXWORDS CHANGES!! * It is (NFSATTRBIT_MAX + 31) / 32. */ #define NFSATTRBIT_MAXWORDS 3 typedef struct { u_int32_t bits[NFSATTRBIT_MAXWORDS]; } nfsattrbit_t; #define NFSZERO_ATTRBIT(b) do { \ (b)->bits[0] = 0; \ (b)->bits[1] = 0; \ (b)->bits[2] = 0; \ } while (0) #define NFSSET_ATTRBIT(t, f) do { \ (t)->bits[0] = (f)->bits[0]; \ (t)->bits[1] = (f)->bits[1]; \ (t)->bits[2] = (f)->bits[2]; \ } while (0) #define NFSSETSUPP_ATTRBIT(b, n) do { \ (b)->bits[0] = NFSATTRBIT_SUPP0; \ (b)->bits[1] = (NFSATTRBIT_SUPP1 | NFSATTRBIT_SUPPSETONLY1); \ (b)->bits[2] = (NFSATTRBIT_SUPP2 | NFSATTRBIT_SUPPSETONLY2); \ if (((n)->nd_flag & ND_NFSV41) == 0) { \ (b)->bits[1] &= ~NFSATTRBIT_NFSV41_1; \ (b)->bits[2] &= ~NFSATTRBIT_NFSV41_2; \ } \ if (((n)->nd_flag & ND_NFSV42) == 0) \ (b)->bits[2] &= ~NFSATTRBIT_NFSV42_2; \ } while (0) #define NFSISSET_ATTRBIT(b, p) ((b)->bits[(p) / 32] & (1 << ((p) % 32))) #define NFSSETBIT_ATTRBIT(b, p) ((b)->bits[(p) / 32] |= (1 << ((p) % 32))) #define NFSCLRBIT_ATTRBIT(b, p) ((b)->bits[(p) / 32] &= ~(1 << ((p) % 32))) #define NFSCLRALL_ATTRBIT(b, a) do { \ (b)->bits[0] &= ~((a)->bits[0]); \ (b)->bits[1] &= ~((a)->bits[1]); \ (b)->bits[2] &= ~((a)->bits[2]); \ } while (0) #define NFSCLRNOT_ATTRBIT(b, a) do { \ (b)->bits[0] &= ((a)->bits[0]); \ (b)->bits[1] &= ((a)->bits[1]); \ (b)->bits[2] &= ((a)->bits[2]); \ } while (0) #define NFSCLRNOTFILLABLE_ATTRBIT(b, n) do { \ (b)->bits[0] &= NFSATTRBIT_SUPP0; \ (b)->bits[1] &= NFSATTRBIT_SUPP1; \ (b)->bits[2] &= NFSATTRBIT_SUPP2; \ if (((n)->nd_flag & ND_NFSV41) == 0) { \ (b)->bits[1] &= ~NFSATTRBIT_NFSV41_1; \ (b)->bits[2] &= ~NFSATTRBIT_NFSV41_2; \ } \ if (((n)->nd_flag & ND_NFSV42) == 0) \ (b)->bits[2] &= ~NFSATTRBIT_NFSV42_2; \ } while (0) #define NFSCLRNOTSETABLE_ATTRBIT(b, n) do { \ (b)->bits[0] &= NFSATTRBIT_SETABLE0; \ (b)->bits[1] &= NFSATTRBIT_SETABLE1; \ (b)->bits[2] &= NFSATTRBIT_SETABLE2; \ if (((n)->nd_flag & ND_NFSV41) == 0) \ (b)->bits[2] &= ~NFSATTRBIT_NFSV41_2; \ if (((n)->nd_flag & ND_NFSV42) == 0) \ (b)->bits[2] &= ~NFSATTRBIT_NFSV42_2; \ } while (0) #define NFSNONZERO_ATTRBIT(b) ((b)->bits[0] || (b)->bits[1] || (b)->bits[2]) #define NFSEQUAL_ATTRBIT(b, p) ((b)->bits[0] == (p)->bits[0] && \ (b)->bits[1] == (p)->bits[1] && (b)->bits[2] == (p)->bits[2]) #define NFSGETATTR_ATTRBIT(b) do { \ (b)->bits[0] = NFSATTRBIT_GETATTR0; \ (b)->bits[1] = NFSATTRBIT_GETATTR1; \ (b)->bits[2] = NFSATTRBIT_GETATTR2; \ } while (0) #define NFSWCCATTR_ATTRBIT(b) do { \ (b)->bits[0] = NFSATTRBIT_WCCATTR0; \ (b)->bits[1] = NFSATTRBIT_WCCATTR1; \ (b)->bits[2] = NFSATTRBIT_WCCATTR2; \ } while (0) #define NFSWRITEGETATTR_ATTRBIT(b) do { \ (b)->bits[0] = NFSATTRBIT_WRITEGETATTR0; \ (b)->bits[1] = NFSATTRBIT_WRITEGETATTR1; \ (b)->bits[2] = NFSATTRBIT_WRITEGETATTR2; \ } while (0) #define NFSCBGETATTR_ATTRBIT(b, c) do { \ (c)->bits[0] = ((b)->bits[0] & NFSATTRBIT_CBGETATTR0); \ (c)->bits[1] = ((b)->bits[1] & NFSATTRBIT_CBGETATTR1); \ (c)->bits[2] = ((b)->bits[2] & NFSATTRBIT_CBGETATTR2); \ } while (0) #define NFSPATHCONF_GETATTRBIT(b) do { \ (b)->bits[0] = NFSGETATTRBIT_PATHCONF0; \ (b)->bits[1] = NFSGETATTRBIT_PATHCONF1; \ (b)->bits[2] = NFSGETATTRBIT_PATHCONF2; \ } while (0) #define NFSSTATFS_GETATTRBIT(b) do { \ (b)->bits[0] = NFSGETATTRBIT_STATFS0; \ (b)->bits[1] = NFSGETATTRBIT_STATFS1; \ (b)->bits[2] = NFSGETATTRBIT_STATFS2; \ } while (0) #define NFSISSETSTATFS_ATTRBIT(b) \ (((b)->bits[0] & NFSATTRBIT_STATFS0) || \ ((b)->bits[1] & NFSATTRBIT_STATFS1) || \ ((b)->bits[2] & NFSATTRBIT_STATFS2)) #define NFSCLRSTATFS_ATTRBIT(b) do { \ (b)->bits[0] &= ~NFSATTRBIT_STATFS0; \ (b)->bits[1] &= ~NFSATTRBIT_STATFS1; \ (b)->bits[2] &= ~NFSATTRBIT_STATFS2; \ } while (0) #define NFSREADDIRPLUS_ATTRBIT(b) do { \ (b)->bits[0] = NFSATTRBIT_READDIRPLUS0; \ (b)->bits[1] = NFSATTRBIT_READDIRPLUS1; \ (b)->bits[2] = NFSATTRBIT_READDIRPLUS2; \ } while (0) #define NFSREFERRAL_ATTRBIT(b) do { \ (b)->bits[0] = NFSATTRBIT_REFERRAL0; \ (b)->bits[1] = NFSATTRBIT_REFERRAL1; \ (b)->bits[2] = NFSATTRBIT_REFERRAL2; \ } while (0) /* * Store uid, gid creds that were used when the stateid was acquired. * The RPC layer allows NFS_MAXGRPS + 1 groups to go out on the wire, * so that's how many gets stored here. */ struct nfscred { uid_t nfsc_uid; gid_t nfsc_groups[NFS_MAXGRPS + 1]; int nfsc_ngroups; }; /* * Constants that define the file handle for the V4 root directory. * (The FSID must never be used by other file systems that are exported.) */ #define NFSV4ROOT_FSID0 ((int32_t) -1) #define NFSV4ROOT_FSID1 ((int32_t) -1) #define NFSV4ROOT_REFERRAL ((int32_t) -2) #define NFSV4ROOT_INO 2 /* It's traditional */ #define NFSV4ROOT_GEN 1 /* * The set of signals the interrupt an I/O in progress for NFSMNT_INT mounts. * What should be in this set is open to debate, but I believe that since * I/O system calls on ufs are never interrupted by signals the set should * be minimal. My reasoning is that many current programs that use signals * such as SIGALRM will not expect file I/O system calls to be interrupted * by them and break. */ #if defined(_KERNEL) || defined(KERNEL) struct uio; struct buf; struct vattr; struct nameidata; /* XXX */ /* * Socket errors ignored for connectionless sockets? * For now, ignore them all */ #define NFSIGNORE_SOERROR(s, e) \ ((e) != EINTR && (e) != ERESTART && (e) != EWOULDBLOCK && \ ((s) & PR_CONNREQUIRED) == 0) /* * This structure holds socket information for a connection. Used by the * client and the server for callbacks. */ struct nfssockreq { NFSSOCKADDR_T nr_nam; int nr_sotype; int nr_soproto; int nr_soflags; struct ucred *nr_cred; int nr_lock; NFSMUTEX_T nr_mtx; u_int32_t nr_prog; u_int32_t nr_vers; struct __rpc_client *nr_client; AUTH *nr_auth; }; /* * And associated nr_lock bits. */ #define NFSR_SNDLOCK 0x01 #define NFSR_WANTSND 0x02 #define NFSR_RCVLOCK 0x04 #define NFSR_WANTRCV 0x08 #define NFSR_RESERVEDPORT 0x10 #define NFSR_LOCALHOST 0x20 /* * Queue head for nfsreq's */ TAILQ_HEAD(nfsreqhead, nfsreq); /* This is the only nfsreq R_xxx flag still used. */ #define R_DONTRECOVER 0x00000100 /* don't initiate recovery when this rpc gets a stale state reply */ /* * Network address hash list element */ union nethostaddr { struct in_addr had_inet; struct in6_addr had_inet6; }; /* * Structure of list of mechanisms. */ struct nfsgss_mechlist { int len; const u_char *str; int totlen; }; #define KERBV_MECH 0 /* position in list */ /* * This structure is used by the server for describing each request. */ struct nfsrv_descript { struct mbuf *nd_mrep; /* Request mbuf list */ struct mbuf *nd_md; /* Current dissect mbuf */ struct mbuf *nd_mreq; /* Reply mbuf list */ struct mbuf *nd_mb; /* Current build mbuf */ NFSSOCKADDR_T nd_nam; /* and socket addr */ NFSSOCKADDR_T nd_nam2; /* return socket addr */ caddr_t nd_dpos; /* Current dissect pos */ caddr_t nd_bpos; /* Current build pos */ u_int64_t nd_flag; /* nd_flag */ u_int16_t nd_procnum; /* RPC # */ u_int32_t nd_repstat; /* Reply status */ int *nd_errp; /* Pointer to ret status */ u_int32_t nd_retxid; /* Reply xid */ struct nfsrvcache *nd_rp; /* Assoc. cache entry */ fhandle_t nd_fh; /* File handle */ struct ucred *nd_cred; /* Credentials */ uid_t nd_saveduid; /* Saved uid */ u_int64_t nd_sockref; /* Rcv socket ref# */ u_int64_t nd_compref; /* Compound RPC ref# */ time_t nd_tcpconntime; /* Time TCP connection est. */ nfsquad_t nd_clientid; /* Implied clientid */ int nd_gssnamelen; /* principal name length */ char *nd_gssname; /* principal name */ uint32_t *nd_slotseq; /* ptr to slot seq# in req */ uint8_t nd_sessionid[NFSX_V4SESSIONID]; /* Session id */ uint32_t nd_slotid; /* Slotid for this RPC */ SVCXPRT *nd_xprt; /* Server RPC handle */ uint32_t *nd_sequence; /* Sequence Op. ptr */ nfsv4stateid_t nd_curstateid; /* Current StateID */ nfsv4stateid_t nd_savedcurstateid; /* Saved Current StateID */ uint32_t nd_maxreq; /* Max. request (session). */ uint32_t nd_maxresp; /* Max. reply (session). */ int nd_bextpg; /* Current ext_pgs page */ int nd_bextpgsiz; /* Bytes left in page */ int nd_maxextsiz; /* Max ext_pgs mbuf size */ }; #define nd_princlen nd_gssnamelen #define nd_principal nd_gssname /* Bits for "nd_flag" */ #define ND_DONTSAVEREPLY 0x00000001 #define ND_SAVEREPLY 0x00000002 #define ND_NFSV2 0x00000004 #define ND_NFSV3 0x00000008 #define ND_NFSV4 0x00000010 #define ND_KERBV 0x00000020 #define ND_GSSINTEGRITY 0x00000040 #define ND_GSSPRIVACY 0x00000080 #define ND_WINDOWVERF 0x00000100 #define ND_GSSINITREPLY 0x00000200 #define ND_STREAMSOCK 0x00000400 #define ND_PUBLOOKUP 0x00000800 #define ND_USEGSSNAME 0x00001000 #define ND_SAMETCPCONN 0x00002000 #define ND_IMPLIEDCLID 0x00004000 #define ND_NOMOREDATA 0x00008000 #define ND_V4WCCATTR 0x00010000 #define ND_NFSCB 0x00020000 #define ND_AUTHNONE 0x00040000 #define ND_EXAUTHSYS 0x00080000 #define ND_EXGSS 0x00100000 #define ND_EXGSSINTEGRITY 0x00200000 #define ND_EXGSSPRIVACY 0x00400000 #define ND_INCRSEQID 0x00800000 #define ND_NFSCL 0x01000000 #define ND_NFSV41 0x02000000 #define ND_HASSEQUENCE 0x04000000 #define ND_CACHETHIS 0x08000000 #define ND_LASTOP 0x10000000 #define ND_LOOPBADSESS 0x20000000 #define ND_DSSERVER 0x40000000 #define ND_CURSTATEID 0x80000000 #define ND_SAVEDCURSTATEID 0x100000000 #define ND_HASSLOTID 0x200000000 #define ND_NFSV42 0x400000000 #define ND_EXTPG 0x800000000 #define ND_TLS 0x1000000000 #define ND_TLSCERT 0x2000000000 #define ND_TLSCERTUSER 0x4000000000 #define ND_EXTLS 0x8000000000 #define ND_EXTLSCERT 0x10000000000 #define ND_EXTLSCERTUSER 0x20000000000 #define ND_ERELOOKUP 0x40000000000 /* * ND_GSS should be the "or" of all GSS type authentications. */ #define ND_GSS (ND_KERBV) struct nfsv4_opflag { int retfh; int needscfh; int savereply; int modifyfs; int lktype; int needsseq; int loopbadsess; }; /* * Flags used to indicate what to do w.r.t. seqid checking. */ #define NFSRVSEQID_FIRST 0x01 #define NFSRVSEQID_LAST 0x02 #define NFSRVSEQID_OPEN 0x04 /* * assign a doubly linked list to a new head * and prepend one list into another. */ #define LIST_NEWHEAD(nhead, ohead, field) do { \ if (((nhead)->lh_first = (ohead)->lh_first) != NULL) \ (ohead)->lh_first->field.le_prev = &(nhead)->lh_first; \ (ohead)->lh_first = NULL; \ } while (0) #define LIST_PREPEND(head, phead, lelm, field) do { \ if ((head)->lh_first != NULL) { \ (lelm)->field.le_next = (head)->lh_first; \ (lelm)->field.le_next->field.le_prev = \ &(lelm)->field.le_next; \ } \ (head)->lh_first = (phead)->lh_first; \ (head)->lh_first->field.le_prev = &(head)->lh_first; \ } while (0) /* * File handle structure for client. Malloc'd to the correct length with * malloc type M_NFSFH. */ struct nfsfh { u_int16_t nfh_len; /* Length of file handle */ u_int8_t nfh_fh[1]; /* and the file handle */ }; /* * File handle structure for server. The NFSRV_MAXFH constant is * set in nfsdport.h. I use a 32bit length, so that alignment is * preserved. */ struct nfsrvfh { u_int32_t nfsrvfh_len; u_int8_t nfsrvfh_data[NFSRV_MAXFH]; }; /* * This structure is used for sleep locks on the NFSv4 nfsd threads and * NFSv4 client data structures. */ struct nfsv4lock { u_int32_t nfslock_usecnt; u_int8_t nfslock_lock; }; #define NFSV4LOCK_LOCK 0x01 #define NFSV4LOCK_LOCKWANTED 0x02 #define NFSV4LOCK_WANTED 0x04 /* * Values for the override argument for nfsvno_accchk(). */ #define NFSACCCHK_NOOVERRIDE 0 #define NFSACCCHK_ALLOWROOT 1 #define NFSACCCHK_ALLOWOWNER 2 /* * and values for the vpislocked argument for nfsvno_accchk(). */ #define NFSACCCHK_VPNOTLOCKED 0 #define NFSACCCHK_VPISLOCKED 1 /* * Slot for the NFSv4.1 Sequence Op. */ struct nfsslot { int nfssl_inprog; uint32_t nfssl_seq; struct mbuf *nfssl_reply; }; /* Enumerated type for nfsuserd state. */ typedef enum { NOTRUNNING=0, STARTSTOP=1, RUNNING=2 } nfsuserd_state; #endif /* _KERNEL */ #endif /* _NFS_NFS_H */ diff --git a/sys/fs/nfs/nfs_commonport.c b/sys/fs/nfs/nfs_commonport.c index 47038980c640..8a100749fc32 100644 --- a/sys/fs/nfs/nfs_commonport.c +++ b/sys/fs/nfs/nfs_commonport.c @@ -1,946 +1,947 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * Functions that need to be different for different versions of BSD * kernel should be kept here, along with any global storage specific * to this BSD variant. */ #include #include #include #include #include #include #include #include #include #include #include #include #include extern int nfscl_ticks; extern nfsuserd_state nfsrv_nfsuserd; extern struct nfssockreq nfsrv_nfsuserdsock; extern void (*nfsd_call_recall)(struct vnode *, int, struct ucred *, struct thread *); extern int nfsrv_useacl; struct mount nfsv4root_mnt; int newnfs_numnfsd = 0; struct nfsstatsv1 nfsstatsv1; int nfs_numnfscbd = 0; int nfscl_debuglevel = 0; char nfsv4_callbackaddr[INET6_ADDRSTRLEN]; struct callout newnfsd_callout; int nfsrv_lughashsize = 100; struct mtx nfsrv_dslock_mtx; struct nfsdevicehead nfsrv_devidhead; volatile int nfsrv_devidcnt = 0; void (*nfsd_call_servertimer)(void) = NULL; void (*ncl_call_invalcaches)(struct vnode *) = NULL; vop_advlock_t *nfs_advlock_p = NULL; vop_reclaim_t *nfs_reclaim_p = NULL; +uint32_t nfs_srvmaxio = NFS_SRVMAXIO; int nfs_pnfsio(task_fn_t *, void *); static int nfs_realign_test; static int nfs_realign_count; static struct ext_nfsstats oldnfsstats; static struct nfsstatsov1 nfsstatsov1; SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "NFS filesystem"); SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_test, CTLFLAG_RW, &nfs_realign_test, 0, "Number of realign tests done"); SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_count, CTLFLAG_RW, &nfs_realign_count, 0, "Number of mbuf realignments done"); SYSCTL_STRING(_vfs_nfs, OID_AUTO, callback_addr, CTLFLAG_RW, nfsv4_callbackaddr, sizeof(nfsv4_callbackaddr), "NFSv4 callback addr for server to use"); SYSCTL_INT(_vfs_nfs, OID_AUTO, debuglevel, CTLFLAG_RW, &nfscl_debuglevel, 0, "Debug level for NFS client"); SYSCTL_INT(_vfs_nfs, OID_AUTO, userhashsize, CTLFLAG_RDTUN, &nfsrv_lughashsize, 0, "Size of hash tables for uid/name mapping"); int nfs_pnfsiothreads = -1; SYSCTL_INT(_vfs_nfs, OID_AUTO, pnfsiothreads, CTLFLAG_RW, &nfs_pnfsiothreads, 0, "Number of pNFS mirror I/O threads"); /* * Defines for malloc * (Here for FreeBSD, since they allocate storage.) */ MALLOC_DEFINE(M_NEWNFSRVCACHE, "NFSD srvcache", "NFSD Server Request Cache"); MALLOC_DEFINE(M_NEWNFSDCLIENT, "NFSD V4client", "NFSD V4 Client Id"); MALLOC_DEFINE(M_NEWNFSDSTATE, "NFSD V4state", "NFSD V4 State (Openowner, Open, Lockowner, Delegation"); MALLOC_DEFINE(M_NEWNFSDLOCK, "NFSD V4lock", "NFSD V4 byte range lock"); MALLOC_DEFINE(M_NEWNFSDLOCKFILE, "NFSD lckfile", "NFSD Open/Lock file"); MALLOC_DEFINE(M_NEWNFSSTRING, "NFSD string", "NFSD V4 long string"); MALLOC_DEFINE(M_NEWNFSUSERGROUP, "NFSD usrgroup", "NFSD V4 User/group map"); MALLOC_DEFINE(M_NEWNFSDREQ, "NFS req", "NFS request header"); MALLOC_DEFINE(M_NEWNFSFH, "NFS fh", "NFS file handle"); MALLOC_DEFINE(M_NEWNFSCLOWNER, "NFSCL owner", "NFSCL Open Owner"); MALLOC_DEFINE(M_NEWNFSCLOPEN, "NFSCL open", "NFSCL Open"); MALLOC_DEFINE(M_NEWNFSCLDELEG, "NFSCL deleg", "NFSCL Delegation"); MALLOC_DEFINE(M_NEWNFSCLCLIENT, "NFSCL client", "NFSCL Client"); MALLOC_DEFINE(M_NEWNFSCLLOCKOWNER, "NFSCL lckown", "NFSCL Lock Owner"); MALLOC_DEFINE(M_NEWNFSCLLOCK, "NFSCL lck", "NFSCL Lock"); MALLOC_DEFINE(M_NEWNFSV4NODE, "NEWNFSnode", "NFS vnode"); MALLOC_DEFINE(M_NEWNFSDIRECTIO, "NEWdirectio", "NFS Direct IO buffer"); MALLOC_DEFINE(M_NEWNFSDIROFF, "NFSCL diroff", "NFS directory offset data"); MALLOC_DEFINE(M_NEWNFSDROLLBACK, "NFSD rollback", "NFS local lock rollback"); MALLOC_DEFINE(M_NEWNFSLAYOUT, "NFSCL layout", "NFSv4.1 Layout"); MALLOC_DEFINE(M_NEWNFSFLAYOUT, "NFSCL flayout", "NFSv4.1 File Layout"); MALLOC_DEFINE(M_NEWNFSDEVINFO, "NFSCL devinfo", "NFSv4.1 Device Info"); MALLOC_DEFINE(M_NEWNFSSOCKREQ, "NFSCL sockreq", "NFS Sock Req"); MALLOC_DEFINE(M_NEWNFSCLDS, "NFSCL session", "NFSv4.1 Session"); MALLOC_DEFINE(M_NEWNFSLAYRECALL, "NFSCL layrecall", "NFSv4.1 Layout Recall"); MALLOC_DEFINE(M_NEWNFSDSESSION, "NFSD session", "NFSD Session for a client"); /* * Definition of mutex locks. * newnfsd_mtx is used in nfsrvd_nfsd() to protect the nfs socket list * and assorted other nfsd structures. */ struct mtx newnfsd_mtx; struct mtx nfs_sockl_mutex; struct mtx nfs_state_mutex; struct mtx nfs_nameid_mutex; struct mtx nfs_req_mutex; struct mtx nfs_slock_mutex; struct mtx nfs_clstate_mutex; /* local functions */ static int nfssvc_call(struct thread *, struct nfssvc_args *, struct ucred *); #ifdef __NO_STRICT_ALIGNMENT /* * These architectures don't need re-alignment, so just return. */ int newnfs_realign(struct mbuf **pm, int how) { return (0); } #else /* !__NO_STRICT_ALIGNMENT */ /* * newnfs_realign: * * Check for badly aligned mbuf data and realign by copying the unaligned * portion of the data into a new mbuf chain and freeing the portions * of the old chain that were replaced. * * We cannot simply realign the data within the existing mbuf chain * because the underlying buffers may contain other rpc commands and * we cannot afford to overwrite them. * * We would prefer to avoid this situation entirely. The situation does * not occur with NFS/UDP and is supposed to only occasionally occur * with TCP. Use vfs.nfs.realign_count and realign_test to check this. * */ int newnfs_realign(struct mbuf **pm, int how) { struct mbuf *m, *n; int off, space; ++nfs_realign_test; while ((m = *pm) != NULL) { if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) { /* * NB: we can't depend on m_pkthdr.len to help us * decide what to do here. May not be worth doing * the m_length calculation as m_copyback will * expand the mbuf chain below as needed. */ space = m_length(m, NULL); if (space >= MINCLSIZE) { /* NB: m_copyback handles space > MCLBYTES */ n = m_getcl(how, MT_DATA, 0); } else n = m_get(how, MT_DATA); if (n == NULL) return (ENOMEM); /* * Align the remainder of the mbuf chain. */ n->m_len = 0; off = 0; while (m != NULL) { m_copyback(n, off, m->m_len, mtod(m, caddr_t)); off += m->m_len; m = m->m_next; } m_freem(*pm); *pm = n; ++nfs_realign_count; break; } pm = &m->m_next; } return (0); } #endif /* __NO_STRICT_ALIGNMENT */ #ifdef notdef static void nfsrv_object_create(struct vnode *vp, struct thread *td) { if (vp == NULL || vp->v_type != VREG) return; (void) vfs_object_create(vp, td, td->td_ucred); } #endif /* * Look up a file name. Basically just initialize stuff and call namei(). */ int nfsrv_lookupfilename(struct nameidata *ndp, char *fname, NFSPROC_T *p) { int error; NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, fname, p); error = namei(ndp); if (!error) { NDFREE(ndp, NDF_ONLY_PNBUF); } return (error); } /* * Copy NFS uid, gids to the cred structure. */ void newnfs_copycred(struct nfscred *nfscr, struct ucred *cr) { KASSERT(nfscr->nfsc_ngroups >= 0, ("newnfs_copycred: negative nfsc_ngroups")); cr->cr_uid = nfscr->nfsc_uid; crsetgroups(cr, nfscr->nfsc_ngroups, nfscr->nfsc_groups); } /* * Map args from nfsmsleep() to msleep(). */ int nfsmsleep(void *chan, void *mutex, int prio, const char *wmesg, struct timespec *ts) { u_int64_t nsecval; int error, timeo; if (ts) { timeo = hz * ts->tv_sec; nsecval = (u_int64_t)ts->tv_nsec; nsecval = ((nsecval * ((u_int64_t)hz)) + 500000000) / 1000000000; timeo += (int)nsecval; } else { timeo = 0; } error = msleep(chan, (struct mtx *)mutex, prio, wmesg, timeo); return (error); } /* * Get the file system info for the server. For now, just assume FFS. */ void nfsvno_getfs(struct nfsfsinfo *sip, int isdgram) { int pref; /* * XXX * There should be file system VFS OP(s) to get this information. * For now, assume ufs. */ if (isdgram) pref = NFS_MAXDGRAMDATA; else - pref = NFS_SRVMAXIO; - sip->fs_rtmax = NFS_SRVMAXIO; + pref = nfs_srvmaxio; + sip->fs_rtmax = nfs_srvmaxio; sip->fs_rtpref = pref; sip->fs_rtmult = NFS_FABLKSIZE; - sip->fs_wtmax = NFS_SRVMAXIO; + sip->fs_wtmax = nfs_srvmaxio; sip->fs_wtpref = pref; sip->fs_wtmult = NFS_FABLKSIZE; sip->fs_dtpref = pref; sip->fs_maxfilesize = 0xffffffffffffffffull; sip->fs_timedelta.tv_sec = 0; sip->fs_timedelta.tv_nsec = 1; sip->fs_properties = (NFSV3FSINFO_LINK | NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS | NFSV3FSINFO_CANSETTIME); } /* * Do the pathconf vnode op. */ int nfsvno_pathconf(struct vnode *vp, int flag, long *retf, struct ucred *cred, struct thread *p) { int error; error = VOP_PATHCONF(vp, flag, retf); if (error == EOPNOTSUPP || error == EINVAL) { /* * Some file systems return EINVAL for name arguments not * supported and some return EOPNOTSUPP for this case. * So the NFSv3 Pathconf RPC doesn't fail for these cases, * just fake them. */ switch (flag) { case _PC_LINK_MAX: *retf = NFS_LINK_MAX; break; case _PC_NAME_MAX: *retf = NAME_MAX; break; case _PC_CHOWN_RESTRICTED: *retf = 1; break; case _PC_NO_TRUNC: *retf = 1; break; default: /* * Only happens if a _PC_xxx is added to the server, * but this isn't updated. */ *retf = 0; printf("nfsrvd pathconf flag=%d not supp\n", flag); } error = 0; } NFSEXITCODE(error); return (error); } /* Fake nfsrv_atroot. Just return 0 */ int nfsrv_atroot(struct vnode *vp, uint64_t *retp) { return (0); } /* * Set the credentials to refer to root. * If only the various BSDen could agree on whether cr_gid is a separate * field or cr_groups[0]... */ void newnfs_setroot(struct ucred *cred) { cred->cr_uid = 0; cred->cr_groups[0] = 0; cred->cr_ngroups = 1; } /* * Get the client credential. Used for Renew and recovery. */ struct ucred * newnfs_getcred(void) { struct ucred *cred; struct thread *td = curthread; cred = crdup(td->td_ucred); newnfs_setroot(cred); return (cred); } /* * Nfs timer routine * Call the nfsd's timer function once/sec. */ void newnfs_timer(void *arg) { static time_t lasttime = 0; /* * Call the server timer, if set up. * The argument indicates if it is the next second and therefore * leases should be checked. */ if (lasttime != NFSD_MONOSEC) { lasttime = NFSD_MONOSEC; if (nfsd_call_servertimer != NULL) (*nfsd_call_servertimer)(); } callout_reset(&newnfsd_callout, nfscl_ticks, newnfs_timer, NULL); } /* * Sleep for a short period of time unless errval == NFSERR_GRACE, where * the sleep should be for 5 seconds. * Since lbolt doesn't exist in FreeBSD-CURRENT, just use a timeout on * an event that never gets a wakeup. Only return EINTR or 0. */ int nfs_catnap(int prio, int errval, const char *wmesg) { static int non_event; int ret; if (errval == NFSERR_GRACE) ret = tsleep(&non_event, prio, wmesg, 5 * hz); else ret = tsleep(&non_event, prio, wmesg, 1); if (ret != EINTR) ret = 0; return (ret); } /* * Get referral. For now, just fail. */ struct nfsreferral * nfsv4root_getreferral(struct vnode *vp, struct vnode *dvp, u_int32_t fileno) { return (NULL); } static int nfssvc_nfscommon(struct thread *td, struct nfssvc_args *uap) { int error; error = nfssvc_call(td, uap, td->td_ucred); NFSEXITCODE(error); return (error); } static int nfssvc_call(struct thread *p, struct nfssvc_args *uap, struct ucred *cred) { int error = EINVAL, i, j; struct nfsd_idargs nid; struct nfsd_oidargs onid; struct { int vers; /* Just the first field of nfsstats. */ } nfsstatver; if (uap->flag & NFSSVC_IDNAME) { if ((uap->flag & NFSSVC_NEWSTRUCT) != 0) error = copyin(uap->argp, &nid, sizeof(nid)); else { error = copyin(uap->argp, &onid, sizeof(onid)); if (error == 0) { nid.nid_flag = onid.nid_flag; nid.nid_uid = onid.nid_uid; nid.nid_gid = onid.nid_gid; nid.nid_usermax = onid.nid_usermax; nid.nid_usertimeout = onid.nid_usertimeout; nid.nid_name = onid.nid_name; nid.nid_namelen = onid.nid_namelen; nid.nid_ngroup = 0; nid.nid_grps = NULL; } } if (error) goto out; error = nfssvc_idname(&nid); goto out; } else if (uap->flag & NFSSVC_GETSTATS) { if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) { /* Copy fields to the old ext_nfsstat structure. */ oldnfsstats.attrcache_hits = nfsstatsv1.attrcache_hits; oldnfsstats.attrcache_misses = nfsstatsv1.attrcache_misses; oldnfsstats.lookupcache_hits = nfsstatsv1.lookupcache_hits; oldnfsstats.lookupcache_misses = nfsstatsv1.lookupcache_misses; oldnfsstats.direofcache_hits = nfsstatsv1.direofcache_hits; oldnfsstats.direofcache_misses = nfsstatsv1.direofcache_misses; oldnfsstats.accesscache_hits = nfsstatsv1.accesscache_hits; oldnfsstats.accesscache_misses = nfsstatsv1.accesscache_misses; oldnfsstats.biocache_reads = nfsstatsv1.biocache_reads; oldnfsstats.read_bios = nfsstatsv1.read_bios; oldnfsstats.read_physios = nfsstatsv1.read_physios; oldnfsstats.biocache_writes = nfsstatsv1.biocache_writes; oldnfsstats.write_bios = nfsstatsv1.write_bios; oldnfsstats.write_physios = nfsstatsv1.write_physios; oldnfsstats.biocache_readlinks = nfsstatsv1.biocache_readlinks; oldnfsstats.readlink_bios = nfsstatsv1.readlink_bios; oldnfsstats.biocache_readdirs = nfsstatsv1.biocache_readdirs; oldnfsstats.readdir_bios = nfsstatsv1.readdir_bios; for (i = 0; i < NFSV4_NPROCS; i++) oldnfsstats.rpccnt[i] = nfsstatsv1.rpccnt[i]; oldnfsstats.rpcretries = nfsstatsv1.rpcretries; for (i = 0; i < NFSV4OP_NOPS; i++) oldnfsstats.srvrpccnt[i] = nfsstatsv1.srvrpccnt[i]; for (i = NFSV42_NOPS, j = NFSV4OP_NOPS; i < NFSV42_NOPS + NFSV4OP_FAKENOPS; i++, j++) oldnfsstats.srvrpccnt[j] = nfsstatsv1.srvrpccnt[i]; oldnfsstats.reserved_0 = 0; oldnfsstats.reserved_1 = 0; oldnfsstats.rpcrequests = nfsstatsv1.rpcrequests; oldnfsstats.rpctimeouts = nfsstatsv1.rpctimeouts; oldnfsstats.rpcunexpected = nfsstatsv1.rpcunexpected; oldnfsstats.rpcinvalid = nfsstatsv1.rpcinvalid; oldnfsstats.srvcache_inproghits = nfsstatsv1.srvcache_inproghits; oldnfsstats.reserved_2 = 0; oldnfsstats.srvcache_nonidemdonehits = nfsstatsv1.srvcache_nonidemdonehits; oldnfsstats.srvcache_misses = nfsstatsv1.srvcache_misses; oldnfsstats.srvcache_tcppeak = nfsstatsv1.srvcache_tcppeak; oldnfsstats.srvcache_size = nfsstatsv1.srvcache_size; oldnfsstats.srvclients = nfsstatsv1.srvclients; oldnfsstats.srvopenowners = nfsstatsv1.srvopenowners; oldnfsstats.srvopens = nfsstatsv1.srvopens; oldnfsstats.srvlockowners = nfsstatsv1.srvlockowners; oldnfsstats.srvlocks = nfsstatsv1.srvlocks; oldnfsstats.srvdelegates = nfsstatsv1.srvdelegates; for (i = 0; i < NFSV4OP_CBNOPS; i++) oldnfsstats.cbrpccnt[i] = nfsstatsv1.cbrpccnt[i]; oldnfsstats.clopenowners = nfsstatsv1.clopenowners; oldnfsstats.clopens = nfsstatsv1.clopens; oldnfsstats.cllockowners = nfsstatsv1.cllockowners; oldnfsstats.cllocks = nfsstatsv1.cllocks; oldnfsstats.cldelegates = nfsstatsv1.cldelegates; oldnfsstats.cllocalopenowners = nfsstatsv1.cllocalopenowners; oldnfsstats.cllocalopens = nfsstatsv1.cllocalopens; oldnfsstats.cllocallockowners = nfsstatsv1.cllocallockowners; oldnfsstats.cllocallocks = nfsstatsv1.cllocallocks; error = copyout(&oldnfsstats, uap->argp, sizeof (oldnfsstats)); } else { error = copyin(uap->argp, &nfsstatver, sizeof(nfsstatver)); if (error == 0) { if (nfsstatver.vers == NFSSTATS_OV1) { /* Copy nfsstatsv1 to nfsstatsov1. */ nfsstatsov1.attrcache_hits = nfsstatsv1.attrcache_hits; nfsstatsov1.attrcache_misses = nfsstatsv1.attrcache_misses; nfsstatsov1.lookupcache_hits = nfsstatsv1.lookupcache_hits; nfsstatsov1.lookupcache_misses = nfsstatsv1.lookupcache_misses; nfsstatsov1.direofcache_hits = nfsstatsv1.direofcache_hits; nfsstatsov1.direofcache_misses = nfsstatsv1.direofcache_misses; nfsstatsov1.accesscache_hits = nfsstatsv1.accesscache_hits; nfsstatsov1.accesscache_misses = nfsstatsv1.accesscache_misses; nfsstatsov1.biocache_reads = nfsstatsv1.biocache_reads; nfsstatsov1.read_bios = nfsstatsv1.read_bios; nfsstatsov1.read_physios = nfsstatsv1.read_physios; nfsstatsov1.biocache_writes = nfsstatsv1.biocache_writes; nfsstatsov1.write_bios = nfsstatsv1.write_bios; nfsstatsov1.write_physios = nfsstatsv1.write_physios; nfsstatsov1.biocache_readlinks = nfsstatsv1.biocache_readlinks; nfsstatsov1.readlink_bios = nfsstatsv1.readlink_bios; nfsstatsov1.biocache_readdirs = nfsstatsv1.biocache_readdirs; nfsstatsov1.readdir_bios = nfsstatsv1.readdir_bios; for (i = 0; i < NFSV42_NPROCS; i++) nfsstatsov1.rpccnt[i] = nfsstatsv1.rpccnt[i]; nfsstatsov1.rpcretries = nfsstatsv1.rpcretries; for (i = 0; i < NFSV42_PURENOPS; i++) nfsstatsov1.srvrpccnt[i] = nfsstatsv1.srvrpccnt[i]; for (i = NFSV42_NOPS, j = NFSV42_PURENOPS; i < NFSV42_NOPS + NFSV4OP_FAKENOPS; i++, j++) nfsstatsov1.srvrpccnt[j] = nfsstatsv1.srvrpccnt[i]; nfsstatsov1.reserved_0 = 0; nfsstatsov1.reserved_1 = 0; nfsstatsov1.rpcrequests = nfsstatsv1.rpcrequests; nfsstatsov1.rpctimeouts = nfsstatsv1.rpctimeouts; nfsstatsov1.rpcunexpected = nfsstatsv1.rpcunexpected; nfsstatsov1.rpcinvalid = nfsstatsv1.rpcinvalid; nfsstatsov1.srvcache_inproghits = nfsstatsv1.srvcache_inproghits; nfsstatsov1.reserved_2 = 0; nfsstatsov1.srvcache_nonidemdonehits = nfsstatsv1.srvcache_nonidemdonehits; nfsstatsov1.srvcache_misses = nfsstatsv1.srvcache_misses; nfsstatsov1.srvcache_tcppeak = nfsstatsv1.srvcache_tcppeak; nfsstatsov1.srvcache_size = nfsstatsv1.srvcache_size; nfsstatsov1.srvclients = nfsstatsv1.srvclients; nfsstatsov1.srvopenowners = nfsstatsv1.srvopenowners; nfsstatsov1.srvopens = nfsstatsv1.srvopens; nfsstatsov1.srvlockowners = nfsstatsv1.srvlockowners; nfsstatsov1.srvlocks = nfsstatsv1.srvlocks; nfsstatsov1.srvdelegates = nfsstatsv1.srvdelegates; for (i = 0; i < NFSV42_CBNOPS; i++) nfsstatsov1.cbrpccnt[i] = nfsstatsv1.cbrpccnt[i]; nfsstatsov1.clopenowners = nfsstatsv1.clopenowners; nfsstatsov1.clopens = nfsstatsv1.clopens; nfsstatsov1.cllockowners = nfsstatsv1.cllockowners; nfsstatsov1.cllocks = nfsstatsv1.cllocks; nfsstatsov1.cldelegates = nfsstatsv1.cldelegates; nfsstatsov1.cllocalopenowners = nfsstatsv1.cllocalopenowners; nfsstatsov1.cllocalopens = nfsstatsv1.cllocalopens; nfsstatsov1.cllocallockowners = nfsstatsv1.cllocallockowners; nfsstatsov1.cllocallocks = nfsstatsv1.cllocallocks; nfsstatsov1.srvstartcnt = nfsstatsv1.srvstartcnt; nfsstatsov1.srvdonecnt = nfsstatsv1.srvdonecnt; for (i = NFSV42_NOPS, j = NFSV42_PURENOPS; i < NFSV42_NOPS + NFSV4OP_FAKENOPS; i++, j++) { nfsstatsov1.srvbytes[j] = nfsstatsv1.srvbytes[i]; nfsstatsov1.srvops[j] = nfsstatsv1.srvops[i]; nfsstatsov1.srvduration[j] = nfsstatsv1.srvduration[i]; } nfsstatsov1.busyfrom = nfsstatsv1.busyfrom; nfsstatsov1.busyfrom = nfsstatsv1.busyfrom; error = copyout(&nfsstatsov1, uap->argp, sizeof(nfsstatsov1)); } else if (nfsstatver.vers != NFSSTATS_V1) error = EPERM; else error = copyout(&nfsstatsv1, uap->argp, sizeof(nfsstatsv1)); } } if (error == 0) { if ((uap->flag & NFSSVC_ZEROCLTSTATS) != 0) { nfsstatsv1.attrcache_hits = 0; nfsstatsv1.attrcache_misses = 0; nfsstatsv1.lookupcache_hits = 0; nfsstatsv1.lookupcache_misses = 0; nfsstatsv1.direofcache_hits = 0; nfsstatsv1.direofcache_misses = 0; nfsstatsv1.accesscache_hits = 0; nfsstatsv1.accesscache_misses = 0; nfsstatsv1.biocache_reads = 0; nfsstatsv1.read_bios = 0; nfsstatsv1.read_physios = 0; nfsstatsv1.biocache_writes = 0; nfsstatsv1.write_bios = 0; nfsstatsv1.write_physios = 0; nfsstatsv1.biocache_readlinks = 0; nfsstatsv1.readlink_bios = 0; nfsstatsv1.biocache_readdirs = 0; nfsstatsv1.readdir_bios = 0; nfsstatsv1.rpcretries = 0; nfsstatsv1.rpcrequests = 0; nfsstatsv1.rpctimeouts = 0; nfsstatsv1.rpcunexpected = 0; nfsstatsv1.rpcinvalid = 0; bzero(nfsstatsv1.rpccnt, sizeof(nfsstatsv1.rpccnt)); } if ((uap->flag & NFSSVC_ZEROSRVSTATS) != 0) { nfsstatsv1.srvcache_inproghits = 0; nfsstatsv1.srvcache_nonidemdonehits = 0; nfsstatsv1.srvcache_misses = 0; nfsstatsv1.srvcache_tcppeak = 0; bzero(nfsstatsv1.srvrpccnt, sizeof(nfsstatsv1.srvrpccnt)); bzero(nfsstatsv1.cbrpccnt, sizeof(nfsstatsv1.cbrpccnt)); } } goto out; } else if (uap->flag & NFSSVC_NFSUSERDPORT) { u_short sockport; struct nfsuserd_args nargs; if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) { error = copyin(uap->argp, (caddr_t)&sockport, sizeof (u_short)); if (error == 0) { nargs.nuserd_family = AF_INET; nargs.nuserd_port = sockport; } } else { /* * New nfsuserd_args structure, which indicates * which IP version to use along with the port#. */ error = copyin(uap->argp, &nargs, sizeof(nargs)); } if (!error) error = nfsrv_nfsuserdport(&nargs, p); } else if (uap->flag & NFSSVC_NFSUSERDDELPORT) { nfsrv_nfsuserddelport(); error = 0; } out: NFSEXITCODE(error); return (error); } /* * called by all three modevent routines, so that it gets things * initialized soon enough. */ void newnfs_portinit(void) { static int inited = 0; if (inited) return; inited = 1; /* Initialize SMP locks used by both client and server. */ mtx_init(&newnfsd_mtx, "newnfsd_mtx", NULL, MTX_DEF); mtx_init(&nfs_state_mutex, "nfs_state_mutex", NULL, MTX_DEF); mtx_init(&nfs_clstate_mutex, "nfs_clstate_mutex", NULL, MTX_DEF); } /* * Determine if the file system supports NFSv4 ACLs. * Return 1 if it does, 0 otherwise. */ int nfs_supportsnfsv4acls(struct vnode *vp) { int error; long retval; ASSERT_VOP_LOCKED(vp, "nfs supports nfsv4acls"); if (nfsrv_useacl == 0) return (0); error = VOP_PATHCONF(vp, _PC_ACL_NFS4, &retval); if (error == 0 && retval != 0) return (1); return (0); } /* * These are the first fields of all the context structures passed into * nfs_pnfsio(). */ struct pnfsio { int done; int inprog; struct task tsk; }; /* * Do a mirror I/O on a pNFS thread. */ int nfs_pnfsio(task_fn_t *func, void *context) { struct pnfsio *pio; int ret; static struct taskqueue *pnfsioq = NULL; pio = (struct pnfsio *)context; if (pnfsioq == NULL) { if (nfs_pnfsiothreads == 0) return (EPERM); if (nfs_pnfsiothreads < 0) nfs_pnfsiothreads = mp_ncpus * 4; pnfsioq = taskqueue_create("pnfsioq", M_WAITOK, taskqueue_thread_enqueue, &pnfsioq); if (pnfsioq == NULL) return (ENOMEM); ret = taskqueue_start_threads(&pnfsioq, nfs_pnfsiothreads, 0, "pnfsiot"); if (ret != 0) { taskqueue_free(pnfsioq); pnfsioq = NULL; return (ret); } } pio->inprog = 1; TASK_INIT(&pio->tsk, 0, func, context); ret = taskqueue_enqueue(pnfsioq, &pio->tsk); if (ret != 0) pio->inprog = 0; return (ret); } extern int (*nfsd_call_nfscommon)(struct thread *, struct nfssvc_args *); /* * Called once to initialize data structures... */ static int nfscommon_modevent(module_t mod, int type, void *data) { int error = 0; static int loaded = 0; switch (type) { case MOD_LOAD: if (loaded) goto out; newnfs_portinit(); mtx_init(&nfs_nameid_mutex, "nfs_nameid_mutex", NULL, MTX_DEF); mtx_init(&nfs_sockl_mutex, "nfs_sockl_mutex", NULL, MTX_DEF); mtx_init(&nfs_slock_mutex, "nfs_slock_mutex", NULL, MTX_DEF); mtx_init(&nfs_req_mutex, "nfs_req_mutex", NULL, MTX_DEF); mtx_init(&nfsrv_nfsuserdsock.nr_mtx, "nfsuserd", NULL, MTX_DEF); mtx_init(&nfsrv_dslock_mtx, "nfs4ds", NULL, MTX_DEF); TAILQ_INIT(&nfsrv_devidhead); callout_init(&newnfsd_callout, 1); newnfs_init(); nfsd_call_nfscommon = nfssvc_nfscommon; loaded = 1; break; case MOD_UNLOAD: if (newnfs_numnfsd != 0 || nfsrv_nfsuserd != NOTRUNNING || nfs_numnfscbd != 0) { error = EBUSY; break; } nfsd_call_nfscommon = NULL; callout_drain(&newnfsd_callout); /* Clean out the name<-->id cache. */ nfsrv_cleanusergroup(); /* and get rid of the mutexes */ mtx_destroy(&nfs_nameid_mutex); mtx_destroy(&newnfsd_mtx); mtx_destroy(&nfs_state_mutex); mtx_destroy(&nfs_clstate_mutex); mtx_destroy(&nfs_sockl_mutex); mtx_destroy(&nfs_slock_mutex); mtx_destroy(&nfs_req_mutex); mtx_destroy(&nfsrv_nfsuserdsock.nr_mtx); mtx_destroy(&nfsrv_dslock_mtx); loaded = 0; break; default: error = EOPNOTSUPP; break; } out: NFSEXITCODE(error); return error; } static moduledata_t nfscommon_mod = { "nfscommon", nfscommon_modevent, NULL, }; DECLARE_MODULE(nfscommon, nfscommon_mod, SI_SUB_VFS, SI_ORDER_ANY); /* So that loader and kldload(2) can find us, wherever we are.. */ MODULE_VERSION(nfscommon, 1); MODULE_DEPEND(nfscommon, nfssvc, 1, 1, 1); MODULE_DEPEND(nfscommon, krpc, 1, 1, 1); diff --git a/sys/fs/nfs/nfs_commonsubs.c b/sys/fs/nfs/nfs_commonsubs.c index 1bdc13123aac..817d89284091 100644 --- a/sys/fs/nfs/nfs_commonsubs.c +++ b/sys/fs/nfs/nfs_commonsubs.c @@ -1,4930 +1,4931 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * These functions support the macros and help fiddle mbuf chains for * the nfs op functions. They do things like create the rpc header and * copy data between mbuf chains and uio lists. */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include /* * Data items converted to xdr at startup, since they are constant * This is kinda hokey, but may save a little time doing byte swaps */ u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1; /* And other global data */ nfstype nfsv34_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK, NFFIFO, NFNON }; enum vtype newnv2tov_type[8] = { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON, VNON }; enum vtype nv34tov_type[8]={ VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO }; struct timeval nfsboottime; /* Copy boottime once, so it never changes */ int nfscl_ticks; int nfsrv_useacl = 1; struct nfssockreq nfsrv_nfsuserdsock; nfsuserd_state nfsrv_nfsuserd = NOTRUNNING; static int nfsrv_userdupcalls = 0; struct nfsreqhead nfsd_reqq; uid_t nfsrv_defaultuid = UID_NOBODY; gid_t nfsrv_defaultgid = GID_NOGROUP; int nfsrv_lease = NFSRV_LEASE; int ncl_mbuf_mlen = MLEN; int nfsd_enable_stringtouid = 0; int nfsrv_doflexfile = 0; static int nfs_enable_uidtostring = 0; NFSNAMEIDMUTEX; NFSSOCKMUTEX; extern int nfsrv_lughashsize; extern struct mtx nfsrv_dslock_mtx; extern volatile int nfsrv_devidcnt; extern int nfscl_debuglevel; extern struct nfsdevicehead nfsrv_devidhead; extern struct nfsstatsv1 nfsstatsv1; +extern uint32_t nfs_srvmaxio; SYSCTL_DECL(_vfs_nfs); SYSCTL_INT(_vfs_nfs, OID_AUTO, enable_uidtostring, CTLFLAG_RW, &nfs_enable_uidtostring, 0, "Make nfs always send numeric owner_names"); int nfsrv_maxpnfsmirror = 1; SYSCTL_INT(_vfs_nfs, OID_AUTO, pnfsmirror, CTLFLAG_RD, &nfsrv_maxpnfsmirror, 0, "Mirror level for pNFS service"); int nfs_maxcopyrange = 10 * 1024 * 1024; SYSCTL_INT(_vfs_nfs, OID_AUTO, maxcopyrange, CTLFLAG_RW, &nfs_maxcopyrange, 0, "Max size of a Copy so RPC times reasonable"); /* * This array of structures indicates, for V4: * retfh - which of 3 types of calling args are used * 0 - doesn't change cfh or use a sfh * 1 - replaces cfh with a new one (unless it returns an error status) * 2 - uses cfh and sfh * needscfh - if the op wants a cfh and premtime * 0 - doesn't use a cfh * 1 - uses a cfh, but doesn't want pre-op attributes * 2 - uses a cfh and wants pre-op attributes * savereply - indicates a non-idempotent Op * 0 - not non-idempotent * 1 - non-idempotent * Ops that are ordered via seqid# are handled separately from these * non-idempotent Ops. * Define it here, since it is used by both the client and server. */ struct nfsv4_opflag nfsv4_opflag[NFSV42_NOPS] = { { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* undef */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* undef */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* undef */ { 0, 1, 0, 0, LK_SHARED, 1, 1 }, /* Access */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Close */ { 0, 2, 0, 1, LK_EXCLUSIVE, 1, 1 }, /* Commit */ { 1, 2, 1, 1, LK_EXCLUSIVE, 1, 1 }, /* Create */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Delegpurge */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Delegreturn */ { 0, 1, 0, 0, LK_SHARED, 1, 1 }, /* Getattr */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* GetFH */ { 2, 1, 1, 1, LK_EXCLUSIVE, 1, 1 }, /* Link */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Lock */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* LockT */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* LockU */ { 1, 2, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Lookup */ { 1, 2, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Lookupp */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* NVerify */ { 1, 1, 0, 1, LK_EXCLUSIVE, 1, 0 }, /* Open */ { 1, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* OpenAttr */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* OpenConfirm */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* OpenDowngrade */ { 1, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* PutFH */ { 1, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* PutPubFH */ { 1, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* PutRootFH */ { 0, 1, 0, 0, LK_SHARED, 1, 0 }, /* Read */ { 0, 1, 0, 0, LK_SHARED, 1, 1 }, /* Readdir */ { 0, 1, 0, 0, LK_SHARED, 1, 1 }, /* ReadLink */ { 0, 2, 1, 1, LK_EXCLUSIVE, 1, 1 }, /* Remove */ { 2, 1, 1, 1, LK_EXCLUSIVE, 1, 1 }, /* Rename */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Renew */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* RestoreFH */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* SaveFH */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* SecInfo */ { 0, 2, 1, 1, LK_EXCLUSIVE, 1, 0 }, /* Setattr */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* SetClientID */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* SetClientIDConfirm */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Verify */ { 0, 2, 1, 1, LK_EXCLUSIVE, 1, 0 }, /* Write */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* ReleaseLockOwner */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Backchannel Ctrl */ { 0, 0, 0, 0, LK_EXCLUSIVE, 0, 0 }, /* Bind Conn to Sess */ { 0, 0, 0, 0, LK_EXCLUSIVE, 0, 0 }, /* Exchange ID */ { 0, 0, 0, 0, LK_EXCLUSIVE, 0, 0 }, /* Create Session */ { 0, 0, 0, 0, LK_EXCLUSIVE, 0, 0 }, /* Destroy Session */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Free StateID */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Get Dir Deleg */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Get Device Info */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Get Device List */ { 0, 1, 0, 1, LK_EXCLUSIVE, 1, 1 }, /* Layout Commit */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Layout Get */ { 0, 1, 0, 1, LK_EXCLUSIVE, 1, 0 }, /* Layout Return */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Secinfo No name */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Sequence */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Set SSV */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Test StateID */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Want Delegation */ { 0, 0, 0, 0, LK_EXCLUSIVE, 0, 0 }, /* Destroy ClientID */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Reclaim Complete */ { 0, 1, 1, 1, LK_EXCLUSIVE, 1, 0 }, /* Allocate */ { 2, 1, 1, 0, LK_SHARED, 1, 0 }, /* Copy */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Copy Notify */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Deallocate */ { 0, 1, 0, 0, LK_SHARED, 1, 0 }, /* IO Advise */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Layout Error */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Layout Stats */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Offload Cancel */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Offload Status */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Read Plus */ { 0, 1, 0, 0, LK_SHARED, 1, 0 }, /* Seek */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Write Same */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Clone */ { 0, 1, 0, 0, LK_SHARED, 1, 1 }, /* Getxattr */ { 0, 1, 1, 1, LK_EXCLUSIVE, 1, 1 }, /* Setxattr */ { 0, 1, 0, 0, LK_SHARED, 1, 1 }, /* Listxattrs */ { 0, 1, 1, 1, LK_EXCLUSIVE, 1, 1 }, /* Removexattr */ }; static int ncl_mbuf_mhlen = MHLEN; static int nfsrv_usercnt = 0; static int nfsrv_dnsnamelen; static u_char *nfsrv_dnsname = NULL; static int nfsrv_usermax = 999999999; struct nfsrv_lughash { struct mtx mtx; struct nfsuserhashhead lughead; }; static struct nfsrv_lughash *nfsuserhash; static struct nfsrv_lughash *nfsusernamehash; static struct nfsrv_lughash *nfsgrouphash; static struct nfsrv_lughash *nfsgroupnamehash; /* * This static array indicates whether or not the RPC generates a large * reply. This is used by nfs_reply() to decide whether or not an mbuf * cluster should be allocated. (If a cluster is required by an RPC * marked 0 in this array, the code will still work, just not quite as * efficiently.) */ static int nfs_bigreply[NFSV42_NPROCS] = { 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0 }; /* local functions */ static int nfsrv_skipace(struct nfsrv_descript *nd, int *acesizep); static void nfsv4_wanted(struct nfsv4lock *lp); static int nfsrv_cmpmixedcase(u_char *cp, u_char *cp2, int len); static int nfsrv_getuser(int procnum, uid_t uid, gid_t gid, char *name); static void nfsrv_removeuser(struct nfsusrgrp *usrp, int isuser); static int nfsrv_getrefstr(struct nfsrv_descript *, u_char **, u_char **, int *, int *); static void nfsrv_refstrbigenough(int, u_char **, u_char **, int *); static struct { int op; int opcnt; const u_char *tag; int taglen; } nfsv4_opmap[NFSV42_NPROCS] = { { 0, 1, "Null", 4 }, { NFSV4OP_GETATTR, 1, "Getattr", 7, }, { NFSV4OP_SETATTR, 2, "Setattr", 7, }, { NFSV4OP_LOOKUP, 3, "Lookup", 6, }, { NFSV4OP_ACCESS, 2, "Access", 6, }, { NFSV4OP_READLINK, 2, "Readlink", 8, }, { NFSV4OP_READ, 1, "Read", 4, }, { NFSV4OP_WRITE, 2, "Write", 5, }, { NFSV4OP_OPEN, 5, "Open", 4, }, { NFSV4OP_CREATE, 5, "Create", 6, }, { NFSV4OP_CREATE, 1, "Create", 6, }, { NFSV4OP_CREATE, 3, "Create", 6, }, { NFSV4OP_REMOVE, 1, "Remove", 6, }, { NFSV4OP_REMOVE, 1, "Remove", 6, }, { NFSV4OP_SAVEFH, 5, "Rename", 6, }, { NFSV4OP_SAVEFH, 4, "Link", 4, }, { NFSV4OP_READDIR, 2, "Readdir", 7, }, { NFSV4OP_READDIR, 2, "Readdir", 7, }, { NFSV4OP_GETATTR, 1, "Getattr", 7, }, { NFSV4OP_GETATTR, 1, "Getattr", 7, }, { NFSV4OP_GETATTR, 1, "Getattr", 7, }, { NFSV4OP_COMMIT, 2, "Commit", 6, }, { NFSV4OP_LOOKUPP, 3, "Lookupp", 7, }, { NFSV4OP_SETCLIENTID, 1, "SetClientID", 11, }, { NFSV4OP_SETCLIENTIDCFRM, 1, "SetClientIDConfirm", 18, }, { NFSV4OP_LOCK, 1, "Lock", 4, }, { NFSV4OP_LOCKU, 1, "LockU", 5, }, { NFSV4OP_OPEN, 2, "Open", 4, }, { NFSV4OP_CLOSE, 1, "Close", 5, }, { NFSV4OP_OPENCONFIRM, 1, "Openconfirm", 11, }, { NFSV4OP_LOCKT, 1, "LockT", 5, }, { NFSV4OP_OPENDOWNGRADE, 1, "Opendowngrade", 13, }, { NFSV4OP_RENEW, 1, "Renew", 5, }, { NFSV4OP_PUTROOTFH, 1, "Dirpath", 7, }, { NFSV4OP_RELEASELCKOWN, 1, "Rellckown", 9, }, { NFSV4OP_DELEGRETURN, 1, "Delegret", 8, }, { NFSV4OP_DELEGRETURN, 3, "DelegRemove", 11, }, { NFSV4OP_DELEGRETURN, 7, "DelegRename1", 12, }, { NFSV4OP_DELEGRETURN, 9, "DelegRename2", 12, }, { NFSV4OP_GETATTR, 1, "Getacl", 6, }, { NFSV4OP_SETATTR, 1, "Setacl", 6, }, { NFSV4OP_EXCHANGEID, 1, "ExchangeID", 10, }, { NFSV4OP_CREATESESSION, 1, "CreateSession", 13, }, { NFSV4OP_DESTROYSESSION, 1, "DestroySession", 14, }, { NFSV4OP_DESTROYCLIENTID, 1, "DestroyClient", 13, }, { NFSV4OP_FREESTATEID, 1, "FreeStateID", 11, }, { NFSV4OP_LAYOUTGET, 1, "LayoutGet", 9, }, { NFSV4OP_GETDEVINFO, 1, "GetDeviceInfo", 13, }, { NFSV4OP_LAYOUTCOMMIT, 1, "LayoutCommit", 12, }, { NFSV4OP_LAYOUTRETURN, 1, "LayoutReturn", 12, }, { NFSV4OP_RECLAIMCOMPL, 1, "ReclaimComplete", 15, }, { NFSV4OP_WRITE, 1, "WriteDS", 7, }, { NFSV4OP_READ, 1, "ReadDS", 6, }, { NFSV4OP_COMMIT, 1, "CommitDS", 8, }, { NFSV4OP_OPEN, 3, "OpenLayoutGet", 13, }, { NFSV4OP_OPEN, 8, "CreateLayGet", 12, }, { NFSV4OP_IOADVISE, 1, "Advise", 6, }, { NFSV4OP_ALLOCATE, 2, "Allocate", 8, }, { NFSV4OP_SAVEFH, 5, "Copy", 4, }, { NFSV4OP_SEEK, 2, "Seek", 4, }, { NFSV4OP_SEEK, 1, "SeekDS", 6, }, { NFSV4OP_GETXATTR, 2, "Getxattr", 8, }, { NFSV4OP_SETXATTR, 2, "Setxattr", 8, }, { NFSV4OP_REMOVEXATTR, 2, "Rmxattr", 7, }, { NFSV4OP_LISTXATTRS, 2, "Listxattr", 9, }, { NFSV4OP_BINDCONNTOSESS, 1, "BindConSess", 11, }, }; /* * NFS RPCS that have large request message size. */ static int nfs_bigrequest[NFSV42_NPROCS] = { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0 }; /* * Start building a request. Mostly just put the first file handle in * place. */ void nfscl_reqstart(struct nfsrv_descript *nd, int procnum, struct nfsmount *nmp, u_int8_t *nfhp, int fhlen, u_int32_t **opcntpp, struct nfsclsession *sep, int vers, int minorvers) { struct mbuf *mb; u_int32_t *tl; int opcnt; nfsattrbit_t attrbits; /* * First, fill in some of the fields of nd. */ nd->nd_slotseq = NULL; if (vers == NFS_VER4) { nd->nd_flag = ND_NFSV4 | ND_NFSCL; if (minorvers == NFSV41_MINORVERSION) nd->nd_flag |= ND_NFSV41; else if (minorvers == NFSV42_MINORVERSION) nd->nd_flag |= (ND_NFSV41 | ND_NFSV42); } else if (vers == NFS_VER3) nd->nd_flag = ND_NFSV3 | ND_NFSCL; else { if (NFSHASNFSV4(nmp)) { nd->nd_flag = ND_NFSV4 | ND_NFSCL; if (nmp->nm_minorvers == 1) nd->nd_flag |= ND_NFSV41; else if (nmp->nm_minorvers == 2) nd->nd_flag |= (ND_NFSV41 | ND_NFSV42); } else if (NFSHASNFSV3(nmp)) nd->nd_flag = ND_NFSV3 | ND_NFSCL; else nd->nd_flag = ND_NFSV2 | ND_NFSCL; } nd->nd_procnum = procnum; nd->nd_repstat = 0; nd->nd_maxextsiz = 0; /* * Get the first mbuf for the request. */ if (nfs_bigrequest[procnum]) NFSMCLGET(mb, M_WAITOK); else NFSMGET(mb); mb->m_len = 0; nd->nd_mreq = nd->nd_mb = mb; nd->nd_bpos = mtod(mb, char *); /* * And fill the first file handle into the request. */ if (nd->nd_flag & ND_NFSV4) { opcnt = nfsv4_opmap[procnum].opcnt + nfsv4_opflag[nfsv4_opmap[procnum].op].needscfh; if ((nd->nd_flag & ND_NFSV41) != 0) { opcnt += nfsv4_opflag[nfsv4_opmap[procnum].op].needsseq; if (procnum == NFSPROC_RENEW) /* * For the special case of Renew, just do a * Sequence Op. */ opcnt = 1; else if (procnum == NFSPROC_WRITEDS || procnum == NFSPROC_COMMITDS) /* * For the special case of a Writeor Commit to * a DS, the opcnt == 3, for Sequence, PutFH, * Write/Commit. */ opcnt = 3; } /* * What should the tag really be? */ (void) nfsm_strtom(nd, nfsv4_opmap[procnum].tag, nfsv4_opmap[procnum].taglen); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if ((nd->nd_flag & ND_NFSV42) != 0) *tl++ = txdr_unsigned(NFSV42_MINORVERSION); else if ((nd->nd_flag & ND_NFSV41) != 0) *tl++ = txdr_unsigned(NFSV41_MINORVERSION); else *tl++ = txdr_unsigned(NFSV4_MINORVERSION); if (opcntpp != NULL) *opcntpp = tl; *tl = txdr_unsigned(opcnt); if ((nd->nd_flag & ND_NFSV41) != 0 && nfsv4_opflag[nfsv4_opmap[procnum].op].needsseq > 0) { if (nfsv4_opflag[nfsv4_opmap[procnum].op].loopbadsess > 0) nd->nd_flag |= ND_LOOPBADSESS; NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_SEQUENCE); if (sep == NULL) { sep = nfsmnt_mdssession(nmp); nfsv4_setsequence(nmp, nd, sep, nfs_bigreply[procnum]); } else nfsv4_setsequence(nmp, nd, sep, nfs_bigreply[procnum]); } if (nfsv4_opflag[nfsv4_opmap[procnum].op].needscfh > 0) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); (void) nfsm_fhtom(nd, nfhp, fhlen, 0); if (nfsv4_opflag[nfsv4_opmap[procnum].op].needscfh == 2 && procnum != NFSPROC_WRITEDS && procnum != NFSPROC_COMMITDS) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); /* * For Lookup Ops, we want all the directory * attributes, so we can load the name cache. */ if (procnum == NFSPROC_LOOKUP || procnum == NFSPROC_LOOKUPP) NFSGETATTR_ATTRBIT(&attrbits); else { NFSWCCATTR_ATTRBIT(&attrbits); nd->nd_flag |= ND_V4WCCATTR; } (void) nfsrv_putattrbit(nd, &attrbits); } } if (procnum != NFSPROC_RENEW || (nd->nd_flag & ND_NFSV41) == 0) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(nfsv4_opmap[procnum].op); } } else { (void) nfsm_fhtom(nd, nfhp, fhlen, 0); } if (procnum < NFSV42_NPROCS) NFSINCRGLOBAL(nfsstatsv1.rpccnt[procnum]); } /* * Put a state Id in the mbuf list. */ void nfsm_stateidtom(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, int flag) { nfsv4stateid_t *st; NFSM_BUILD(st, nfsv4stateid_t *, NFSX_STATEID); if (flag == NFSSTATEID_PUTALLZERO) { st->seqid = 0; st->other[0] = 0; st->other[1] = 0; st->other[2] = 0; } else if (flag == NFSSTATEID_PUTALLONE) { st->seqid = 0xffffffff; st->other[0] = 0xffffffff; st->other[1] = 0xffffffff; st->other[2] = 0xffffffff; } else if (flag == NFSSTATEID_PUTSEQIDZERO) { st->seqid = 0; st->other[0] = stateidp->other[0]; st->other[1] = stateidp->other[1]; st->other[2] = stateidp->other[2]; } else { st->seqid = stateidp->seqid; st->other[0] = stateidp->other[0]; st->other[1] = stateidp->other[1]; st->other[2] = stateidp->other[2]; } } /* * Fill in the setable attributes. The full argument indicates whether * to fill in them all or just mode and time. */ void nfscl_fillsattr(struct nfsrv_descript *nd, struct vattr *vap, struct vnode *vp, int flags, u_int32_t rdev) { u_int32_t *tl; struct nfsv2_sattr *sp; nfsattrbit_t attrbits; struct nfsnode *np; switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) { case ND_NFSV2: NFSM_BUILD(sp, struct nfsv2_sattr *, NFSX_V2SATTR); if (vap->va_mode == (mode_t)VNOVAL) sp->sa_mode = newnfs_xdrneg1; else sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); if (vap->va_uid == (uid_t)VNOVAL) sp->sa_uid = newnfs_xdrneg1; else sp->sa_uid = txdr_unsigned(vap->va_uid); if (vap->va_gid == (gid_t)VNOVAL) sp->sa_gid = newnfs_xdrneg1; else sp->sa_gid = txdr_unsigned(vap->va_gid); if (flags & NFSSATTR_SIZE0) sp->sa_size = 0; else if (flags & NFSSATTR_SIZENEG1) sp->sa_size = newnfs_xdrneg1; else if (flags & NFSSATTR_SIZERDEV) sp->sa_size = txdr_unsigned(rdev); else sp->sa_size = txdr_unsigned(vap->va_size); txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); break; case ND_NFSV3: if (vap->va_mode != (mode_t)VNOVAL) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl = txdr_unsigned(vap->va_mode); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } if ((flags & NFSSATTR_FULL) && vap->va_uid != (uid_t)VNOVAL) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl = txdr_unsigned(vap->va_uid); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } if ((flags & NFSSATTR_FULL) && vap->va_gid != (gid_t)VNOVAL) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl = txdr_unsigned(vap->va_gid); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } if ((flags & NFSSATTR_FULL) && vap->va_size != VNOVAL) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = newnfs_true; txdr_hyper(vap->va_size, tl); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } if (vap->va_atime.tv_sec != VNOVAL) { if ((vap->va_vaflags & VA_UTIMES_NULL) == 0) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); txdr_nfsv3time(&vap->va_atime, tl); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); } } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); } if (vap->va_mtime.tv_sec != VNOVAL) { if ((vap->va_vaflags & VA_UTIMES_NULL) == 0) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); txdr_nfsv3time(&vap->va_mtime, tl); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); } } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); } break; case ND_NFSV4: NFSZERO_ATTRBIT(&attrbits); if (vap->va_mode != (mode_t)VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_MODE); if ((flags & NFSSATTR_FULL) && vap->va_uid != (uid_t)VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER); if ((flags & NFSSATTR_FULL) && vap->va_gid != (gid_t)VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNERGROUP); if ((flags & NFSSATTR_FULL) && vap->va_size != VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); if (vap->va_atime.tv_sec != VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET); if (vap->va_mtime.tv_sec != VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFYSET); if (vap->va_birthtime.tv_sec != VNOVAL && strcmp(vp->v_mount->mnt_vfc->vfc_name, "nfs") == 0) { /* * We can only test for support of TimeCreate if * the "vp" argument is for an NFS vnode. */ np = VTONFS(vp); if (NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr, NFSATTRBIT_TIMECREATE)) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMECREATE); } (void) nfsv4_fillattr(nd, vp->v_mount, vp, NULL, vap, NULL, 0, &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0, NULL); break; } } #ifndef APPLE /* * copies mbuf chain to the uio scatter/gather list */ int nfsm_mbufuio(struct nfsrv_descript *nd, struct uio *uiop, int siz) { char *mbufcp, *uiocp; int xfer, left, len; struct mbuf *mp; long uiosiz, rem; int error = 0; mp = nd->nd_md; mbufcp = nd->nd_dpos; len = mtod(mp, caddr_t) + mp->m_len - mbufcp; rem = NFSM_RNDUP(siz) - siz; while (siz > 0) { if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL) { error = EBADRPC; goto out; } left = uiop->uio_iov->iov_len; uiocp = uiop->uio_iov->iov_base; if (left > siz) left = siz; uiosiz = left; while (left > 0) { while (len == 0) { mp = mp->m_next; if (mp == NULL) { error = EBADRPC; goto out; } mbufcp = mtod(mp, caddr_t); len = mp->m_len; KASSERT(len >= 0, ("len %d, corrupted mbuf?", len)); } xfer = (left > len) ? len : left; #ifdef notdef /* Not Yet.. */ if (uiop->uio_iov->iov_op != NULL) (*(uiop->uio_iov->iov_op)) (mbufcp, uiocp, xfer); else #endif if (uiop->uio_segflg == UIO_SYSSPACE) NFSBCOPY(mbufcp, uiocp, xfer); else copyout(mbufcp, uiocp, xfer); left -= xfer; len -= xfer; mbufcp += xfer; uiocp += xfer; uiop->uio_offset += xfer; uiop->uio_resid -= xfer; } if (uiop->uio_iov->iov_len <= siz) { uiop->uio_iovcnt--; uiop->uio_iov++; } else { uiop->uio_iov->iov_base = (void *) ((char *)uiop->uio_iov->iov_base + uiosiz); uiop->uio_iov->iov_len -= uiosiz; } siz -= uiosiz; } nd->nd_dpos = mbufcp; nd->nd_md = mp; if (rem > 0) { if (len < rem) error = nfsm_advance(nd, rem, len); else nd->nd_dpos += rem; } out: NFSEXITCODE2(error, nd); return (error); } #endif /* !APPLE */ /* * Help break down an mbuf chain by setting the first siz bytes contiguous * pointed to by returned val. * This is used by the macro NFSM_DISSECT for tough * cases. */ void * nfsm_dissct(struct nfsrv_descript *nd, int siz, int how) { struct mbuf *mp2; int siz2, xfer; caddr_t p; int left; caddr_t retp; retp = NULL; left = mtod(nd->nd_md, caddr_t) + nd->nd_md->m_len - nd->nd_dpos; while (left == 0) { nd->nd_md = nd->nd_md->m_next; if (nd->nd_md == NULL) return (retp); left = nd->nd_md->m_len; nd->nd_dpos = mtod(nd->nd_md, caddr_t); } if (left >= siz) { retp = nd->nd_dpos; nd->nd_dpos += siz; } else if (nd->nd_md->m_next == NULL) { return (retp); } else if (siz > ncl_mbuf_mhlen) { panic("nfs S too big"); } else { MGET(mp2, MT_DATA, how); if (mp2 == NULL) return (NULL); mp2->m_next = nd->nd_md->m_next; nd->nd_md->m_next = mp2; nd->nd_md->m_len -= left; nd->nd_md = mp2; retp = p = mtod(mp2, caddr_t); NFSBCOPY(nd->nd_dpos, p, left); /* Copy what was left */ siz2 = siz - left; p += left; mp2 = mp2->m_next; /* Loop around copying up the siz2 bytes */ while (siz2 > 0) { if (mp2 == NULL) return (NULL); xfer = (siz2 > mp2->m_len) ? mp2->m_len : siz2; if (xfer > 0) { NFSBCOPY(mtod(mp2, caddr_t), p, xfer); mp2->m_data += xfer; mp2->m_len -= xfer; p += xfer; siz2 -= xfer; } if (siz2 > 0) mp2 = mp2->m_next; } nd->nd_md->m_len = siz; nd->nd_md = mp2; nd->nd_dpos = mtod(mp2, caddr_t); } return (retp); } /* * Advance the position in the mbuf chain. * If offs == 0, this is a no-op, but it is simpler to just return from * here than check for offs > 0 for all calls to nfsm_advance. * If left == -1, it should be calculated here. */ int nfsm_advance(struct nfsrv_descript *nd, int offs, int left) { int error = 0; if (offs == 0) goto out; /* * A negative offs might indicate a corrupted mbuf chain and, * as such, a printf is logged. */ if (offs < 0) { printf("nfsrv_advance: negative offs\n"); error = EBADRPC; goto out; } /* * If left == -1, calculate it here. */ if (left == -1) left = mtod(nd->nd_md, caddr_t) + nd->nd_md->m_len - nd->nd_dpos; /* * Loop around, advancing over the mbuf data. */ while (offs > left) { offs -= left; nd->nd_md = nd->nd_md->m_next; if (nd->nd_md == NULL) { error = EBADRPC; goto out; } left = nd->nd_md->m_len; nd->nd_dpos = mtod(nd->nd_md, caddr_t); } nd->nd_dpos += offs; out: NFSEXITCODE(error); return (error); } /* * Copy a string into mbuf(s). * Return the number of bytes output, including XDR overheads. */ int nfsm_strtom(struct nfsrv_descript *nd, const char *cp, int siz) { struct mbuf *m2; int xfer, left; struct mbuf *m1; int rem, bytesize; u_int32_t *tl; char *cp2; NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(siz); rem = NFSM_RNDUP(siz) - siz; bytesize = NFSX_UNSIGNED + siz + rem; m2 = nd->nd_mb; cp2 = nd->nd_bpos; if ((nd->nd_flag & ND_EXTPG) != 0) left = nd->nd_bextpgsiz; else left = M_TRAILINGSPACE(m2); KASSERT(((m2->m_flags & (M_EXT | M_EXTPG)) == (M_EXT | M_EXTPG) && (nd->nd_flag & ND_EXTPG) != 0) || ((m2->m_flags & (M_EXT | M_EXTPG)) != (M_EXT | M_EXTPG) && (nd->nd_flag & ND_EXTPG) == 0), ("nfsm_strtom: ext_pgs and non-ext_pgs mbufs mixed")); /* * Loop around copying the string to mbuf(s). */ while (siz > 0) { if (left == 0) { if ((nd->nd_flag & ND_EXTPG) != 0) { m2 = nfsm_add_ext_pgs(m2, nd->nd_maxextsiz, &nd->nd_bextpg); cp2 = (char *)(void *)PHYS_TO_DMAP( m2->m_epg_pa[nd->nd_bextpg]); nd->nd_bextpgsiz = left = PAGE_SIZE; } else { if (siz > ncl_mbuf_mlen) NFSMCLGET(m1, M_WAITOK); else NFSMGET(m1); m1->m_len = 0; cp2 = mtod(m1, char *); left = M_TRAILINGSPACE(m1); m2->m_next = m1; m2 = m1; } } if (left >= siz) xfer = siz; else xfer = left; NFSBCOPY(cp, cp2, xfer); cp += xfer; cp2 += xfer; m2->m_len += xfer; siz -= xfer; left -= xfer; if ((nd->nd_flag & ND_EXTPG) != 0) { nd->nd_bextpgsiz -= xfer; m2->m_epg_last_len += xfer; } if (siz == 0 && rem) { if (left < rem) panic("nfsm_strtom"); NFSBZERO(cp2, rem); m2->m_len += rem; cp2 += rem; if ((nd->nd_flag & ND_EXTPG) != 0) { nd->nd_bextpgsiz -= rem; m2->m_epg_last_len += rem; } } } nd->nd_mb = m2; if ((nd->nd_flag & ND_EXTPG) != 0) nd->nd_bpos = cp2; else nd->nd_bpos = mtod(m2, char *) + m2->m_len; return (bytesize); } /* * Called once to initialize data structures... */ void newnfs_init(void) { static int nfs_inited = 0; if (nfs_inited) return; nfs_inited = 1; newnfs_true = txdr_unsigned(TRUE); newnfs_false = txdr_unsigned(FALSE); newnfs_xdrneg1 = txdr_unsigned(-1); nfscl_ticks = (hz * NFS_TICKINTVL + 500) / 1000; if (nfscl_ticks < 1) nfscl_ticks = 1; NFSSETBOOTTIME(nfsboottime); /* * Initialize reply list and start timer */ TAILQ_INIT(&nfsd_reqq); NFS_TIMERINIT; } /* * Put a file handle in an mbuf list. * If the size argument == 0, just use the default size. * set_true == 1 if there should be an newnfs_true prepended on the file handle. * Return the number of bytes output, including XDR overhead. */ int nfsm_fhtom(struct nfsrv_descript *nd, u_int8_t *fhp, int size, int set_true) { u_int32_t *tl; u_int8_t *cp; int fullsiz, rem, bytesize = 0; if (size == 0) size = NFSX_MYFH; switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) { case ND_NFSV2: if (size > NFSX_V2FH) panic("fh size > NFSX_V2FH for NFSv2"); NFSM_BUILD(cp, u_int8_t *, NFSX_V2FH); NFSBCOPY(fhp, cp, size); if (size < NFSX_V2FH) NFSBZERO(cp + size, NFSX_V2FH - size); bytesize = NFSX_V2FH; break; case ND_NFSV3: case ND_NFSV4: fullsiz = NFSM_RNDUP(size); rem = fullsiz - size; if (set_true) { bytesize = 2 * NFSX_UNSIGNED + fullsiz; NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_true; } else { bytesize = NFSX_UNSIGNED + fullsiz; } (void) nfsm_strtom(nd, fhp, size); break; } return (bytesize); } /* * This function compares two net addresses by family and returns TRUE * if they are the same host. * If there is any doubt, return FALSE. * The AF_INET family is handled as a special case so that address mbufs * don't need to be saved to store "struct in_addr", which is only 4 bytes. */ int nfsaddr_match(int family, union nethostaddr *haddr, NFSSOCKADDR_T nam) { #ifdef INET struct sockaddr_in *inetaddr; #endif switch (family) { #ifdef INET case AF_INET: inetaddr = NFSSOCKADDR(nam, struct sockaddr_in *); if (inetaddr->sin_family == AF_INET && inetaddr->sin_addr.s_addr == haddr->had_inet.s_addr) return (1); break; #endif #ifdef INET6 case AF_INET6: { struct sockaddr_in6 *inetaddr6; inetaddr6 = NFSSOCKADDR(nam, struct sockaddr_in6 *); /* XXX - should test sin6_scope_id ? */ if (inetaddr6->sin6_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(&inetaddr6->sin6_addr, &haddr->had_inet6)) return (1); } break; #endif } return (0); } /* * Similar to the above, but takes to NFSSOCKADDR_T args. */ int nfsaddr2_match(NFSSOCKADDR_T nam1, NFSSOCKADDR_T nam2) { struct sockaddr_in *addr1, *addr2; struct sockaddr *inaddr; inaddr = NFSSOCKADDR(nam1, struct sockaddr *); switch (inaddr->sa_family) { case AF_INET: addr1 = NFSSOCKADDR(nam1, struct sockaddr_in *); addr2 = NFSSOCKADDR(nam2, struct sockaddr_in *); if (addr2->sin_family == AF_INET && addr1->sin_addr.s_addr == addr2->sin_addr.s_addr) return (1); break; #ifdef INET6 case AF_INET6: { struct sockaddr_in6 *inet6addr1, *inet6addr2; inet6addr1 = NFSSOCKADDR(nam1, struct sockaddr_in6 *); inet6addr2 = NFSSOCKADDR(nam2, struct sockaddr_in6 *); /* XXX - should test sin6_scope_id ? */ if (inet6addr2->sin6_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(&inet6addr1->sin6_addr, &inet6addr2->sin6_addr)) return (1); } break; #endif } return (0); } /* * Dissect a file handle on the client. */ int nfsm_getfh(struct nfsrv_descript *nd, struct nfsfh **nfhpp) { u_int32_t *tl; struct nfsfh *nfhp; int error, len; *nfhpp = NULL; if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if ((len = fxdr_unsigned(int, *tl)) <= 0 || len > NFSX_FHMAX) { error = EBADRPC; goto nfsmout; } } else len = NFSX_V2FH; nfhp = malloc(sizeof (struct nfsfh) + len, M_NFSFH, M_WAITOK); error = nfsrv_mtostr(nd, nfhp->nfh_fh, len); if (error) { free(nfhp, M_NFSFH); goto nfsmout; } nfhp->nfh_len = len; *nfhpp = nfhp; nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Break down the nfsv4 acl. * If the aclp == NULL or won't fit in an acl, just discard the acl info. */ int nfsrv_dissectacl(struct nfsrv_descript *nd, NFSACL_T *aclp, int *aclerrp, int *aclsizep, __unused NFSPROC_T *p) { u_int32_t *tl; int i, aclsize; int acecnt, error = 0, aceerr = 0, acesize; *aclerrp = 0; if (aclp) aclp->acl_cnt = 0; /* * Parse out the ace entries and expect them to conform to * what can be supported by R/W/X bits. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); aclsize = NFSX_UNSIGNED; acecnt = fxdr_unsigned(int, *tl); if (acecnt > ACL_MAX_ENTRIES) aceerr = NFSERR_ATTRNOTSUPP; if (nfsrv_useacl == 0) aceerr = NFSERR_ATTRNOTSUPP; for (i = 0; i < acecnt; i++) { if (aclp && !aceerr) error = nfsrv_dissectace(nd, &aclp->acl_entry[i], &aceerr, &acesize, p); else error = nfsrv_skipace(nd, &acesize); if (error) goto nfsmout; aclsize += acesize; } if (aclp && !aceerr) aclp->acl_cnt = acecnt; if (aceerr) *aclerrp = aceerr; if (aclsizep) *aclsizep = aclsize; nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Skip over an NFSv4 ace entry. Just dissect the xdr and discard it. */ static int nfsrv_skipace(struct nfsrv_descript *nd, int *acesizep) { u_int32_t *tl; int error, len = 0; NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); len = fxdr_unsigned(int, *(tl + 3)); error = nfsm_advance(nd, NFSM_RNDUP(len), -1); nfsmout: *acesizep = NFSM_RNDUP(len) + (4 * NFSX_UNSIGNED); NFSEXITCODE2(error, nd); return (error); } /* * Get attribute bits from an mbuf list. * Returns EBADRPC for a parsing error, 0 otherwise. * If the clearinvalid flag is set, clear the bits not supported. */ int nfsrv_getattrbits(struct nfsrv_descript *nd, nfsattrbit_t *attrbitp, int *cntp, int *retnotsupp) { u_int32_t *tl; int cnt, i, outcnt; int error = 0; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); cnt = fxdr_unsigned(int, *tl); if (cnt < 0) { error = NFSERR_BADXDR; goto nfsmout; } if (cnt > NFSATTRBIT_MAXWORDS) outcnt = NFSATTRBIT_MAXWORDS; else outcnt = cnt; NFSZERO_ATTRBIT(attrbitp); if (outcnt > 0) { NFSM_DISSECT(tl, u_int32_t *, outcnt * NFSX_UNSIGNED); for (i = 0; i < outcnt; i++) attrbitp->bits[i] = fxdr_unsigned(u_int32_t, *tl++); } for (i = 0; i < (cnt - outcnt); i++) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (retnotsupp != NULL && *tl != 0) *retnotsupp = NFSERR_ATTRNOTSUPP; } if (cntp) *cntp = NFSX_UNSIGNED + (cnt * NFSX_UNSIGNED); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Get the attributes for V4. * If the compare flag is true, test for any attribute changes, * otherwise return the attribute values. * These attributes cover fields in "struct vattr", "struct statfs", * "struct nfsfsinfo", the file handle and the lease duration. * The value of retcmpp is set to 1 if all attributes are the same, * and 0 otherwise. * Returns EBADRPC if it can't be parsed, 0 otherwise. */ int nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nap, struct nfsfh **nfhpp, fhandle_t *fhp, int fhsize, struct nfsv3_pathconf *pc, struct statfs *sbp, struct nfsstatfs *sfp, struct nfsfsinfo *fsp, NFSACL_T *aclp, int compare, int *retcmpp, u_int32_t *leasep, u_int32_t *rderrp, NFSPROC_T *p, struct ucred *cred) { u_int32_t *tl; int i = 0, j, k, l = 0, m, bitpos, attrsum = 0; int error, tfhsize, aceerr, attrsize, cnt, retnotsup; u_char *cp, *cp2, namestr[NFSV4_SMALLSTR + 1]; nfsattrbit_t attrbits, retattrbits, checkattrbits; struct nfsfh *tnfhp; struct nfsreferral *refp; u_quad_t tquad; nfsquad_t tnfsquad; struct timespec temptime; uid_t uid; gid_t gid; u_int32_t freenum = 0, tuint; u_int64_t uquad = 0, thyp, thyp2; #ifdef QUOTA struct dqblk dqb; uid_t savuid; #endif CTASSERT(sizeof(ino_t) == sizeof(uint64_t)); if (compare) { retnotsup = 0; error = nfsrv_getattrbits(nd, &attrbits, NULL, &retnotsup); } else { error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); } if (error) goto nfsmout; if (compare) { *retcmpp = retnotsup; } else { /* * Just set default values to some of the important ones. */ if (nap != NULL) { nap->na_type = VREG; nap->na_mode = 0; nap->na_rdev = (NFSDEV_T)0; nap->na_mtime.tv_sec = 0; nap->na_mtime.tv_nsec = 0; nap->na_btime.tv_sec = -1; nap->na_btime.tv_nsec = 0; nap->na_gen = 0; nap->na_flags = 0; nap->na_blocksize = NFS_FABLKSIZE; } if (sbp != NULL) { sbp->f_bsize = NFS_FABLKSIZE; sbp->f_blocks = 0; sbp->f_bfree = 0; sbp->f_bavail = 0; sbp->f_files = 0; sbp->f_ffree = 0; } if (fsp != NULL) { fsp->fs_rtmax = 8192; fsp->fs_rtpref = 8192; fsp->fs_maxname = NFS_MAXNAMLEN; fsp->fs_wtmax = 8192; fsp->fs_wtpref = 8192; fsp->fs_wtmult = NFS_FABLKSIZE; fsp->fs_dtpref = 8192; fsp->fs_maxfilesize = 0xffffffffffffffffull; fsp->fs_timedelta.tv_sec = 0; fsp->fs_timedelta.tv_nsec = 1; fsp->fs_properties = (NFSV3_FSFLINK | NFSV3_FSFSYMLINK | NFSV3_FSFHOMOGENEOUS | NFSV3_FSFCANSETTIME); } if (pc != NULL) { pc->pc_linkmax = NFS_LINK_MAX; pc->pc_namemax = NAME_MAX; pc->pc_notrunc = 0; pc->pc_chownrestricted = 0; pc->pc_caseinsensitive = 0; pc->pc_casepreserving = 1; } if (sfp != NULL) { sfp->sf_ffiles = UINT64_MAX; sfp->sf_tfiles = UINT64_MAX; sfp->sf_afiles = UINT64_MAX; sfp->sf_fbytes = UINT64_MAX; sfp->sf_tbytes = UINT64_MAX; sfp->sf_abytes = UINT64_MAX; } } /* * Loop around getting the attributes. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsize = fxdr_unsigned(int, *tl); for (bitpos = 0; bitpos < NFSATTRBIT_MAX; bitpos++) { if (attrsum > attrsize) { error = NFSERR_BADXDR; goto nfsmout; } if (NFSISSET_ATTRBIT(&attrbits, bitpos)) switch (bitpos) { case NFSATTRBIT_SUPPORTEDATTRS: retnotsup = 0; if (compare || nap == NULL) error = nfsrv_getattrbits(nd, &retattrbits, &cnt, &retnotsup); else error = nfsrv_getattrbits(nd, &nap->na_suppattr, &cnt, &retnotsup); if (error) goto nfsmout; if (compare && !(*retcmpp)) { NFSSETSUPP_ATTRBIT(&checkattrbits, nd); /* Some filesystem do not support NFSv4ACL */ if (nfsrv_useacl == 0 || nfs_supportsnfsv4acls(vp) == 0) { NFSCLRBIT_ATTRBIT(&checkattrbits, NFSATTRBIT_ACL); NFSCLRBIT_ATTRBIT(&checkattrbits, NFSATTRBIT_ACLSUPPORT); } if (!NFSEQUAL_ATTRBIT(&retattrbits, &checkattrbits) || retnotsup) *retcmpp = NFSERR_NOTSAME; } attrsum += cnt; break; case NFSATTRBIT_TYPE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (nap->na_type != nfsv34tov_type(*tl)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_type = nfsv34tov_type(*tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_FHEXPIRETYPE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) { if (fxdr_unsigned(int, *tl) != NFSV4FHTYPE_PERSISTENT) *retcmpp = NFSERR_NOTSAME; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_CHANGE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp)) { if (nap->na_filerev != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_filerev = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_SIZE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp)) { if (nap->na_size != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_size = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_LINKSUPPORT: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (fsp->fs_properties & NFSV3_FSFLINK) { if (*tl == newnfs_false) *retcmpp = NFSERR_NOTSAME; } else { if (*tl == newnfs_true) *retcmpp = NFSERR_NOTSAME; } } } else if (fsp != NULL) { if (*tl == newnfs_true) fsp->fs_properties |= NFSV3_FSFLINK; else fsp->fs_properties &= ~NFSV3_FSFLINK; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_SYMLINKSUPPORT: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (fsp->fs_properties & NFSV3_FSFSYMLINK) { if (*tl == newnfs_false) *retcmpp = NFSERR_NOTSAME; } else { if (*tl == newnfs_true) *retcmpp = NFSERR_NOTSAME; } } } else if (fsp != NULL) { if (*tl == newnfs_true) fsp->fs_properties |= NFSV3_FSFSYMLINK; else fsp->fs_properties &= ~NFSV3_FSFSYMLINK; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_NAMEDATTR: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) { if (*tl != newnfs_false) *retcmpp = NFSERR_NOTSAME; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_FSID: NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); thyp = fxdr_hyper(tl); tl += 2; thyp2 = fxdr_hyper(tl); if (compare) { if (*retcmpp == 0) { if (thyp != (u_int64_t) vp->v_mount->mnt_stat.f_fsid.val[0] || thyp2 != (u_int64_t) vp->v_mount->mnt_stat.f_fsid.val[1]) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_filesid[0] = thyp; nap->na_filesid[1] = thyp2; } attrsum += (4 * NFSX_UNSIGNED); break; case NFSATTRBIT_UNIQUEHANDLES: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) { if (*tl != newnfs_true) *retcmpp = NFSERR_NOTSAME; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_LEASETIME: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (fxdr_unsigned(int, *tl) != nfsrv_lease && !(*retcmpp)) *retcmpp = NFSERR_NOTSAME; } else if (leasep != NULL) { *leasep = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_RDATTRERROR: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) *retcmpp = NFSERR_INVAL; } else if (rderrp != NULL) { *rderrp = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_ACL: if (compare) { if (!(*retcmpp)) { if (nfsrv_useacl && nfs_supportsnfsv4acls(vp)) { NFSACL_T *naclp; naclp = acl_alloc(M_WAITOK); error = nfsrv_dissectacl(nd, naclp, &aceerr, &cnt, p); if (error) { acl_free(naclp); goto nfsmout; } if (aceerr || aclp == NULL || nfsrv_compareacl(aclp, naclp)) *retcmpp = NFSERR_NOTSAME; acl_free(naclp); } else { error = nfsrv_dissectacl(nd, NULL, &aceerr, &cnt, p); *retcmpp = NFSERR_ATTRNOTSUPP; } } } else { if (vp != NULL && aclp != NULL) error = nfsrv_dissectacl(nd, aclp, &aceerr, &cnt, p); else error = nfsrv_dissectacl(nd, NULL, &aceerr, &cnt, p); if (error) goto nfsmout; } attrsum += cnt; break; case NFSATTRBIT_ACLSUPPORT: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) { if (nfsrv_useacl && nfs_supportsnfsv4acls(vp)) { if (fxdr_unsigned(u_int32_t, *tl) != NFSV4ACE_SUPTYPES) *retcmpp = NFSERR_NOTSAME; } else { *retcmpp = NFSERR_ATTRNOTSUPP; } } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_ARCHIVE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_CANSETTIME: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (fsp->fs_properties & NFSV3_FSFCANSETTIME) { if (*tl == newnfs_false) *retcmpp = NFSERR_NOTSAME; } else { if (*tl == newnfs_true) *retcmpp = NFSERR_NOTSAME; } } } else if (fsp != NULL) { if (*tl == newnfs_true) fsp->fs_properties |= NFSV3_FSFCANSETTIME; else fsp->fs_properties &= ~NFSV3_FSFCANSETTIME; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_CASEINSENSITIVE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (*tl != newnfs_false) *retcmpp = NFSERR_NOTSAME; } } else if (pc != NULL) { pc->pc_caseinsensitive = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_CASEPRESERVING: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (*tl != newnfs_true) *retcmpp = NFSERR_NOTSAME; } } else if (pc != NULL) { pc->pc_casepreserving = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_CHOWNRESTRICTED: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (*tl != newnfs_true) *retcmpp = NFSERR_NOTSAME; } } else if (pc != NULL) { pc->pc_chownrestricted = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_FILEHANDLE: error = nfsm_getfh(nd, &tnfhp); if (error) goto nfsmout; tfhsize = tnfhp->nfh_len; if (compare) { if (!(*retcmpp) && !NFSRV_CMPFH(tnfhp->nfh_fh, tfhsize, fhp, fhsize)) *retcmpp = NFSERR_NOTSAME; free(tnfhp, M_NFSFH); } else if (nfhpp != NULL) { *nfhpp = tnfhp; } else { free(tnfhp, M_NFSFH); } attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(tfhsize)); break; case NFSATTRBIT_FILEID: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); thyp = fxdr_hyper(tl); if (compare) { if (!(*retcmpp)) { if (nap->na_fileid != thyp) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) nap->na_fileid = thyp; attrsum += NFSX_HYPER; break; case NFSATTRBIT_FILESAVAIL: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp) && sfp->sf_afiles != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } else if (sfp != NULL) { sfp->sf_afiles = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_FILESFREE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp) && sfp->sf_ffiles != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } else if (sfp != NULL) { sfp->sf_ffiles = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_FILESTOTAL: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp) && sfp->sf_tfiles != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } else if (sfp != NULL) { sfp->sf_tfiles = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_FSLOCATIONS: error = nfsrv_getrefstr(nd, &cp, &cp2, &l, &m); if (error) goto nfsmout; attrsum += l; if (compare && !(*retcmpp)) { refp = nfsv4root_getreferral(vp, NULL, 0); if (refp != NULL) { if (cp == NULL || cp2 == NULL || strcmp(cp, "/") || strcmp(cp2, refp->nfr_srvlist)) *retcmpp = NFSERR_NOTSAME; } else if (m == 0) { *retcmpp = NFSERR_NOTSAME; } } if (cp != NULL) free(cp, M_NFSSTRING); if (cp2 != NULL) free(cp2, M_NFSSTRING); break; case NFSATTRBIT_HIDDEN: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_HOMOGENEOUS: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (fsp->fs_properties & NFSV3_FSFHOMOGENEOUS) { if (*tl == newnfs_false) *retcmpp = NFSERR_NOTSAME; } else { if (*tl == newnfs_true) *retcmpp = NFSERR_NOTSAME; } } } else if (fsp != NULL) { if (*tl == newnfs_true) fsp->fs_properties |= NFSV3_FSFHOMOGENEOUS; else fsp->fs_properties &= ~NFSV3_FSFHOMOGENEOUS; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_MAXFILESIZE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); tnfsquad.qval = fxdr_hyper(tl); if (compare) { if (!(*retcmpp)) { tquad = NFSRV_MAXFILESIZE; if (tquad != tnfsquad.qval) *retcmpp = NFSERR_NOTSAME; } } else if (fsp != NULL) { fsp->fs_maxfilesize = tnfsquad.qval; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_MAXLINK: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (fxdr_unsigned(int, *tl) != NFS_LINK_MAX) *retcmpp = NFSERR_NOTSAME; } } else if (pc != NULL) { pc->pc_linkmax = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_MAXNAME: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (fsp->fs_maxname != fxdr_unsigned(u_int32_t, *tl)) *retcmpp = NFSERR_NOTSAME; } } else { tuint = fxdr_unsigned(u_int32_t, *tl); /* * Some Linux NFSv4 servers report this * as 0 or 4billion, so I'll set it to * NFS_MAXNAMLEN. If a server actually creates * a name longer than NFS_MAXNAMLEN, it will * get an error back. */ if (tuint == 0 || tuint > NFS_MAXNAMLEN) tuint = NFS_MAXNAMLEN; if (fsp != NULL) fsp->fs_maxname = tuint; if (pc != NULL) pc->pc_namemax = tuint; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_MAXREAD: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp)) { if (fsp->fs_rtmax != fxdr_unsigned(u_int32_t, *(tl + 1)) || *tl != 0) *retcmpp = NFSERR_NOTSAME; } } else if (fsp != NULL) { fsp->fs_rtmax = fxdr_unsigned(u_int32_t, *++tl); fsp->fs_rtpref = fsp->fs_rtmax; fsp->fs_dtpref = fsp->fs_rtpref; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_MAXWRITE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp)) { if (fsp->fs_wtmax != fxdr_unsigned(u_int32_t, *(tl + 1)) || *tl != 0) *retcmpp = NFSERR_NOTSAME; } } else if (fsp != NULL) { fsp->fs_wtmax = fxdr_unsigned(int, *++tl); fsp->fs_wtpref = fsp->fs_wtmax; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_MIMETYPE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i)); error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; break; case NFSATTRBIT_MODE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (nap->na_mode != nfstov_mode(*tl)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_mode = nfstov_mode(*tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_NOTRUNC: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (*tl != newnfs_true) *retcmpp = NFSERR_NOTSAME; } } else if (pc != NULL) { pc->pc_notrunc = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_NUMLINKS: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); tuint = fxdr_unsigned(u_int32_t, *tl); if (compare) { if (!(*retcmpp)) { if ((u_int32_t)nap->na_nlink != tuint) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_nlink = tuint; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_OWNER: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); j = fxdr_unsigned(int, *tl); if (j < 0) { error = NFSERR_BADXDR; goto nfsmout; } attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); if (j > NFSV4_SMALLSTR) cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); else cp = namestr; error = nfsrv_mtostr(nd, cp, j); if (error) { if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); goto nfsmout; } if (compare) { if (!(*retcmpp)) { if (nfsv4_strtouid(nd, cp, j, &uid) || nap->na_uid != uid) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { if (nfsv4_strtouid(nd, cp, j, &uid)) nap->na_uid = nfsrv_defaultuid; else nap->na_uid = uid; } if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); break; case NFSATTRBIT_OWNERGROUP: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); j = fxdr_unsigned(int, *tl); if (j < 0) { error = NFSERR_BADXDR; goto nfsmout; } attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); if (j > NFSV4_SMALLSTR) cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); else cp = namestr; error = nfsrv_mtostr(nd, cp, j); if (error) { if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); goto nfsmout; } if (compare) { if (!(*retcmpp)) { if (nfsv4_strtogid(nd, cp, j, &gid) || nap->na_gid != gid) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { if (nfsv4_strtogid(nd, cp, j, &gid)) nap->na_gid = nfsrv_defaultgid; else nap->na_gid = gid; } if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); break; case NFSATTRBIT_QUOTAHARD: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (sbp != NULL) { if (priv_check_cred(cred, PRIV_VFS_EXCEEDQUOTA)) freenum = sbp->f_bfree; else freenum = sbp->f_bavail; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(vp->v_mount,QCMD(Q_GETQUOTA, USRQUOTA), cred->cr_uid, &dqb)) freenum = min(dqb.dqb_bhardlimit, freenum); p->p_cred->p_ruid = savuid; #endif /* QUOTA */ uquad = (u_int64_t)freenum; NFSQUOTABLKTOBYTE(uquad, sbp->f_bsize); } if (compare && !(*retcmpp)) { if (uquad != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_QUOTASOFT: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (sbp != NULL) { if (priv_check_cred(cred, PRIV_VFS_EXCEEDQUOTA)) freenum = sbp->f_bfree; else freenum = sbp->f_bavail; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(vp->v_mount,QCMD(Q_GETQUOTA, USRQUOTA), cred->cr_uid, &dqb)) freenum = min(dqb.dqb_bsoftlimit, freenum); p->p_cred->p_ruid = savuid; #endif /* QUOTA */ uquad = (u_int64_t)freenum; NFSQUOTABLKTOBYTE(uquad, sbp->f_bsize); } if (compare && !(*retcmpp)) { if (uquad != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_QUOTAUSED: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (sbp != NULL) { freenum = 0; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(vp->v_mount,QCMD(Q_GETQUOTA, USRQUOTA), cred->cr_uid, &dqb)) freenum = dqb.dqb_curblocks; p->p_cred->p_ruid = savuid; #endif /* QUOTA */ uquad = (u_int64_t)freenum; NFSQUOTABLKTOBYTE(uquad, sbp->f_bsize); } if (compare && !(*retcmpp)) { if (uquad != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_RAWDEV: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4SPECDATA); j = fxdr_unsigned(int, *tl++); k = fxdr_unsigned(int, *tl); if (compare) { if (!(*retcmpp)) { if (nap->na_rdev != NFSMAKEDEV(j, k)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_rdev = NFSMAKEDEV(j, k); } attrsum += NFSX_V4SPECDATA; break; case NFSATTRBIT_SPACEAVAIL: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp) && sfp->sf_abytes != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } else if (sfp != NULL) { sfp->sf_abytes = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_SPACEFREE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp) && sfp->sf_fbytes != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } else if (sfp != NULL) { sfp->sf_fbytes = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_SPACETOTAL: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp) && sfp->sf_tbytes != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } else if (sfp != NULL) { sfp->sf_tbytes = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_SPACEUSED: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); thyp = fxdr_hyper(tl); if (compare) { if (!(*retcmpp)) { if ((u_int64_t)nap->na_bytes != thyp) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_bytes = thyp; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_SYSTEM: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_TIMEACCESS: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); fxdr_nfsv4time(tl, &temptime); if (compare) { if (!(*retcmpp)) { if (!NFS_CMPTIME(temptime, nap->na_atime)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_atime = temptime; } attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEACCESSSET: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsum += NFSX_UNSIGNED; i = fxdr_unsigned(int, *tl); if (i == NFSV4SATTRTIME_TOCLIENT) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); attrsum += NFSX_V4TIME; } if (compare && !(*retcmpp)) *retcmpp = NFSERR_INVAL; break; case NFSATTRBIT_TIMEBACKUP: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMECREATE: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); fxdr_nfsv4time(tl, &temptime); if (compare) { if (!(*retcmpp)) { if (!NFS_CMPTIME(temptime, nap->na_btime)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_btime = temptime; } attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEDELTA: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); if (fsp != NULL) { if (compare) { if (!(*retcmpp)) { if ((u_int32_t)fsp->fs_timedelta.tv_sec != fxdr_unsigned(u_int32_t, *(tl + 1)) || (u_int32_t)fsp->fs_timedelta.tv_nsec != (fxdr_unsigned(u_int32_t, *(tl + 2)) % 1000000000) || *tl != 0) *retcmpp = NFSERR_NOTSAME; } } else { fxdr_nfsv4time(tl, &fsp->fs_timedelta); } } attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMETADATA: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); fxdr_nfsv4time(tl, &temptime); if (compare) { if (!(*retcmpp)) { if (!NFS_CMPTIME(temptime, nap->na_ctime)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_ctime = temptime; } attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMODIFY: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); fxdr_nfsv4time(tl, &temptime); if (compare) { if (!(*retcmpp)) { if (!NFS_CMPTIME(temptime, nap->na_mtime)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_mtime = temptime; } attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMODIFYSET: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsum += NFSX_UNSIGNED; i = fxdr_unsigned(int, *tl); if (i == NFSV4SATTRTIME_TOCLIENT) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); attrsum += NFSX_V4TIME; } if (compare && !(*retcmpp)) *retcmpp = NFSERR_INVAL; break; case NFSATTRBIT_MOUNTEDONFILEID: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); thyp = fxdr_hyper(tl); if (compare) { if (!(*retcmpp)) { if (!vp || !nfsrv_atroot(vp, &thyp2)) thyp2 = nap->na_fileid; if (thyp2 != thyp) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) nap->na_mntonfileno = thyp; attrsum += NFSX_HYPER; break; case NFSATTRBIT_SUPPATTREXCLCREAT: retnotsup = 0; error = nfsrv_getattrbits(nd, &retattrbits, &cnt, &retnotsup); if (error) goto nfsmout; if (compare && !(*retcmpp)) { NFSSETSUPP_ATTRBIT(&checkattrbits, nd); NFSCLRNOTSETABLE_ATTRBIT(&checkattrbits, nd); NFSCLRBIT_ATTRBIT(&checkattrbits, NFSATTRBIT_TIMEACCESSSET); if (!NFSEQUAL_ATTRBIT(&retattrbits, &checkattrbits) || retnotsup) *retcmpp = NFSERR_NOTSAME; } attrsum += cnt; break; case NFSATTRBIT_FSLAYOUTTYPE: case NFSATTRBIT_LAYOUTTYPE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsum += NFSX_UNSIGNED; i = fxdr_unsigned(int, *tl); if (i > 0) { NFSM_DISSECT(tl, u_int32_t *, i * NFSX_UNSIGNED); attrsum += i * NFSX_UNSIGNED; j = fxdr_unsigned(int, *tl); if (i == 1 && compare && !(*retcmpp) && (((nfsrv_doflexfile != 0 || nfsrv_maxpnfsmirror > 1) && j != NFSLAYOUT_FLEXFILE) || (nfsrv_doflexfile == 0 && j != NFSLAYOUT_NFSV4_1_FILES))) *retcmpp = NFSERR_NOTSAME; } if (nfsrv_devidcnt == 0) { if (compare && !(*retcmpp) && i > 0) *retcmpp = NFSERR_NOTSAME; } else { if (compare && !(*retcmpp) && i != 1) *retcmpp = NFSERR_NOTSAME; } break; case NFSATTRBIT_LAYOUTALIGNMENT: case NFSATTRBIT_LAYOUTBLKSIZE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsum += NFSX_UNSIGNED; i = fxdr_unsigned(int, *tl); - if (compare && !(*retcmpp) && i != NFS_SRVMAXIO) + if (compare && !(*retcmpp) && i != nfs_srvmaxio) *retcmpp = NFSERR_NOTSAME; break; default: printf("EEK! nfsv4_loadattr unknown attr=%d\n", bitpos); if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; /* * and get out of the loop, since we can't parse * the unknown attrbute data. */ bitpos = NFSATTRBIT_MAX; break; } } /* * some clients pad the attrlist, so we need to skip over the * padding. */ if (attrsum > attrsize) { error = NFSERR_BADXDR; } else { attrsize = NFSM_RNDUP(attrsize); if (attrsum < attrsize) error = nfsm_advance(nd, attrsize - attrsum, -1); } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Implement sleep locks for newnfs. The nfslock_usecnt allows for a * shared lock and the NFSXXX_LOCK flag permits an exclusive lock. * The first argument is a pointer to an nfsv4lock structure. * The second argument is 1 iff a blocking lock is wanted. * If this argument is 0, the call waits until no thread either wants nor * holds an exclusive lock. * It returns 1 if the lock was acquired, 0 otherwise. * If several processes call this function concurrently wanting the exclusive * lock, one will get the lock and the rest will return without getting the * lock. (If the caller must have the lock, it simply calls this function in a * loop until the function returns 1 to indicate the lock was acquired.) * Any usecnt must be decremented by calling nfsv4_relref() before * calling nfsv4_lock(). It was done this way, so nfsv4_lock() could * be called in a loop. * The isleptp argument is set to indicate if the call slept, iff not NULL * and the mp argument indicates to check for a forced dismount, iff not * NULL. */ int nfsv4_lock(struct nfsv4lock *lp, int iwantlock, int *isleptp, struct mtx *mutex, struct mount *mp) { if (isleptp) *isleptp = 0; /* * If a lock is wanted, loop around until the lock is acquired by * someone and then released. If I want the lock, try to acquire it. * For a lock to be issued, no lock must be in force and the usecnt * must be zero. */ if (iwantlock) { if (!(lp->nfslock_lock & NFSV4LOCK_LOCK) && lp->nfslock_usecnt == 0) { lp->nfslock_lock &= ~NFSV4LOCK_LOCKWANTED; lp->nfslock_lock |= NFSV4LOCK_LOCK; return (1); } lp->nfslock_lock |= NFSV4LOCK_LOCKWANTED; } while (lp->nfslock_lock & (NFSV4LOCK_LOCK | NFSV4LOCK_LOCKWANTED)) { if (mp != NULL && NFSCL_FORCEDISM(mp)) { lp->nfslock_lock &= ~NFSV4LOCK_LOCKWANTED; return (0); } lp->nfslock_lock |= NFSV4LOCK_WANTED; if (isleptp) *isleptp = 1; msleep(&lp->nfslock_lock, mutex, PVFS, "nfsv4lck", hz); if (iwantlock && !(lp->nfslock_lock & NFSV4LOCK_LOCK) && lp->nfslock_usecnt == 0) { lp->nfslock_lock &= ~NFSV4LOCK_LOCKWANTED; lp->nfslock_lock |= NFSV4LOCK_LOCK; return (1); } } return (0); } /* * Release the lock acquired by nfsv4_lock(). * The second argument is set to 1 to indicate the nfslock_usecnt should be * incremented, as well. */ void nfsv4_unlock(struct nfsv4lock *lp, int incref) { lp->nfslock_lock &= ~NFSV4LOCK_LOCK; if (incref) lp->nfslock_usecnt++; nfsv4_wanted(lp); } /* * Release a reference cnt. */ void nfsv4_relref(struct nfsv4lock *lp) { if (lp->nfslock_usecnt <= 0) panic("nfsv4root ref cnt"); lp->nfslock_usecnt--; if (lp->nfslock_usecnt == 0) nfsv4_wanted(lp); } /* * Get a reference cnt. * This function will wait for any exclusive lock to be released, but will * not wait for threads that want the exclusive lock. If priority needs * to be given to threads that need the exclusive lock, a call to nfsv4_lock() * with the 2nd argument == 0 should be done before calling nfsv4_getref(). * If the mp argument is not NULL, check for NFSCL_FORCEDISM() being set and * return without getting a refcnt for that case. */ void nfsv4_getref(struct nfsv4lock *lp, int *isleptp, struct mtx *mutex, struct mount *mp) { if (isleptp) *isleptp = 0; /* * Wait for a lock held. */ while (lp->nfslock_lock & NFSV4LOCK_LOCK) { if (mp != NULL && NFSCL_FORCEDISM(mp)) return; lp->nfslock_lock |= NFSV4LOCK_WANTED; if (isleptp) *isleptp = 1; msleep(&lp->nfslock_lock, mutex, PVFS, "nfsv4gr", hz); } if (mp != NULL && NFSCL_FORCEDISM(mp)) return; lp->nfslock_usecnt++; } /* * Get a reference as above, but return failure instead of sleeping if * an exclusive lock is held. */ int nfsv4_getref_nonblock(struct nfsv4lock *lp) { if ((lp->nfslock_lock & NFSV4LOCK_LOCK) != 0) return (0); lp->nfslock_usecnt++; return (1); } /* * Test for a lock. Return 1 if locked, 0 otherwise. */ int nfsv4_testlock(struct nfsv4lock *lp) { if ((lp->nfslock_lock & NFSV4LOCK_LOCK) == 0 && lp->nfslock_usecnt == 0) return (0); return (1); } /* * Wake up anyone sleeping, waiting for this lock. */ static void nfsv4_wanted(struct nfsv4lock *lp) { if (lp->nfslock_lock & NFSV4LOCK_WANTED) { lp->nfslock_lock &= ~NFSV4LOCK_WANTED; wakeup((caddr_t)&lp->nfslock_lock); } } /* * Copy a string from an mbuf list into a character array. * Return EBADRPC if there is an mbuf error, * 0 otherwise. */ int nfsrv_mtostr(struct nfsrv_descript *nd, char *str, int siz) { char *cp; int xfer, len; struct mbuf *mp; int rem, error = 0; mp = nd->nd_md; cp = nd->nd_dpos; len = mtod(mp, caddr_t) + mp->m_len - cp; rem = NFSM_RNDUP(siz) - siz; while (siz > 0) { if (len > siz) xfer = siz; else xfer = len; NFSBCOPY(cp, str, xfer); str += xfer; siz -= xfer; if (siz > 0) { mp = mp->m_next; if (mp == NULL) { error = EBADRPC; goto out; } cp = mtod(mp, caddr_t); len = mp->m_len; } else { cp += xfer; len -= xfer; } } *str = '\0'; nd->nd_dpos = cp; nd->nd_md = mp; if (rem > 0) { if (len < rem) error = nfsm_advance(nd, rem, len); else nd->nd_dpos += rem; } out: NFSEXITCODE2(error, nd); return (error); } /* * Fill in the attributes as marked by the bitmap (V4). */ int nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp, NFSACL_T *saclp, struct vattr *vap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp, struct ucred *cred, NFSPROC_T *p, int isdgram, int reterr, int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno, struct statfs *pnfssf) { int bitpos, retnum = 0; u_int32_t *tl; int siz, prefixnum, error; u_char *cp, namestr[NFSV4_SMALLSTR]; nfsattrbit_t attrbits, retbits; nfsattrbit_t *retbitp = &retbits; u_int32_t freenum, *retnump; u_int64_t uquad; struct statfs *fs; struct nfsfsinfo fsinf; struct timespec temptime; NFSACL_T *aclp, *naclp = NULL; size_t atsiz; bool xattrsupp; #ifdef QUOTA struct dqblk dqb; uid_t savuid; #endif /* * First, set the bits that can be filled and get fsinfo. */ NFSSET_ATTRBIT(retbitp, attrbitp); /* * If both p and cred are NULL, it is a client side setattr call. * If both p and cred are not NULL, it is a server side reply call. * If p is not NULL and cred is NULL, it is a client side callback * reply call. */ if (p == NULL && cred == NULL) { NFSCLRNOTSETABLE_ATTRBIT(retbitp, nd); aclp = saclp; } else { NFSCLRNOTFILLABLE_ATTRBIT(retbitp, nd); naclp = acl_alloc(M_WAITOK); aclp = naclp; } nfsvno_getfs(&fsinf, isdgram); #ifndef APPLE /* * Get the VFS_STATFS(), since some attributes need them. */ fs = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); if (NFSISSETSTATFS_ATTRBIT(retbitp)) { error = VFS_STATFS(mp, fs); if (error != 0) { if (reterr) { nd->nd_repstat = NFSERR_ACCES; free(fs, M_STATFS); return (0); } NFSCLRSTATFS_ATTRBIT(retbitp); } } #endif /* * And the NFSv4 ACL... */ if (NFSISSET_ATTRBIT(retbitp, NFSATTRBIT_ACLSUPPORT) && (nfsrv_useacl == 0 || ((cred != NULL || p != NULL) && supports_nfsv4acls == 0))) { NFSCLRBIT_ATTRBIT(retbitp, NFSATTRBIT_ACLSUPPORT); } if (NFSISSET_ATTRBIT(retbitp, NFSATTRBIT_ACL)) { if (nfsrv_useacl == 0 || ((cred != NULL || p != NULL) && supports_nfsv4acls == 0)) { NFSCLRBIT_ATTRBIT(retbitp, NFSATTRBIT_ACL); } else if (naclp != NULL) { if (NFSVOPLOCK(vp, LK_SHARED) == 0) { error = VOP_ACCESSX(vp, VREAD_ACL, cred, p); if (error == 0) error = VOP_GETACL(vp, ACL_TYPE_NFS4, naclp, cred, p); NFSVOPUNLOCK(vp); } else error = NFSERR_PERM; if (error != 0) { if (reterr) { nd->nd_repstat = NFSERR_ACCES; free(fs, M_STATFS); return (0); } NFSCLRBIT_ATTRBIT(retbitp, NFSATTRBIT_ACL); } } } /* Check to see if Extended Attributes are supported. */ xattrsupp = false; if (NFSISSET_ATTRBIT(retbitp, NFSATTRBIT_XATTRSUPPORT)) { if (NFSVOPLOCK(vp, LK_SHARED) == 0) { error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, "xxx", NULL, &atsiz, cred, p); NFSVOPUNLOCK(vp); if (error != EOPNOTSUPP) xattrsupp = true; } } /* * Put out the attribute bitmap for the ones being filled in * and get the field for the number of attributes returned. */ prefixnum = nfsrv_putattrbit(nd, retbitp); NFSM_BUILD(retnump, u_int32_t *, NFSX_UNSIGNED); prefixnum += NFSX_UNSIGNED; /* * Now, loop around filling in the attributes for each bit set. */ for (bitpos = 0; bitpos < NFSATTRBIT_MAX; bitpos++) { if (NFSISSET_ATTRBIT(retbitp, bitpos)) { switch (bitpos) { case NFSATTRBIT_SUPPORTEDATTRS: NFSSETSUPP_ATTRBIT(&attrbits, nd); if (nfsrv_useacl == 0 || ((cred != NULL || p != NULL) && supports_nfsv4acls == 0)) { NFSCLRBIT_ATTRBIT(&attrbits,NFSATTRBIT_ACLSUPPORT); NFSCLRBIT_ATTRBIT(&attrbits,NFSATTRBIT_ACL); } retnum += nfsrv_putattrbit(nd, &attrbits); break; case NFSATTRBIT_TYPE: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = vtonfsv34_type(vap->va_type); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_FHEXPIRETYPE: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4FHTYPE_PERSISTENT); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_CHANGE: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); txdr_hyper(vap->va_filerev, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_SIZE: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); txdr_hyper(vap->va_size, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_LINKSUPPORT: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (fsinf.fs_properties & NFSV3FSINFO_LINK) *tl = newnfs_true; else *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_SYMLINKSUPPORT: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (fsinf.fs_properties & NFSV3FSINFO_SYMLINK) *tl = newnfs_true; else *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_NAMEDATTR: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_FSID: NFSM_BUILD(tl, u_int32_t *, NFSX_V4FSID); *tl++ = 0; *tl++ = txdr_unsigned(mp->mnt_stat.f_fsid.val[0]); *tl++ = 0; *tl = txdr_unsigned(mp->mnt_stat.f_fsid.val[1]); retnum += NFSX_V4FSID; break; case NFSATTRBIT_UNIQUEHANDLES: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_true; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_LEASETIME: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(nfsrv_lease); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_RDATTRERROR: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(rderror); retnum += NFSX_UNSIGNED; break; /* * Recommended Attributes. (Only the supported ones.) */ case NFSATTRBIT_ACL: retnum += nfsrv_buildacl(nd, aclp, vnode_vtype(vp), p); break; case NFSATTRBIT_ACLSUPPORT: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4ACE_SUPTYPES); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_CANSETTIME: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (fsinf.fs_properties & NFSV3FSINFO_CANSETTIME) *tl = newnfs_true; else *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_CASEINSENSITIVE: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_CASEPRESERVING: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_true; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_CHOWNRESTRICTED: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_true; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_FILEHANDLE: retnum += nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 0); break; case NFSATTRBIT_FILEID: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); uquad = vap->va_fileid; txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_FILESAVAIL: /* * Check quota and use min(quota, f_ffree). */ freenum = fs->f_ffree; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(mp, QCMD(Q_GETQUOTA,USRQUOTA), cred->cr_uid, &dqb)) freenum = min(dqb.dqb_isoftlimit-dqb.dqb_curinodes, freenum); p->p_cred->p_ruid = savuid; #endif /* QUOTA */ NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); *tl++ = 0; *tl = txdr_unsigned(freenum); retnum += NFSX_HYPER; break; case NFSATTRBIT_FILESFREE: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); *tl++ = 0; *tl = txdr_unsigned(fs->f_ffree); retnum += NFSX_HYPER; break; case NFSATTRBIT_FILESTOTAL: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); *tl++ = 0; *tl = txdr_unsigned(fs->f_files); retnum += NFSX_HYPER; break; case NFSATTRBIT_FSLOCATIONS: NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = 0; *tl = 0; retnum += 2 * NFSX_UNSIGNED; break; case NFSATTRBIT_HOMOGENEOUS: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (fsinf.fs_properties & NFSV3FSINFO_HOMOGENEOUS) *tl = newnfs_true; else *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_MAXFILESIZE: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); uquad = NFSRV_MAXFILESIZE; txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_MAXLINK: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFS_LINK_MAX); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_MAXNAME: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFS_MAXNAMLEN); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_MAXREAD: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); *tl++ = 0; *tl = txdr_unsigned(fsinf.fs_rtmax); retnum += NFSX_HYPER; break; case NFSATTRBIT_MAXWRITE: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); *tl++ = 0; *tl = txdr_unsigned(fsinf.fs_wtmax); retnum += NFSX_HYPER; break; case NFSATTRBIT_MODE: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = vtonfsv34_mode(vap->va_mode); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_NOTRUNC: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_true; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_NUMLINKS: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(vap->va_nlink); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_OWNER: cp = namestr; nfsv4_uidtostr(vap->va_uid, &cp, &siz); retnum += nfsm_strtom(nd, cp, siz); if (cp != namestr) free(cp, M_NFSSTRING); break; case NFSATTRBIT_OWNERGROUP: cp = namestr; nfsv4_gidtostr(vap->va_gid, &cp, &siz); retnum += nfsm_strtom(nd, cp, siz); if (cp != namestr) free(cp, M_NFSSTRING); break; case NFSATTRBIT_QUOTAHARD: if (priv_check_cred(cred, PRIV_VFS_EXCEEDQUOTA)) freenum = fs->f_bfree; else freenum = fs->f_bavail; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(mp, QCMD(Q_GETQUOTA,USRQUOTA), cred->cr_uid, &dqb)) freenum = min(dqb.dqb_bhardlimit, freenum); p->p_cred->p_ruid = savuid; #endif /* QUOTA */ NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); uquad = (u_int64_t)freenum; NFSQUOTABLKTOBYTE(uquad, fs->f_bsize); txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_QUOTASOFT: if (priv_check_cred(cred, PRIV_VFS_EXCEEDQUOTA)) freenum = fs->f_bfree; else freenum = fs->f_bavail; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(mp, QCMD(Q_GETQUOTA,USRQUOTA), cred->cr_uid, &dqb)) freenum = min(dqb.dqb_bsoftlimit, freenum); p->p_cred->p_ruid = savuid; #endif /* QUOTA */ NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); uquad = (u_int64_t)freenum; NFSQUOTABLKTOBYTE(uquad, fs->f_bsize); txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_QUOTAUSED: freenum = 0; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(mp, QCMD(Q_GETQUOTA,USRQUOTA), cred->cr_uid, &dqb)) freenum = dqb.dqb_curblocks; p->p_cred->p_ruid = savuid; #endif /* QUOTA */ NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); uquad = (u_int64_t)freenum; NFSQUOTABLKTOBYTE(uquad, fs->f_bsize); txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_RAWDEV: NFSM_BUILD(tl, u_int32_t *, NFSX_V4SPECDATA); *tl++ = txdr_unsigned(NFSMAJOR(vap->va_rdev)); *tl = txdr_unsigned(NFSMINOR(vap->va_rdev)); retnum += NFSX_V4SPECDATA; break; case NFSATTRBIT_SPACEAVAIL: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); if (priv_check_cred(cred, PRIV_VFS_BLOCKRESERVE)) { if (pnfssf != NULL) uquad = (u_int64_t)pnfssf->f_bfree; else uquad = (u_int64_t)fs->f_bfree; } else { if (pnfssf != NULL) uquad = (u_int64_t)pnfssf->f_bavail; else uquad = (u_int64_t)fs->f_bavail; } if (pnfssf != NULL) uquad *= pnfssf->f_bsize; else uquad *= fs->f_bsize; txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_SPACEFREE: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); if (pnfssf != NULL) { uquad = (u_int64_t)pnfssf->f_bfree; uquad *= pnfssf->f_bsize; } else { uquad = (u_int64_t)fs->f_bfree; uquad *= fs->f_bsize; } txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_SPACETOTAL: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); if (pnfssf != NULL) { uquad = (u_int64_t)pnfssf->f_blocks; uquad *= pnfssf->f_bsize; } else { uquad = (u_int64_t)fs->f_blocks; uquad *= fs->f_bsize; } txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_SPACEUSED: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); txdr_hyper(vap->va_bytes, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_TIMEACCESS: NFSM_BUILD(tl, u_int32_t *, NFSX_V4TIME); txdr_nfsv4time(&vap->va_atime, tl); retnum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEACCESSSET: if ((vap->va_vaflags & VA_UTIMES_NULL) == 0) { NFSM_BUILD(tl, u_int32_t *, NFSX_V4SETTIME); *tl++ = txdr_unsigned(NFSV4SATTRTIME_TOCLIENT); txdr_nfsv4time(&vap->va_atime, tl); retnum += NFSX_V4SETTIME; } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4SATTRTIME_TOSERVER); retnum += NFSX_UNSIGNED; } break; case NFSATTRBIT_TIMEDELTA: NFSM_BUILD(tl, u_int32_t *, NFSX_V4TIME); temptime.tv_sec = 0; temptime.tv_nsec = 1000000000 / hz; txdr_nfsv4time(&temptime, tl); retnum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMETADATA: NFSM_BUILD(tl, u_int32_t *, NFSX_V4TIME); txdr_nfsv4time(&vap->va_ctime, tl); retnum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMODIFY: NFSM_BUILD(tl, u_int32_t *, NFSX_V4TIME); txdr_nfsv4time(&vap->va_mtime, tl); retnum += NFSX_V4TIME; break; case NFSATTRBIT_TIMECREATE: NFSM_BUILD(tl, u_int32_t *, NFSX_V4TIME); txdr_nfsv4time(&vap->va_birthtime, tl); retnum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMODIFYSET: if ((vap->va_vaflags & VA_UTIMES_NULL) == 0) { NFSM_BUILD(tl, u_int32_t *, NFSX_V4SETTIME); *tl++ = txdr_unsigned(NFSV4SATTRTIME_TOCLIENT); txdr_nfsv4time(&vap->va_mtime, tl); retnum += NFSX_V4SETTIME; } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4SATTRTIME_TOSERVER); retnum += NFSX_UNSIGNED; } break; case NFSATTRBIT_MOUNTEDONFILEID: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); if (at_root != 0) uquad = mounted_on_fileno; else uquad = vap->va_fileid; txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_SUPPATTREXCLCREAT: NFSSETSUPP_ATTRBIT(&attrbits, nd); NFSCLRNOTSETABLE_ATTRBIT(&attrbits, nd); NFSCLRBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET); retnum += nfsrv_putattrbit(nd, &attrbits); break; case NFSATTRBIT_FSLAYOUTTYPE: case NFSATTRBIT_LAYOUTTYPE: if (nfsrv_devidcnt == 0) siz = 1; else siz = 2; if (siz == 2) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(1); /* One entry. */ if (nfsrv_doflexfile != 0 || nfsrv_maxpnfsmirror > 1) *tl = txdr_unsigned(NFSLAYOUT_FLEXFILE); else *tl = txdr_unsigned( NFSLAYOUT_NFSV4_1_FILES); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = 0; } retnum += siz * NFSX_UNSIGNED; break; case NFSATTRBIT_LAYOUTALIGNMENT: case NFSATTRBIT_LAYOUTBLKSIZE: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); - *tl = txdr_unsigned(NFS_SRVMAXIO); + *tl = txdr_unsigned(nfs_srvmaxio); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_XATTRSUPPORT: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (xattrsupp) *tl = newnfs_true; else *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; default: printf("EEK! Bad V4 attribute bitpos=%d\n", bitpos); } } } if (naclp != NULL) acl_free(naclp); free(fs, M_STATFS); *retnump = txdr_unsigned(retnum); return (retnum + prefixnum); } /* * Put the attribute bits onto an mbuf list. * Return the number of bytes of output generated. */ int nfsrv_putattrbit(struct nfsrv_descript *nd, nfsattrbit_t *attrbitp) { u_int32_t *tl; int cnt, i, bytesize; for (cnt = NFSATTRBIT_MAXWORDS; cnt > 0; cnt--) if (attrbitp->bits[cnt - 1]) break; bytesize = (cnt + 1) * NFSX_UNSIGNED; NFSM_BUILD(tl, u_int32_t *, bytesize); *tl++ = txdr_unsigned(cnt); for (i = 0; i < cnt; i++) *tl++ = txdr_unsigned(attrbitp->bits[i]); return (bytesize); } /* * Convert a uid to a string. * If the lookup fails, just output the digits. * uid - the user id * cpp - points to a buffer of size NFSV4_SMALLSTR * (malloc a larger one, as required) * retlenp - pointer to length to be returned */ void nfsv4_uidtostr(uid_t uid, u_char **cpp, int *retlenp) { int i; struct nfsusrgrp *usrp; u_char *cp = *cpp; uid_t tmp; int cnt, hasampersand, len = NFSV4_SMALLSTR, ret; struct nfsrv_lughash *hp; cnt = 0; tryagain: if (nfsrv_dnsnamelen > 0 && !nfs_enable_uidtostring) { /* * Always map nfsrv_defaultuid to "nobody". */ if (uid == nfsrv_defaultuid) { i = nfsrv_dnsnamelen + 7; if (i > len) { if (len > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); cp = malloc(i, M_NFSSTRING, M_WAITOK); *cpp = cp; len = i; goto tryagain; } *retlenp = i; NFSBCOPY("nobody@", cp, 7); cp += 7; NFSBCOPY(nfsrv_dnsname, cp, nfsrv_dnsnamelen); return; } hasampersand = 0; hp = NFSUSERHASH(uid); mtx_lock(&hp->mtx); TAILQ_FOREACH(usrp, &hp->lughead, lug_numhash) { if (usrp->lug_uid == uid) { if (usrp->lug_expiry < NFSD_MONOSEC) break; /* * If the name doesn't already have an '@' * in it, append @domainname to it. */ for (i = 0; i < usrp->lug_namelen; i++) { if (usrp->lug_name[i] == '@') { hasampersand = 1; break; } } if (hasampersand) i = usrp->lug_namelen; else i = usrp->lug_namelen + nfsrv_dnsnamelen + 1; if (i > len) { mtx_unlock(&hp->mtx); if (len > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); cp = malloc(i, M_NFSSTRING, M_WAITOK); *cpp = cp; len = i; goto tryagain; } *retlenp = i; NFSBCOPY(usrp->lug_name, cp, usrp->lug_namelen); if (!hasampersand) { cp += usrp->lug_namelen; *cp++ = '@'; NFSBCOPY(nfsrv_dnsname, cp, nfsrv_dnsnamelen); } TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); TAILQ_INSERT_TAIL(&hp->lughead, usrp, lug_numhash); mtx_unlock(&hp->mtx); return; } } mtx_unlock(&hp->mtx); cnt++; ret = nfsrv_getuser(RPCNFSUSERD_GETUID, uid, (gid_t)0, NULL); if (ret == 0 && cnt < 2) goto tryagain; } /* * No match, just return a string of digits. */ tmp = uid; i = 0; while (tmp || i == 0) { tmp /= 10; i++; } len = (i > len) ? len : i; *retlenp = len; cp += (len - 1); tmp = uid; for (i = 0; i < len; i++) { *cp-- = '0' + (tmp % 10); tmp /= 10; } return; } /* * Get a credential for the uid with the server's group list. * If none is found, just return the credential passed in after * logging a warning message. */ struct ucred * nfsrv_getgrpscred(struct ucred *oldcred) { struct nfsusrgrp *usrp; struct ucred *newcred; int cnt, ret; uid_t uid; struct nfsrv_lughash *hp; cnt = 0; uid = oldcred->cr_uid; tryagain: if (nfsrv_dnsnamelen > 0) { hp = NFSUSERHASH(uid); mtx_lock(&hp->mtx); TAILQ_FOREACH(usrp, &hp->lughead, lug_numhash) { if (usrp->lug_uid == uid) { if (usrp->lug_expiry < NFSD_MONOSEC) break; if (usrp->lug_cred != NULL) { newcred = crhold(usrp->lug_cred); crfree(oldcred); } else newcred = oldcred; TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); TAILQ_INSERT_TAIL(&hp->lughead, usrp, lug_numhash); mtx_unlock(&hp->mtx); return (newcred); } } mtx_unlock(&hp->mtx); cnt++; ret = nfsrv_getuser(RPCNFSUSERD_GETUID, uid, (gid_t)0, NULL); if (ret == 0 && cnt < 2) goto tryagain; } return (oldcred); } /* * Convert a string to a uid. * If no conversion is possible return NFSERR_BADOWNER, otherwise * return 0. * If this is called from a client side mount using AUTH_SYS and the * string is made up entirely of digits, just convert the string to * a number. */ int nfsv4_strtouid(struct nfsrv_descript *nd, u_char *str, int len, uid_t *uidp) { int i; char *cp, *endstr, *str0; struct nfsusrgrp *usrp; int cnt, ret; int error = 0; uid_t tuid; struct nfsrv_lughash *hp, *hp2; if (len == 0) { error = NFSERR_BADOWNER; goto out; } /* If a string of digits and an AUTH_SYS mount, just convert it. */ str0 = str; tuid = (uid_t)strtoul(str0, &endstr, 10); if ((endstr - str0) == len) { /* A numeric string. */ if ((nd->nd_flag & ND_KERBV) == 0 && ((nd->nd_flag & ND_NFSCL) != 0 || nfsd_enable_stringtouid != 0)) *uidp = tuid; else error = NFSERR_BADOWNER; goto out; } /* * Look for an '@'. */ cp = strchr(str0, '@'); if (cp != NULL) i = (int)(cp++ - str0); else i = len; cnt = 0; tryagain: if (nfsrv_dnsnamelen > 0) { /* * If an '@' is found and the domain name matches, search for * the name with dns stripped off. * Mixed case alpahbetics will match for the domain name, but * all upper case will not. */ if (cnt == 0 && i < len && i > 0 && (len - 1 - i) == nfsrv_dnsnamelen && !nfsrv_cmpmixedcase(cp, nfsrv_dnsname, nfsrv_dnsnamelen)) { len -= (nfsrv_dnsnamelen + 1); *(cp - 1) = '\0'; } /* * Check for the special case of "nobody". */ if (len == 6 && !NFSBCMP(str, "nobody", 6)) { *uidp = nfsrv_defaultuid; error = 0; goto out; } hp = NFSUSERNAMEHASH(str, len); mtx_lock(&hp->mtx); TAILQ_FOREACH(usrp, &hp->lughead, lug_namehash) { if (usrp->lug_namelen == len && !NFSBCMP(usrp->lug_name, str, len)) { if (usrp->lug_expiry < NFSD_MONOSEC) break; hp2 = NFSUSERHASH(usrp->lug_uid); mtx_lock(&hp2->mtx); TAILQ_REMOVE(&hp2->lughead, usrp, lug_numhash); TAILQ_INSERT_TAIL(&hp2->lughead, usrp, lug_numhash); *uidp = usrp->lug_uid; mtx_unlock(&hp2->mtx); mtx_unlock(&hp->mtx); error = 0; goto out; } } mtx_unlock(&hp->mtx); cnt++; ret = nfsrv_getuser(RPCNFSUSERD_GETUSER, (uid_t)0, (gid_t)0, str); if (ret == 0 && cnt < 2) goto tryagain; } error = NFSERR_BADOWNER; out: NFSEXITCODE(error); return (error); } /* * Convert a gid to a string. * gid - the group id * cpp - points to a buffer of size NFSV4_SMALLSTR * (malloc a larger one, as required) * retlenp - pointer to length to be returned */ void nfsv4_gidtostr(gid_t gid, u_char **cpp, int *retlenp) { int i; struct nfsusrgrp *usrp; u_char *cp = *cpp; gid_t tmp; int cnt, hasampersand, len = NFSV4_SMALLSTR, ret; struct nfsrv_lughash *hp; cnt = 0; tryagain: if (nfsrv_dnsnamelen > 0 && !nfs_enable_uidtostring) { /* * Always map nfsrv_defaultgid to "nogroup". */ if (gid == nfsrv_defaultgid) { i = nfsrv_dnsnamelen + 8; if (i > len) { if (len > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); cp = malloc(i, M_NFSSTRING, M_WAITOK); *cpp = cp; len = i; goto tryagain; } *retlenp = i; NFSBCOPY("nogroup@", cp, 8); cp += 8; NFSBCOPY(nfsrv_dnsname, cp, nfsrv_dnsnamelen); return; } hasampersand = 0; hp = NFSGROUPHASH(gid); mtx_lock(&hp->mtx); TAILQ_FOREACH(usrp, &hp->lughead, lug_numhash) { if (usrp->lug_gid == gid) { if (usrp->lug_expiry < NFSD_MONOSEC) break; /* * If the name doesn't already have an '@' * in it, append @domainname to it. */ for (i = 0; i < usrp->lug_namelen; i++) { if (usrp->lug_name[i] == '@') { hasampersand = 1; break; } } if (hasampersand) i = usrp->lug_namelen; else i = usrp->lug_namelen + nfsrv_dnsnamelen + 1; if (i > len) { mtx_unlock(&hp->mtx); if (len > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); cp = malloc(i, M_NFSSTRING, M_WAITOK); *cpp = cp; len = i; goto tryagain; } *retlenp = i; NFSBCOPY(usrp->lug_name, cp, usrp->lug_namelen); if (!hasampersand) { cp += usrp->lug_namelen; *cp++ = '@'; NFSBCOPY(nfsrv_dnsname, cp, nfsrv_dnsnamelen); } TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); TAILQ_INSERT_TAIL(&hp->lughead, usrp, lug_numhash); mtx_unlock(&hp->mtx); return; } } mtx_unlock(&hp->mtx); cnt++; ret = nfsrv_getuser(RPCNFSUSERD_GETGID, (uid_t)0, gid, NULL); if (ret == 0 && cnt < 2) goto tryagain; } /* * No match, just return a string of digits. */ tmp = gid; i = 0; while (tmp || i == 0) { tmp /= 10; i++; } len = (i > len) ? len : i; *retlenp = len; cp += (len - 1); tmp = gid; for (i = 0; i < len; i++) { *cp-- = '0' + (tmp % 10); tmp /= 10; } return; } /* * Convert a string to a gid. * If no conversion is possible return NFSERR_BADOWNER, otherwise * return 0. * If this is called from a client side mount using AUTH_SYS and the * string is made up entirely of digits, just convert the string to * a number. */ int nfsv4_strtogid(struct nfsrv_descript *nd, u_char *str, int len, gid_t *gidp) { int i; char *cp, *endstr, *str0; struct nfsusrgrp *usrp; int cnt, ret; int error = 0; gid_t tgid; struct nfsrv_lughash *hp, *hp2; if (len == 0) { error = NFSERR_BADOWNER; goto out; } /* If a string of digits and an AUTH_SYS mount, just convert it. */ str0 = str; tgid = (gid_t)strtoul(str0, &endstr, 10); if ((endstr - str0) == len) { /* A numeric string. */ if ((nd->nd_flag & ND_KERBV) == 0 && ((nd->nd_flag & ND_NFSCL) != 0 || nfsd_enable_stringtouid != 0)) *gidp = tgid; else error = NFSERR_BADOWNER; goto out; } /* * Look for an '@'. */ cp = strchr(str0, '@'); if (cp != NULL) i = (int)(cp++ - str0); else i = len; cnt = 0; tryagain: if (nfsrv_dnsnamelen > 0) { /* * If an '@' is found and the dns name matches, search for the * name with the dns stripped off. */ if (cnt == 0 && i < len && i > 0 && (len - 1 - i) == nfsrv_dnsnamelen && !nfsrv_cmpmixedcase(cp, nfsrv_dnsname, nfsrv_dnsnamelen)) { len -= (nfsrv_dnsnamelen + 1); *(cp - 1) = '\0'; } /* * Check for the special case of "nogroup". */ if (len == 7 && !NFSBCMP(str, "nogroup", 7)) { *gidp = nfsrv_defaultgid; error = 0; goto out; } hp = NFSGROUPNAMEHASH(str, len); mtx_lock(&hp->mtx); TAILQ_FOREACH(usrp, &hp->lughead, lug_namehash) { if (usrp->lug_namelen == len && !NFSBCMP(usrp->lug_name, str, len)) { if (usrp->lug_expiry < NFSD_MONOSEC) break; hp2 = NFSGROUPHASH(usrp->lug_gid); mtx_lock(&hp2->mtx); TAILQ_REMOVE(&hp2->lughead, usrp, lug_numhash); TAILQ_INSERT_TAIL(&hp2->lughead, usrp, lug_numhash); *gidp = usrp->lug_gid; mtx_unlock(&hp2->mtx); mtx_unlock(&hp->mtx); error = 0; goto out; } } mtx_unlock(&hp->mtx); cnt++; ret = nfsrv_getuser(RPCNFSUSERD_GETGROUP, (uid_t)0, (gid_t)0, str); if (ret == 0 && cnt < 2) goto tryagain; } error = NFSERR_BADOWNER; out: NFSEXITCODE(error); return (error); } /* * Cmp len chars, allowing mixed case in the first argument to match lower * case in the second, but not if the first argument is all upper case. * Return 0 for a match, 1 otherwise. */ static int nfsrv_cmpmixedcase(u_char *cp, u_char *cp2, int len) { int i; u_char tmp; int fndlower = 0; for (i = 0; i < len; i++) { if (*cp >= 'A' && *cp <= 'Z') { tmp = *cp++ + ('a' - 'A'); } else { tmp = *cp++; if (tmp >= 'a' && tmp <= 'z') fndlower = 1; } if (tmp != *cp2++) return (1); } if (fndlower) return (0); else return (1); } /* * Set the port for the nfsuserd. */ int nfsrv_nfsuserdport(struct nfsuserd_args *nargs, NFSPROC_T *p) { struct nfssockreq *rp; #ifdef INET struct sockaddr_in *ad; #endif #ifdef INET6 struct sockaddr_in6 *ad6; const struct in6_addr in6loopback = IN6ADDR_LOOPBACK_INIT; #endif int error; NFSLOCKNAMEID(); if (nfsrv_nfsuserd != NOTRUNNING) { NFSUNLOCKNAMEID(); error = EPERM; goto out; } nfsrv_nfsuserd = STARTSTOP; /* * Set up the socket record and connect. * Set nr_client NULL before unlocking, just to ensure that no other * process/thread/core will use a bogus old value. This could only * occur if the use of the nameid lock to protect nfsrv_nfsuserd is * broken. */ rp = &nfsrv_nfsuserdsock; rp->nr_client = NULL; NFSUNLOCKNAMEID(); rp->nr_sotype = SOCK_DGRAM; rp->nr_soproto = IPPROTO_UDP; rp->nr_lock = (NFSR_RESERVEDPORT | NFSR_LOCALHOST); rp->nr_cred = NULL; rp->nr_prog = RPCPROG_NFSUSERD; error = 0; switch (nargs->nuserd_family) { #ifdef INET case AF_INET: rp->nr_nam = malloc(sizeof(struct sockaddr_in), M_SONAME, M_WAITOK | M_ZERO); ad = (struct sockaddr_in *)rp->nr_nam; ad->sin_len = sizeof(struct sockaddr_in); ad->sin_family = AF_INET; ad->sin_addr.s_addr = htonl(INADDR_LOOPBACK); ad->sin_port = nargs->nuserd_port; break; #endif #ifdef INET6 case AF_INET6: rp->nr_nam = malloc(sizeof(struct sockaddr_in6), M_SONAME, M_WAITOK | M_ZERO); ad6 = (struct sockaddr_in6 *)rp->nr_nam; ad6->sin6_len = sizeof(struct sockaddr_in6); ad6->sin6_family = AF_INET6; ad6->sin6_addr = in6loopback; ad6->sin6_port = nargs->nuserd_port; break; #endif default: error = ENXIO; } rp->nr_vers = RPCNFSUSERD_VERS; if (error == 0) error = newnfs_connect(NULL, rp, NFSPROCCRED(p), p, 0, false, &rp->nr_client); if (error == 0) { NFSLOCKNAMEID(); nfsrv_nfsuserd = RUNNING; NFSUNLOCKNAMEID(); } else { free(rp->nr_nam, M_SONAME); NFSLOCKNAMEID(); nfsrv_nfsuserd = NOTRUNNING; NFSUNLOCKNAMEID(); } out: NFSEXITCODE(error); return (error); } /* * Delete the nfsuserd port. */ void nfsrv_nfsuserddelport(void) { NFSLOCKNAMEID(); if (nfsrv_nfsuserd != RUNNING) { NFSUNLOCKNAMEID(); return; } nfsrv_nfsuserd = STARTSTOP; /* Wait for all upcalls to complete. */ while (nfsrv_userdupcalls > 0) msleep(&nfsrv_userdupcalls, NFSNAMEIDMUTEXPTR, PVFS, "nfsupcalls", 0); NFSUNLOCKNAMEID(); newnfs_disconnect(NULL, &nfsrv_nfsuserdsock); free(nfsrv_nfsuserdsock.nr_nam, M_SONAME); NFSLOCKNAMEID(); nfsrv_nfsuserd = NOTRUNNING; NFSUNLOCKNAMEID(); } /* * Do upcalls to the nfsuserd, for cache misses of the owner/ownergroup * name<-->id cache. * Returns 0 upon success, non-zero otherwise. */ static int nfsrv_getuser(int procnum, uid_t uid, gid_t gid, char *name) { u_int32_t *tl; struct nfsrv_descript *nd; int len; struct nfsrv_descript nfsd; struct ucred *cred; int error; NFSLOCKNAMEID(); if (nfsrv_nfsuserd != RUNNING) { NFSUNLOCKNAMEID(); error = EPERM; goto out; } /* * Maintain a count of upcalls in progress, so that nfsrv_X() * can wait until no upcalls are in progress. */ nfsrv_userdupcalls++; NFSUNLOCKNAMEID(); KASSERT(nfsrv_userdupcalls > 0, ("nfsrv_getuser: non-positive upcalls")); nd = &nfsd; cred = newnfs_getcred(); nd->nd_flag = ND_GSSINITREPLY; nfsrvd_rephead(nd); nd->nd_procnum = procnum; if (procnum == RPCNFSUSERD_GETUID || procnum == RPCNFSUSERD_GETGID) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (procnum == RPCNFSUSERD_GETUID) *tl = txdr_unsigned(uid); else *tl = txdr_unsigned(gid); } else { len = strlen(name); (void) nfsm_strtom(nd, name, len); } error = newnfs_request(nd, NULL, NULL, &nfsrv_nfsuserdsock, NULL, NULL, cred, RPCPROG_NFSUSERD, RPCNFSUSERD_VERS, NULL, 0, NULL, NULL); NFSLOCKNAMEID(); if (--nfsrv_userdupcalls == 0 && nfsrv_nfsuserd == STARTSTOP) wakeup(&nfsrv_userdupcalls); NFSUNLOCKNAMEID(); NFSFREECRED(cred); if (!error) { m_freem(nd->nd_mrep); error = nd->nd_repstat; } out: NFSEXITCODE(error); return (error); } /* * This function is called from the nfssvc(2) system call, to update the * kernel user/group name list(s) for the V4 owner and ownergroup attributes. */ int nfssvc_idname(struct nfsd_idargs *nidp) { struct nfsusrgrp *nusrp, *usrp, *newusrp; struct nfsrv_lughash *hp_name, *hp_idnum, *thp; int i, group_locked, groupname_locked, user_locked, username_locked; int error = 0; u_char *cp; gid_t *grps; struct ucred *cr; static int onethread = 0; static time_t lasttime = 0; if (nidp->nid_namelen <= 0 || nidp->nid_namelen > MAXHOSTNAMELEN) { error = EINVAL; goto out; } if (nidp->nid_flag & NFSID_INITIALIZE) { cp = malloc(nidp->nid_namelen + 1, M_NFSSTRING, M_WAITOK); error = copyin(nidp->nid_name, cp, nidp->nid_namelen); if (error != 0) { free(cp, M_NFSSTRING); goto out; } if (atomic_cmpset_acq_int(&nfsrv_dnsnamelen, 0, 0) == 0) { /* * Free up all the old stuff and reinitialize hash * lists. All mutexes for both lists must be locked, * with the user/group name ones before the uid/gid * ones, to avoid a LOR. */ for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsusernamehash[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsuserhash[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) TAILQ_FOREACH_SAFE(usrp, &nfsuserhash[i].lughead, lug_numhash, nusrp) nfsrv_removeuser(usrp, 1); for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsuserhash[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsusernamehash[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsgroupnamehash[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsgrouphash[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) TAILQ_FOREACH_SAFE(usrp, &nfsgrouphash[i].lughead, lug_numhash, nusrp) nfsrv_removeuser(usrp, 0); for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsgrouphash[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsgroupnamehash[i].mtx); free(nfsrv_dnsname, M_NFSSTRING); nfsrv_dnsname = NULL; } if (nfsuserhash == NULL) { /* Allocate the hash tables. */ nfsuserhash = malloc(sizeof(struct nfsrv_lughash) * nfsrv_lughashsize, M_NFSUSERGROUP, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_lughashsize; i++) mtx_init(&nfsuserhash[i].mtx, "nfsuidhash", NULL, MTX_DEF | MTX_DUPOK); nfsusernamehash = malloc(sizeof(struct nfsrv_lughash) * nfsrv_lughashsize, M_NFSUSERGROUP, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_lughashsize; i++) mtx_init(&nfsusernamehash[i].mtx, "nfsusrhash", NULL, MTX_DEF | MTX_DUPOK); nfsgrouphash = malloc(sizeof(struct nfsrv_lughash) * nfsrv_lughashsize, M_NFSUSERGROUP, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_lughashsize; i++) mtx_init(&nfsgrouphash[i].mtx, "nfsgidhash", NULL, MTX_DEF | MTX_DUPOK); nfsgroupnamehash = malloc(sizeof(struct nfsrv_lughash) * nfsrv_lughashsize, M_NFSUSERGROUP, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_lughashsize; i++) mtx_init(&nfsgroupnamehash[i].mtx, "nfsgrphash", NULL, MTX_DEF | MTX_DUPOK); } /* (Re)initialize the list heads. */ for (i = 0; i < nfsrv_lughashsize; i++) TAILQ_INIT(&nfsuserhash[i].lughead); for (i = 0; i < nfsrv_lughashsize; i++) TAILQ_INIT(&nfsusernamehash[i].lughead); for (i = 0; i < nfsrv_lughashsize; i++) TAILQ_INIT(&nfsgrouphash[i].lughead); for (i = 0; i < nfsrv_lughashsize; i++) TAILQ_INIT(&nfsgroupnamehash[i].lughead); /* * Put name in "DNS" string. */ nfsrv_dnsname = cp; nfsrv_defaultuid = nidp->nid_uid; nfsrv_defaultgid = nidp->nid_gid; nfsrv_usercnt = 0; nfsrv_usermax = nidp->nid_usermax; atomic_store_rel_int(&nfsrv_dnsnamelen, nidp->nid_namelen); goto out; } /* * malloc the new one now, so any potential sleep occurs before * manipulation of the lists. */ newusrp = malloc(sizeof(struct nfsusrgrp) + nidp->nid_namelen, M_NFSUSERGROUP, M_WAITOK | M_ZERO); error = copyin(nidp->nid_name, newusrp->lug_name, nidp->nid_namelen); if (error == 0 && nidp->nid_ngroup > 0 && (nidp->nid_flag & NFSID_ADDUID) != 0) { grps = malloc(sizeof(gid_t) * nidp->nid_ngroup, M_TEMP, M_WAITOK); error = copyin(nidp->nid_grps, grps, sizeof(gid_t) * nidp->nid_ngroup); if (error == 0) { /* * Create a credential just like svc_getcred(), * but using the group list provided. */ cr = crget(); cr->cr_uid = cr->cr_ruid = cr->cr_svuid = nidp->nid_uid; crsetgroups(cr, nidp->nid_ngroup, grps); cr->cr_rgid = cr->cr_svgid = cr->cr_groups[0]; cr->cr_prison = &prison0; prison_hold(cr->cr_prison); #ifdef MAC mac_cred_associate_nfsd(cr); #endif newusrp->lug_cred = cr; } free(grps, M_TEMP); } if (error) { free(newusrp, M_NFSUSERGROUP); goto out; } newusrp->lug_namelen = nidp->nid_namelen; /* * The lock order is username[0]->[nfsrv_lughashsize - 1] followed * by uid[0]->[nfsrv_lughashsize - 1], with the same for group. * The flags user_locked, username_locked, group_locked and * groupname_locked are set to indicate all of those hash lists are * locked. hp_name != NULL and hp_idnum != NULL indicates that * the respective one mutex is locked. */ user_locked = username_locked = group_locked = groupname_locked = 0; hp_name = hp_idnum = NULL; /* * Delete old entries, as required. */ if (nidp->nid_flag & (NFSID_DELUID | NFSID_ADDUID)) { /* Must lock all username hash lists first, to avoid a LOR. */ for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsusernamehash[i].mtx); username_locked = 1; hp_idnum = NFSUSERHASH(nidp->nid_uid); mtx_lock(&hp_idnum->mtx); TAILQ_FOREACH_SAFE(usrp, &hp_idnum->lughead, lug_numhash, nusrp) { if (usrp->lug_uid == nidp->nid_uid) nfsrv_removeuser(usrp, 1); } } else if (nidp->nid_flag & (NFSID_DELUSERNAME | NFSID_ADDUSERNAME)) { hp_name = NFSUSERNAMEHASH(newusrp->lug_name, newusrp->lug_namelen); mtx_lock(&hp_name->mtx); TAILQ_FOREACH_SAFE(usrp, &hp_name->lughead, lug_namehash, nusrp) { if (usrp->lug_namelen == newusrp->lug_namelen && !NFSBCMP(usrp->lug_name, newusrp->lug_name, usrp->lug_namelen)) { thp = NFSUSERHASH(usrp->lug_uid); mtx_lock(&thp->mtx); nfsrv_removeuser(usrp, 1); mtx_unlock(&thp->mtx); } } hp_idnum = NFSUSERHASH(nidp->nid_uid); mtx_lock(&hp_idnum->mtx); } else if (nidp->nid_flag & (NFSID_DELGID | NFSID_ADDGID)) { /* Must lock all groupname hash lists first, to avoid a LOR. */ for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsgroupnamehash[i].mtx); groupname_locked = 1; hp_idnum = NFSGROUPHASH(nidp->nid_gid); mtx_lock(&hp_idnum->mtx); TAILQ_FOREACH_SAFE(usrp, &hp_idnum->lughead, lug_numhash, nusrp) { if (usrp->lug_gid == nidp->nid_gid) nfsrv_removeuser(usrp, 0); } } else if (nidp->nid_flag & (NFSID_DELGROUPNAME | NFSID_ADDGROUPNAME)) { hp_name = NFSGROUPNAMEHASH(newusrp->lug_name, newusrp->lug_namelen); mtx_lock(&hp_name->mtx); TAILQ_FOREACH_SAFE(usrp, &hp_name->lughead, lug_namehash, nusrp) { if (usrp->lug_namelen == newusrp->lug_namelen && !NFSBCMP(usrp->lug_name, newusrp->lug_name, usrp->lug_namelen)) { thp = NFSGROUPHASH(usrp->lug_gid); mtx_lock(&thp->mtx); nfsrv_removeuser(usrp, 0); mtx_unlock(&thp->mtx); } } hp_idnum = NFSGROUPHASH(nidp->nid_gid); mtx_lock(&hp_idnum->mtx); } /* * Now, we can add the new one. */ if (nidp->nid_usertimeout) newusrp->lug_expiry = NFSD_MONOSEC + nidp->nid_usertimeout; else newusrp->lug_expiry = NFSD_MONOSEC + 5; if (nidp->nid_flag & (NFSID_ADDUID | NFSID_ADDUSERNAME)) { newusrp->lug_uid = nidp->nid_uid; thp = NFSUSERHASH(newusrp->lug_uid); mtx_assert(&thp->mtx, MA_OWNED); TAILQ_INSERT_TAIL(&thp->lughead, newusrp, lug_numhash); thp = NFSUSERNAMEHASH(newusrp->lug_name, newusrp->lug_namelen); mtx_assert(&thp->mtx, MA_OWNED); TAILQ_INSERT_TAIL(&thp->lughead, newusrp, lug_namehash); atomic_add_int(&nfsrv_usercnt, 1); } else if (nidp->nid_flag & (NFSID_ADDGID | NFSID_ADDGROUPNAME)) { newusrp->lug_gid = nidp->nid_gid; thp = NFSGROUPHASH(newusrp->lug_gid); mtx_assert(&thp->mtx, MA_OWNED); TAILQ_INSERT_TAIL(&thp->lughead, newusrp, lug_numhash); thp = NFSGROUPNAMEHASH(newusrp->lug_name, newusrp->lug_namelen); mtx_assert(&thp->mtx, MA_OWNED); TAILQ_INSERT_TAIL(&thp->lughead, newusrp, lug_namehash); atomic_add_int(&nfsrv_usercnt, 1); } else { if (newusrp->lug_cred != NULL) crfree(newusrp->lug_cred); free(newusrp, M_NFSUSERGROUP); } /* * Once per second, allow one thread to trim the cache. */ if (lasttime < NFSD_MONOSEC && atomic_cmpset_acq_int(&onethread, 0, 1) != 0) { /* * First, unlock the single mutexes, so that all entries * can be locked and any LOR is avoided. */ if (hp_name != NULL) { mtx_unlock(&hp_name->mtx); hp_name = NULL; } if (hp_idnum != NULL) { mtx_unlock(&hp_idnum->mtx); hp_idnum = NULL; } if ((nidp->nid_flag & (NFSID_DELUID | NFSID_ADDUID | NFSID_DELUSERNAME | NFSID_ADDUSERNAME)) != 0) { if (username_locked == 0) { for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsusernamehash[i].mtx); username_locked = 1; } KASSERT(user_locked == 0, ("nfssvc_idname: user_locked")); for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsuserhash[i].mtx); user_locked = 1; for (i = 0; i < nfsrv_lughashsize; i++) { TAILQ_FOREACH_SAFE(usrp, &nfsuserhash[i].lughead, lug_numhash, nusrp) if (usrp->lug_expiry < NFSD_MONOSEC) nfsrv_removeuser(usrp, 1); } for (i = 0; i < nfsrv_lughashsize; i++) { /* * Trim the cache using an approximate LRU * algorithm. This code deletes the least * recently used entry on each hash list. */ if (nfsrv_usercnt <= nfsrv_usermax) break; usrp = TAILQ_FIRST(&nfsuserhash[i].lughead); if (usrp != NULL) nfsrv_removeuser(usrp, 1); } } else { if (groupname_locked == 0) { for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsgroupnamehash[i].mtx); groupname_locked = 1; } KASSERT(group_locked == 0, ("nfssvc_idname: group_locked")); for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsgrouphash[i].mtx); group_locked = 1; for (i = 0; i < nfsrv_lughashsize; i++) { TAILQ_FOREACH_SAFE(usrp, &nfsgrouphash[i].lughead, lug_numhash, nusrp) if (usrp->lug_expiry < NFSD_MONOSEC) nfsrv_removeuser(usrp, 0); } for (i = 0; i < nfsrv_lughashsize; i++) { /* * Trim the cache using an approximate LRU * algorithm. This code deletes the least * recently user entry on each hash list. */ if (nfsrv_usercnt <= nfsrv_usermax) break; usrp = TAILQ_FIRST(&nfsgrouphash[i].lughead); if (usrp != NULL) nfsrv_removeuser(usrp, 0); } } lasttime = NFSD_MONOSEC; atomic_store_rel_int(&onethread, 0); } /* Now, unlock all locked mutexes. */ if (hp_idnum != NULL) mtx_unlock(&hp_idnum->mtx); if (hp_name != NULL) mtx_unlock(&hp_name->mtx); if (user_locked != 0) for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsuserhash[i].mtx); if (username_locked != 0) for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsusernamehash[i].mtx); if (group_locked != 0) for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsgrouphash[i].mtx); if (groupname_locked != 0) for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsgroupnamehash[i].mtx); out: NFSEXITCODE(error); return (error); } /* * Remove a user/group name element. */ static void nfsrv_removeuser(struct nfsusrgrp *usrp, int isuser) { struct nfsrv_lughash *hp; if (isuser != 0) { hp = NFSUSERHASH(usrp->lug_uid); mtx_assert(&hp->mtx, MA_OWNED); TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); hp = NFSUSERNAMEHASH(usrp->lug_name, usrp->lug_namelen); mtx_assert(&hp->mtx, MA_OWNED); TAILQ_REMOVE(&hp->lughead, usrp, lug_namehash); } else { hp = NFSGROUPHASH(usrp->lug_gid); mtx_assert(&hp->mtx, MA_OWNED); TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); hp = NFSGROUPNAMEHASH(usrp->lug_name, usrp->lug_namelen); mtx_assert(&hp->mtx, MA_OWNED); TAILQ_REMOVE(&hp->lughead, usrp, lug_namehash); } atomic_add_int(&nfsrv_usercnt, -1); if (usrp->lug_cred != NULL) crfree(usrp->lug_cred); free(usrp, M_NFSUSERGROUP); } /* * Free up all the allocations related to the name<-->id cache. * This function should only be called when the nfsuserd daemon isn't * running, since it doesn't do any locking. * This function is meant to be used when the nfscommon module is unloaded. */ void nfsrv_cleanusergroup(void) { struct nfsrv_lughash *hp, *hp2; struct nfsusrgrp *nusrp, *usrp; int i; if (nfsuserhash == NULL) return; for (i = 0; i < nfsrv_lughashsize; i++) { hp = &nfsuserhash[i]; TAILQ_FOREACH_SAFE(usrp, &hp->lughead, lug_numhash, nusrp) { TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); hp2 = NFSUSERNAMEHASH(usrp->lug_name, usrp->lug_namelen); TAILQ_REMOVE(&hp2->lughead, usrp, lug_namehash); if (usrp->lug_cred != NULL) crfree(usrp->lug_cred); free(usrp, M_NFSUSERGROUP); } hp = &nfsgrouphash[i]; TAILQ_FOREACH_SAFE(usrp, &hp->lughead, lug_numhash, nusrp) { TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); hp2 = NFSGROUPNAMEHASH(usrp->lug_name, usrp->lug_namelen); TAILQ_REMOVE(&hp2->lughead, usrp, lug_namehash); if (usrp->lug_cred != NULL) crfree(usrp->lug_cred); free(usrp, M_NFSUSERGROUP); } mtx_destroy(&nfsuserhash[i].mtx); mtx_destroy(&nfsusernamehash[i].mtx); mtx_destroy(&nfsgroupnamehash[i].mtx); mtx_destroy(&nfsgrouphash[i].mtx); } free(nfsuserhash, M_NFSUSERGROUP); free(nfsusernamehash, M_NFSUSERGROUP); free(nfsgrouphash, M_NFSUSERGROUP); free(nfsgroupnamehash, M_NFSUSERGROUP); free(nfsrv_dnsname, M_NFSSTRING); } /* * This function scans a byte string and checks for UTF-8 compliance. * It returns 0 if it conforms and NFSERR_INVAL if not. */ int nfsrv_checkutf8(u_int8_t *cp, int len) { u_int32_t val = 0x0; int cnt = 0, gotd = 0, shift = 0; u_int8_t byte; static int utf8_shift[5] = { 7, 11, 16, 21, 26 }; int error = 0; /* * Here are what the variables are used for: * val - the calculated value of a multibyte char, used to check * that it was coded with the correct range * cnt - the number of 10xxxxxx bytes to follow * gotd - set for a char of Dxxx, so D800<->DFFF can be checked for * shift - lower order bits of range (ie. "val >> shift" should * not be 0, in other words, dividing by the lower bound * of the range should get a non-zero value) * byte - used to calculate cnt */ while (len > 0) { if (cnt > 0) { /* This handles the 10xxxxxx bytes */ if ((*cp & 0xc0) != 0x80 || (gotd && (*cp & 0x20))) { error = NFSERR_INVAL; goto out; } gotd = 0; val <<= 6; val |= (*cp & 0x3f); cnt--; if (cnt == 0 && (val >> shift) == 0x0) { error = NFSERR_INVAL; goto out; } } else if (*cp & 0x80) { /* first byte of multi byte char */ byte = *cp; while ((byte & 0x40) && cnt < 6) { cnt++; byte <<= 1; } if (cnt == 0 || cnt == 6) { error = NFSERR_INVAL; goto out; } val = (*cp & (0x3f >> cnt)); shift = utf8_shift[cnt - 1]; if (cnt == 2 && val == 0xd) /* Check for the 0xd800-0xdfff case */ gotd = 1; } cp++; len--; } if (cnt > 0) error = NFSERR_INVAL; out: NFSEXITCODE(error); return (error); } /* * Parse the xdr for an NFSv4 FsLocations attribute. Return two malloc'd * strings, one with the root path in it and the other with the list of * locations. The list is in the same format as is found in nfr_refs. * It is a "," separated list of entries, where each of them is of the * form :. For example * "nfsv4-test:/sub2,nfsv4-test2:/user/mnt,nfsv4-test2:/user/mnt2" * The nilp argument is set to 1 for the special case of a null fs_root * and an empty server list. * It returns NFSERR_BADXDR, if the xdr can't be parsed and returns the * number of xdr bytes parsed in sump. */ static int nfsrv_getrefstr(struct nfsrv_descript *nd, u_char **fsrootp, u_char **srvp, int *sump, int *nilp) { u_int32_t *tl; u_char *cp = NULL, *cp2 = NULL, *cp3, *str; int i, j, len, stringlen, cnt, slen, siz, xdrsum, error = 0, nsrv; struct list { SLIST_ENTRY(list) next; int len; u_char host[1]; } *lsp, *nlsp; SLIST_HEAD(, list) head; *fsrootp = NULL; *srvp = NULL; *nilp = 0; /* * Get the fs_root path and check for the special case of null path * and 0 length server list. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len < 0 || len > 10240) { error = NFSERR_BADXDR; goto nfsmout; } if (len == 0) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl != 0) { error = NFSERR_BADXDR; goto nfsmout; } *nilp = 1; *sump = 2 * NFSX_UNSIGNED; error = 0; goto nfsmout; } cp = malloc(len + 1, M_NFSSTRING, M_WAITOK); error = nfsrv_mtostr(nd, cp, len); if (!error) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); cnt = fxdr_unsigned(int, *tl); if (cnt <= 0) error = NFSERR_BADXDR; } if (error) goto nfsmout; /* * Now, loop through the location list and make up the srvlist. */ xdrsum = (2 * NFSX_UNSIGNED) + NFSM_RNDUP(len); cp2 = cp3 = malloc(1024, M_NFSSTRING, M_WAITOK); slen = 1024; siz = 0; for (i = 0; i < cnt; i++) { SLIST_INIT(&head); NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nsrv = fxdr_unsigned(int, *tl); if (nsrv <= 0) { error = NFSERR_BADXDR; goto nfsmout; } /* * Handle the first server by putting it in the srvstr. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len <= 0 || len > 1024) { error = NFSERR_BADXDR; goto nfsmout; } nfsrv_refstrbigenough(siz + len + 3, &cp2, &cp3, &slen); if (cp3 != cp2) { *cp3++ = ','; siz++; } error = nfsrv_mtostr(nd, cp3, len); if (error) goto nfsmout; cp3 += len; *cp3++ = ':'; siz += (len + 1); xdrsum += (2 * NFSX_UNSIGNED) + NFSM_RNDUP(len); for (j = 1; j < nsrv; j++) { /* * Yuck, put them in an slist and process them later. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len <= 0 || len > 1024) { error = NFSERR_BADXDR; goto nfsmout; } lsp = (struct list *)malloc(sizeof (struct list) + len, M_TEMP, M_WAITOK); error = nfsrv_mtostr(nd, lsp->host, len); if (error) goto nfsmout; xdrsum += NFSX_UNSIGNED + NFSM_RNDUP(len); lsp->len = len; SLIST_INSERT_HEAD(&head, lsp, next); } /* * Finally, we can get the path. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len <= 0 || len > 1024) { error = NFSERR_BADXDR; goto nfsmout; } nfsrv_refstrbigenough(siz + len + 1, &cp2, &cp3, &slen); error = nfsrv_mtostr(nd, cp3, len); if (error) goto nfsmout; xdrsum += NFSX_UNSIGNED + NFSM_RNDUP(len); str = cp3; stringlen = len; cp3 += len; siz += len; SLIST_FOREACH_SAFE(lsp, &head, next, nlsp) { nfsrv_refstrbigenough(siz + lsp->len + stringlen + 3, &cp2, &cp3, &slen); *cp3++ = ','; NFSBCOPY(lsp->host, cp3, lsp->len); cp3 += lsp->len; *cp3++ = ':'; NFSBCOPY(str, cp3, stringlen); cp3 += stringlen; *cp3 = '\0'; siz += (lsp->len + stringlen + 2); free(lsp, M_TEMP); } } *fsrootp = cp; *srvp = cp2; *sump = xdrsum; NFSEXITCODE2(0, nd); return (0); nfsmout: if (cp != NULL) free(cp, M_NFSSTRING); if (cp2 != NULL) free(cp2, M_NFSSTRING); NFSEXITCODE2(error, nd); return (error); } /* * Make the malloc'd space large enough. This is a pain, but the xdr * doesn't set an upper bound on the side, so... */ static void nfsrv_refstrbigenough(int siz, u_char **cpp, u_char **cpp2, int *slenp) { u_char *cp; int i; if (siz <= *slenp) return; cp = malloc(siz + 1024, M_NFSSTRING, M_WAITOK); NFSBCOPY(*cpp, cp, *slenp); free(*cpp, M_NFSSTRING); i = *cpp2 - *cpp; *cpp = cp; *cpp2 = cp + i; *slenp = siz + 1024; } /* * Initialize the reply header data structures. */ void nfsrvd_rephead(struct nfsrv_descript *nd) { struct mbuf *mreq; if ((nd->nd_flag & ND_EXTPG) != 0) { mreq = mb_alloc_ext_plus_pages(PAGE_SIZE, M_WAITOK); nd->nd_mreq = nd->nd_mb = mreq; nd->nd_bpos = (char *)(void *) PHYS_TO_DMAP(mreq->m_epg_pa[0]); nd->nd_bextpg = 0; nd->nd_bextpgsiz = PAGE_SIZE; } else { /* * If this is a big reply, use a cluster. */ if ((nd->nd_flag & ND_GSSINITREPLY) == 0 && nfs_bigreply[nd->nd_procnum]) { NFSMCLGET(mreq, M_WAITOK); nd->nd_mreq = mreq; nd->nd_mb = mreq; } else { NFSMGET(mreq); nd->nd_mreq = mreq; nd->nd_mb = mreq; } nd->nd_bpos = mtod(mreq, char *); mreq->m_len = 0; } if ((nd->nd_flag & ND_GSSINITREPLY) == 0) NFSM_BUILD(nd->nd_errp, int *, NFSX_UNSIGNED); } /* * Lock a socket against others. * Currently used to serialize connect/disconnect attempts. */ int newnfs_sndlock(int *flagp) { struct timespec ts; NFSLOCKSOCK(); while (*flagp & NFSR_SNDLOCK) { *flagp |= NFSR_WANTSND; ts.tv_sec = 0; ts.tv_nsec = 0; (void) nfsmsleep((caddr_t)flagp, NFSSOCKMUTEXPTR, PZERO - 1, "nfsndlck", &ts); } *flagp |= NFSR_SNDLOCK; NFSUNLOCKSOCK(); return (0); } /* * Unlock the stream socket for others. */ void newnfs_sndunlock(int *flagp) { NFSLOCKSOCK(); if ((*flagp & NFSR_SNDLOCK) == 0) panic("nfs sndunlock"); *flagp &= ~NFSR_SNDLOCK; if (*flagp & NFSR_WANTSND) { *flagp &= ~NFSR_WANTSND; wakeup((caddr_t)flagp); } NFSUNLOCKSOCK(); } int nfsv4_getipaddr(struct nfsrv_descript *nd, struct sockaddr_in *sin, struct sockaddr_in6 *sin6, sa_family_t *saf, int *isudp) { struct in_addr saddr; uint32_t portnum, *tl; int i, j, k; sa_family_t af = AF_UNSPEC; char addr[64], protocol[5], *cp; int cantparse = 0, error = 0; uint16_t portv; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i >= 3 && i <= 4) { error = nfsrv_mtostr(nd, protocol, i); if (error) goto nfsmout; if (strcmp(protocol, "tcp") == 0) { af = AF_INET; *isudp = 0; } else if (strcmp(protocol, "udp") == 0) { af = AF_INET; *isudp = 1; } else if (strcmp(protocol, "tcp6") == 0) { af = AF_INET6; *isudp = 0; } else if (strcmp(protocol, "udp6") == 0) { af = AF_INET6; *isudp = 1; } else cantparse = 1; } else { cantparse = 1; if (i > 0) { error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; } } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i < 0) { error = NFSERR_BADXDR; goto nfsmout; } else if (cantparse == 0 && i >= 11 && i < 64) { /* * The shortest address is 11chars and the longest is < 64. */ error = nfsrv_mtostr(nd, addr, i); if (error) goto nfsmout; /* Find the port# at the end and extract that. */ i = strlen(addr); k = 0; cp = &addr[i - 1]; /* Count back two '.'s from end to get port# field. */ for (j = 0; j < i; j++) { if (*cp == '.') { k++; if (k == 2) break; } cp--; } if (k == 2) { /* * The NFSv4 port# is appended as .N.N, where N is * a decimal # in the range 0-255, just like an inet4 * address. Cheat and use inet_aton(), which will * return a Class A address and then shift the high * order 8bits over to convert it to the port#. */ *cp++ = '\0'; if (inet_aton(cp, &saddr) == 1) { portnum = ntohl(saddr.s_addr); portv = (uint16_t)((portnum >> 16) | (portnum & 0xff)); } else cantparse = 1; } else cantparse = 1; if (cantparse == 0) { if (af == AF_INET) { if (inet_pton(af, addr, &sin->sin_addr) == 1) { sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; sin->sin_port = htons(portv); *saf = af; return (0); } } else { if (inet_pton(af, addr, &sin6->sin6_addr) == 1) { sin6->sin6_len = sizeof(*sin6); sin6->sin6_family = AF_INET6; sin6->sin6_port = htons(portv); *saf = af; return (0); } } } } else { if (i > 0) { error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; } } error = EPERM; nfsmout: return (error); } /* * Handle an NFSv4.1 Sequence request for the session. * If reply != NULL, use it to return the cached reply, as required. * The client gets a cached reply via this call for callbacks, however the * server gets a cached reply via the nfsv4_seqsess_cacherep() call. */ int nfsv4_seqsession(uint32_t seqid, uint32_t slotid, uint32_t highslot, struct nfsslot *slots, struct mbuf **reply, uint16_t maxslot) { struct mbuf *m; int error; error = 0; if (reply != NULL) *reply = NULL; if (slotid > maxslot) return (NFSERR_BADSLOT); if (seqid == slots[slotid].nfssl_seq) { /* A retry. */ if (slots[slotid].nfssl_inprog != 0) error = NFSERR_DELAY; else if (slots[slotid].nfssl_reply != NULL) { if (reply != NULL) { m = m_copym(slots[slotid].nfssl_reply, 0, M_COPYALL, M_NOWAIT); if (m != NULL) *reply = m; else { *reply = slots[slotid].nfssl_reply; slots[slotid].nfssl_reply = NULL; } } slots[slotid].nfssl_inprog = 1; error = NFSERR_REPLYFROMCACHE; } else /* No reply cached, so just do it. */ slots[slotid].nfssl_inprog = 1; } else if ((slots[slotid].nfssl_seq + 1) == seqid) { if (slots[slotid].nfssl_reply != NULL) m_freem(slots[slotid].nfssl_reply); slots[slotid].nfssl_reply = NULL; slots[slotid].nfssl_inprog = 1; slots[slotid].nfssl_seq++; } else error = NFSERR_SEQMISORDERED; return (error); } /* * Cache this reply for the slot. * Use the "rep" argument to return the cached reply if repstat is set to * NFSERR_REPLYFROMCACHE. The client never sets repstat to this value. */ void nfsv4_seqsess_cacherep(uint32_t slotid, struct nfsslot *slots, int repstat, struct mbuf **rep) { struct mbuf *m; if (repstat == NFSERR_REPLYFROMCACHE) { if (slots[slotid].nfssl_reply != NULL) { /* * We cannot sleep here, but copy will usually * succeed. */ m = m_copym(slots[slotid].nfssl_reply, 0, M_COPYALL, M_NOWAIT); if (m != NULL) *rep = m; else { /* * Multiple retries would be extremely rare, * so using the cached reply will likely * be ok. */ *rep = slots[slotid].nfssl_reply; slots[slotid].nfssl_reply = NULL; } } else *rep = NULL; } else { if (slots[slotid].nfssl_reply != NULL) m_freem(slots[slotid].nfssl_reply); slots[slotid].nfssl_reply = *rep; } slots[slotid].nfssl_inprog = 0; } /* * Generate the xdr for an NFSv4.1 Sequence Operation. */ void nfsv4_setsequence(struct nfsmount *nmp, struct nfsrv_descript *nd, struct nfsclsession *sep, int dont_replycache) { uint32_t *tl, slotseq = 0; int error, maxslot, slotpos; uint8_t sessionid[NFSX_V4SESSIONID]; error = nfsv4_sequencelookup(nmp, sep, &slotpos, &maxslot, &slotseq, sessionid); nd->nd_maxreq = sep->nfsess_maxreq; nd->nd_maxresp = sep->nfsess_maxresp; /* Build the Sequence arguments. */ NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 4 * NFSX_UNSIGNED); nd->nd_sequence = tl; bcopy(sessionid, tl, NFSX_V4SESSIONID); tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; nd->nd_slotseq = tl; if (error == 0) { nd->nd_flag |= ND_HASSLOTID; nd->nd_slotid = slotpos; *tl++ = txdr_unsigned(slotseq); *tl++ = txdr_unsigned(slotpos); *tl++ = txdr_unsigned(maxslot); if (dont_replycache == 0) *tl = newnfs_true; else *tl = newnfs_false; } else { /* * There are two errors and the rest of the session can * just be zeros. * NFSERR_BADSESSION: This bad session should just generate * the same error again when the RPC is retried. * ESTALE: A forced dismount is in progress and will cause the * RPC to fail later. */ *tl++ = 0; *tl++ = 0; *tl++ = 0; *tl = 0; } nd->nd_flag |= ND_HASSEQUENCE; } int nfsv4_sequencelookup(struct nfsmount *nmp, struct nfsclsession *sep, int *slotposp, int *maxslotp, uint32_t *slotseqp, uint8_t *sessionid) { int i, maxslot, slotpos; uint64_t bitval; /* Find an unused slot. */ slotpos = -1; maxslot = -1; mtx_lock(&sep->nfsess_mtx); do { if (nmp != NULL && sep->nfsess_defunct != 0) { /* Just return the bad session. */ bcopy(sep->nfsess_sessionid, sessionid, NFSX_V4SESSIONID); mtx_unlock(&sep->nfsess_mtx); return (NFSERR_BADSESSION); } bitval = 1; for (i = 0; i < sep->nfsess_foreslots; i++) { if ((bitval & sep->nfsess_slots) == 0) { slotpos = i; sep->nfsess_slots |= bitval; sep->nfsess_slotseq[i]++; *slotseqp = sep->nfsess_slotseq[i]; break; } bitval <<= 1; } if (slotpos == -1) { /* * If a forced dismount is in progress, just return. * This RPC attempt will fail when it calls * newnfs_request(). */ if (nmp != NULL && NFSCL_FORCEDISM(nmp->nm_mountp)) { mtx_unlock(&sep->nfsess_mtx); return (ESTALE); } /* Wake up once/sec, to check for a forced dismount. */ (void)mtx_sleep(&sep->nfsess_slots, &sep->nfsess_mtx, PZERO, "nfsclseq", hz); } } while (slotpos == -1); /* Now, find the highest slot in use. (nfsc_slots is 64bits) */ bitval = 1; for (i = 0; i < 64; i++) { if ((bitval & sep->nfsess_slots) != 0) maxslot = i; bitval <<= 1; } bcopy(sep->nfsess_sessionid, sessionid, NFSX_V4SESSIONID); mtx_unlock(&sep->nfsess_mtx); *slotposp = slotpos; *maxslotp = maxslot; return (0); } /* * Free a session slot. */ void nfsv4_freeslot(struct nfsclsession *sep, int slot, bool resetseq) { uint64_t bitval; bitval = 1; if (slot > 0) bitval <<= slot; mtx_lock(&sep->nfsess_mtx); if (resetseq) sep->nfsess_slotseq[slot]--; if ((bitval & sep->nfsess_slots) == 0) printf("freeing free slot!!\n"); sep->nfsess_slots &= ~bitval; wakeup(&sep->nfsess_slots); mtx_unlock(&sep->nfsess_mtx); } /* * Search for a matching pnfsd DS, based on the nmp arg. * Return one if found, NULL otherwise. */ struct nfsdevice * nfsv4_findmirror(struct nfsmount *nmp) { struct nfsdevice *ds; mtx_assert(NFSDDSMUTEXPTR, MA_OWNED); /* * Search the DS server list for a match with nmp. */ if (nfsrv_devidcnt == 0) return (NULL); TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { if (ds->nfsdev_nmp == nmp) { NFSCL_DEBUG(4, "nfsv4_findmirror: fnd main ds\n"); break; } } return (ds); } /* * Fill in the fields of "struct nfsrv_descript". */ void nfsm_set(struct nfsrv_descript *nd, u_int offs) { struct mbuf *m; int rlen; m = nd->nd_mb; if ((m->m_flags & M_EXTPG) != 0) { nd->nd_bextpg = 0; while (offs > 0) { if (nd->nd_bextpg == 0) rlen = m_epg_pagelen(m, 0, m->m_epg_1st_off); else rlen = m_epg_pagelen(m, nd->nd_bextpg, 0); if (offs <= rlen) break; offs -= rlen; nd->nd_bextpg++; if (nd->nd_bextpg == m->m_epg_npgs) { printf("nfsm_set: build offs " "out of range\n"); nd->nd_bextpg--; break; } } nd->nd_bpos = (char *)(void *) PHYS_TO_DMAP(m->m_epg_pa[nd->nd_bextpg]); if (nd->nd_bextpg == 0) nd->nd_bpos += m->m_epg_1st_off; if (offs > 0) { nd->nd_bpos += offs; nd->nd_bextpgsiz = rlen - offs; } else if (nd->nd_bextpg == 0) nd->nd_bextpgsiz = PAGE_SIZE - m->m_epg_1st_off; else nd->nd_bextpgsiz = PAGE_SIZE; } else nd->nd_bpos = mtod(m, char *) + offs; } /* * Grow a ext_pgs mbuf list. Either allocate another page or add * an mbuf to the list. */ struct mbuf * nfsm_add_ext_pgs(struct mbuf *m, int maxextsiz, int *bextpg) { struct mbuf *mp; vm_page_t pg; if ((m->m_epg_npgs + 1) * PAGE_SIZE > maxextsiz) { mp = mb_alloc_ext_plus_pages(PAGE_SIZE, M_WAITOK); *bextpg = 0; m->m_next = mp; } else { do { pg = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_NODUMP | VM_ALLOC_WIRED); if (pg == NULL) vm_wait(NULL); } while (pg == NULL); m->m_epg_pa[m->m_epg_npgs] = VM_PAGE_TO_PHYS(pg); *bextpg = m->m_epg_npgs; m->m_epg_npgs++; m->m_epg_last_len = 0; mp = m; } return (mp); } diff --git a/sys/fs/nfs/nfsproto.h b/sys/fs/nfs/nfsproto.h index 62d86c3a4593..13e146154805 100644 --- a/sys/fs/nfs/nfsproto.h +++ b/sys/fs/nfs/nfsproto.h @@ -1,1522 +1,1521 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NFS_NFSPROTO_H_ #define _NFS_NFSPROTO_H_ /* * nfs definitions as per the Version 2, 3 and 4 specs */ /* * Constants as defined in the NFS Version 2, 3 and 4 specs. * "NFS: Network File System Protocol Specification" RFC1094 * and in the "NFS: Network File System Version 3 Protocol * Specification" */ #define NFS_PORT 2049 #define NFS_PROG 100003 #define NFS_CALLBCKPROG 0x40000000 /* V4 only */ #define NFS_VER2 2 #define NFS_VER3 3 #define NFS_VER4 4 #define NFS_V2MAXDATA 8192 #define NFS_MAXDGRAMDATA 16384 #define NFS_MAXPATHLEN 1024 #define NFS_MAXNAMLEN 255 /* * Calculating the maximum XDR overhead for an NFS RPC isn't easy. * NFS_MAXPKTHDR is antiquated and assumes AUTH_SYS over UDP. * NFS_MAXXDR should be sufficient for all NFS versions over TCP. * It includes: * - Maximum RPC message header. It can include 2 400byte authenticators plus * a machine name of unlimited length, although it is usually relatively * small. * - XDR overheads for the NFSv4 compound. This can include Owner and * Owner_group strings, which are usually fairly small, but are allowed * to be up to 1024 bytes each. * 4096 is overkill, but should always be sufficient. */ #define NFS_MAXPKTHDR 404 #define NFS_MAXXDR 4096 -#define NFS_MAXPACKET (NFS_SRVMAXIO + NFS_MAXXDR) #define NFS_MINPACKET 20 #define NFS_FABLKSIZE 512 /* Size in bytes of a block wrt fa_blocks */ #define NFSV4_MINORVERSION 0 /* V4 Minor version */ #define NFSV41_MINORVERSION 1 /* V4 Minor version */ #define NFSV42_MINORVERSION 2 /* V4 Minor version */ #define NFSV4_CBVERS 1 /* V4 CB Version */ #define NFSV41_CBVERS 4 /* V4.1 CB Version */ #define NFSV4_SMALLSTR 50 /* Strings small enough for stack */ /* * This value isn't a fixed value in the RFCs. * It is the maximum data size supported by NFSv3 or NFSv4 over TCP for * the server. It should be set to the I/O size preferred by ZFS or * MAXBSIZE, whichever is greater. * ZFS currently prefers 128K. * It used to be called NFS_MAXDATA, but has been renamed to clarify that * it refers to server side only and doesn't conflict with the NFS_MAXDATA * defined in rpcsvc/nfs_prot.h for userland. */ #define NFS_SRVMAXIO (128 * 1024) /* Stat numbers for rpc returns (version 2, 3 and 4) */ /* * These numbers are hard-wired in the RFCs, so they can't be changed. * The code currently assumes that the ones < 10000 are the same as * sys/errno.h and that sys/errno.h will never go as high as 10000. * If the value in sys/errno.h of any entry listed below is changed, * the NFS code must be modified to do the mapping between them. * (You can ignore NFSERR_WFLUSH, since it is never actually used.) */ #define NFSERR_OK 0 #define NFSERR_PERM 1 #define NFSERR_NOENT 2 #define NFSERR_IO 5 #define NFSERR_NXIO 6 #define NFSERR_ACCES 13 #define NFSERR_EXIST 17 #define NFSERR_XDEV 18 /* Version 3, 4 only */ #define NFSERR_NODEV 19 #define NFSERR_NOTDIR 20 #define NFSERR_ISDIR 21 #define NFSERR_INVAL 22 /* Version 3, 4 only */ #define NFSERR_FBIG 27 #define NFSERR_NOSPC 28 #define NFSERR_ROFS 30 #define NFSERR_MLINK 31 /* Version 3, 4 only */ #define NFSERR_NAMETOL 63 #define NFSERR_NOTEMPTY 66 #define NFSERR_DQUOT 69 #define NFSERR_STALE 70 #define NFSERR_REMOTE 71 /* Version 3 only */ #define NFSERR_WFLUSH 99 /* Version 2 only */ #define NFSERR_BADHANDLE 10001 /* These are Version 3, 4 only */ #define NFSERR_NOT_SYNC 10002 /* Version 3 Only */ #define NFSERR_BAD_COOKIE 10003 #define NFSERR_NOTSUPP 10004 #define NFSERR_TOOSMALL 10005 #define NFSERR_SERVERFAULT 10006 #define NFSERR_BADTYPE 10007 #define NFSERR_DELAY 10008 /* Called NFSERR_JUKEBOX for V3 */ #define NFSERR_SAME 10009 /* These are Version 4 only */ #define NFSERR_DENIED 10010 #define NFSERR_EXPIRED 10011 #define NFSERR_LOCKED 10012 #define NFSERR_GRACE 10013 #define NFSERR_FHEXPIRED 10014 #define NFSERR_SHAREDENIED 10015 #define NFSERR_WRONGSEC 10016 #define NFSERR_CLIDINUSE 10017 #define NFSERR_RESOURCE 10018 #define NFSERR_MOVED 10019 #define NFSERR_NOFILEHANDLE 10020 #define NFSERR_MINORVERMISMATCH 10021 #define NFSERR_STALECLIENTID 10022 #define NFSERR_STALESTATEID 10023 #define NFSERR_OLDSTATEID 10024 #define NFSERR_BADSTATEID 10025 #define NFSERR_BADSEQID 10026 #define NFSERR_NOTSAME 10027 #define NFSERR_LOCKRANGE 10028 #define NFSERR_SYMLINK 10029 #define NFSERR_RESTOREFH 10030 #define NFSERR_LEASEMOVED 10031 #define NFSERR_ATTRNOTSUPP 10032 #define NFSERR_NOGRACE 10033 #define NFSERR_RECLAIMBAD 10034 #define NFSERR_RECLAIMCONFLICT 10035 #define NFSERR_BADXDR 10036 #define NFSERR_LOCKSHELD 10037 #define NFSERR_OPENMODE 10038 #define NFSERR_BADOWNER 10039 #define NFSERR_BADCHAR 10040 #define NFSERR_BADNAME 10041 #define NFSERR_BADRANGE 10042 #define NFSERR_LOCKNOTSUPP 10043 #define NFSERR_OPILLEGAL 10044 #define NFSERR_DEADLOCK 10045 #define NFSERR_FILEOPEN 10046 #define NFSERR_ADMINREVOKED 10047 #define NFSERR_CBPATHDOWN 10048 /* NFSv4.1 specific errors. */ #define NFSERR_BADIOMODE 10049 #define NFSERR_BADLAYOUT 10050 #define NFSERR_BADSESSIONDIGEST 10051 #define NFSERR_BADSESSION 10052 #define NFSERR_BADSLOT 10053 #define NFSERR_COMPLETEALREADY 10054 #define NFSERR_NOTBNDTOSESS 10055 #define NFSERR_DELEGALREADYWANT 10056 #define NFSERR_BACKCHANBUSY 10057 #define NFSERR_LAYOUTTRYLATER 10058 #define NFSERR_LAYOUTUNAVAIL 10059 #define NFSERR_NOMATCHLAYOUT 10060 #define NFSERR_RECALLCONFLICT 10061 #define NFSERR_UNKNLAYOUTTYPE 10062 #define NFSERR_SEQMISORDERED 10063 #define NFSERR_SEQUENCEPOS 10064 #define NFSERR_REQTOOBIG 10065 #define NFSERR_REPTOOBIG 10066 #define NFSERR_REPTOOBIGTOCACHE 10067 #define NFSERR_RETRYUNCACHEDREP 10068 #define NFSERR_UNSAFECOMPOUND 10069 #define NFSERR_TOOMANYOPS 10070 #define NFSERR_OPNOTINSESS 10071 #define NFSERR_HASHALGUNSUPP 10072 #define NFSERR_CLIENTIDBUSY 10074 #define NFSERR_PNFSIOHOLE 10075 #define NFSERR_SEQFALSERETRY 10076 #define NFSERR_BADHIGHSLOT 10077 #define NFSERR_DEADSESSION 10078 #define NFSERR_ENCRALGUNSUPP 10079 #define NFSERR_PNFSNOLAYOUT 10080 #define NFSERR_NOTONLYOP 10081 #define NFSERR_WRONGCRED 10082 #define NFSERR_WRONGTYPE 10083 #define NFSERR_DIRDELEGUNAVAIL 10084 #define NFSERR_REJECTDELEG 10085 #define NFSERR_RETURNCONFLICT 10086 #define NFSERR_DELEGREVOKED 10087 /* NFSv4.2 specific errors. */ #define NFSERR_PARTNERNOTSUPP 10088 #define NFSERR_PARTNERNOAUTH 10089 #define NFSERR_UNIONNOTSUPP 10090 #define NFSERR_OFFLOADDENIED 10091 #define NFSERR_WRONGLFS 10092 #define NFSERR_BADLABEL 10093 #define NFSERR_OFFLOADNOREQS 10094 /* NFSv4.2 Extended Attribute errors. */ #define NFSERR_NOXATTR 10095 #define NFSERR_XATTR2BIG 10096 /* Maximum value of all the NFS error values. */ #define NFSERR_MAXERRVAL NFSERR_XATTR2BIG #define NFSERR_STALEWRITEVERF 30001 /* Fake return for nfs_commit() */ #define NFSERR_DONTREPLY 30003 /* Don't process request */ #define NFSERR_RETVOID 30004 /* Return void, not error */ #define NFSERR_REPLYFROMCACHE 30005 /* Reply from recent request cache */ #define NFSERR_STALEDONTRECOVER 30006 /* Don't initiate recovery */ #define NFSERR_RPCERR 0x40000000 /* Mark an RPC layer error */ #define NFSERR_AUTHERR 0x80000000 /* Mark an authentication error */ #define NFSERR_RPCMISMATCH (NFSERR_RPCERR | RPC_MISMATCH) #define NFSERR_PROGUNAVAIL (NFSERR_RPCERR | RPC_PROGUNAVAIL) #define NFSERR_PROGMISMATCH (NFSERR_RPCERR | RPC_PROGMISMATCH) #define NFSERR_PROGNOTV4 (NFSERR_RPCERR | 0xffff) #define NFSERR_PROCUNAVAIL (NFSERR_RPCERR | RPC_PROCUNAVAIL) #define NFSERR_GARBAGE (NFSERR_RPCERR | RPC_GARBAGE) /* Sizes in bytes of various nfs rpc components */ #define NFSX_UNSIGNED 4 #define NFSX_HYPER (2 * NFSX_UNSIGNED) /* specific to NFS Version 2 */ #define NFSX_V2FH 32 #define NFSX_V2FATTR 68 #define NFSX_V2SATTR 32 #define NFSX_V2COOKIE 4 #define NFSX_V2STATFS 20 /* specific to NFS Version 3 */ #define NFSX_V3FHMAX 64 /* max. allowed by protocol */ #define NFSX_V3FATTR 84 #define NFSX_V3SATTR 60 /* max. all fields filled in */ #define NFSX_V3SRVSATTR (sizeof (struct nfsv3_sattr)) #define NFSX_V3POSTOPATTR (NFSX_V3FATTR + NFSX_UNSIGNED) #define NFSX_V3WCCDATA (NFSX_V3POSTOPATTR + 8 * NFSX_UNSIGNED) #define NFSX_V3STATFS 52 #define NFSX_V3FSINFO 48 #define NFSX_V3PATHCONF 24 /* specific to NFS Version 4 */ #define NFSX_V4FHMAX 128 #define NFSX_V4FSID (2 * NFSX_HYPER) #define NFSX_V4SPECDATA (2 * NFSX_UNSIGNED) #define NFSX_V4TIME (NFSX_HYPER + NFSX_UNSIGNED) #define NFSX_V4SETTIME (NFSX_UNSIGNED + NFSX_V4TIME) #define NFSX_V4SESSIONID 16 #define NFSX_V4DEVICEID 16 #define NFSX_V4PNFSFH (sizeof(fhandle_t) + 1) #define NFSX_V4FILELAYOUT (4 * NFSX_UNSIGNED + NFSX_V4DEVICEID + \ NFSX_HYPER + NFSM_RNDUP(NFSX_V4PNFSFH)) #define NFSX_V4FLEXLAYOUT(m) (NFSX_HYPER + 3 * NFSX_UNSIGNED + \ ((m) * (NFSX_V4DEVICEID + NFSX_STATEID + NFSM_RNDUP(NFSX_V4PNFSFH) + \ 8 * NFSX_UNSIGNED))) /* sizes common to multiple NFS versions */ #define NFSX_FHMAX (NFSX_V4FHMAX) #define NFSX_MYFH (sizeof (fhandle_t)) /* size this server uses */ #define NFSX_VERF 8 #define NFSX_STATEIDOTHER 12 #define NFSX_STATEID (NFSX_UNSIGNED + NFSX_STATEIDOTHER) #define NFSX_GSSH 12 /* variants for multiple versions */ #define NFSX_STATFS(v3) ((v3) ? NFSX_V3STATFS : NFSX_V2STATFS) /* * Beware. NFSPROC_NULL and friends are defined in * as well and the numbers are different. */ #ifndef NFSPROC_NULL /* nfs rpc procedure numbers (before version mapping) */ #define NFSPROC_NULL 0 #define NFSPROC_GETATTR 1 #define NFSPROC_SETATTR 2 #define NFSPROC_LOOKUP 3 #define NFSPROC_ACCESS 4 #define NFSPROC_READLINK 5 #define NFSPROC_READ 6 #define NFSPROC_WRITE 7 #define NFSPROC_CREATE 8 #define NFSPROC_MKDIR 9 #define NFSPROC_SYMLINK 10 #define NFSPROC_MKNOD 11 #define NFSPROC_REMOVE 12 #define NFSPROC_RMDIR 13 #define NFSPROC_RENAME 14 #define NFSPROC_LINK 15 #define NFSPROC_READDIR 16 #define NFSPROC_READDIRPLUS 17 #define NFSPROC_FSSTAT 18 #define NFSPROC_FSINFO 19 #define NFSPROC_PATHCONF 20 #define NFSPROC_COMMIT 21 #endif /* NFSPROC_NULL */ /* * The lower numbers -> 21 are used by NFSv2 and v3. These define higher * numbers used by NFSv4. * NFS_V3NPROCS is one greater than the last V3 op and NFS_NPROCS is * one greater than the last number. */ #ifndef NFS_V3NPROCS #define NFS_V3NPROCS 22 #define NFSPROC_LOOKUPP 22 #define NFSPROC_SETCLIENTID 23 #define NFSPROC_SETCLIENTIDCFRM 24 #define NFSPROC_LOCK 25 #define NFSPROC_LOCKU 26 #define NFSPROC_OPEN 27 #define NFSPROC_CLOSE 28 #define NFSPROC_OPENCONFIRM 29 #define NFSPROC_LOCKT 30 #define NFSPROC_OPENDOWNGRADE 31 #define NFSPROC_RENEW 32 #define NFSPROC_PUTROOTFH 33 #define NFSPROC_RELEASELCKOWN 34 #define NFSPROC_DELEGRETURN 35 #define NFSPROC_RETDELEGREMOVE 36 #define NFSPROC_RETDELEGRENAME1 37 #define NFSPROC_RETDELEGRENAME2 38 #define NFSPROC_GETACL 39 #define NFSPROC_SETACL 40 /* * Must be defined as one higher than the last Proc# above. */ #define NFSV4_NPROCS 41 /* Additional procedures for NFSv4.1. */ #define NFSPROC_EXCHANGEID 41 #define NFSPROC_CREATESESSION 42 #define NFSPROC_DESTROYSESSION 43 #define NFSPROC_DESTROYCLIENT 44 #define NFSPROC_FREESTATEID 45 #define NFSPROC_LAYOUTGET 46 #define NFSPROC_GETDEVICEINFO 47 #define NFSPROC_LAYOUTCOMMIT 48 #define NFSPROC_LAYOUTRETURN 49 #define NFSPROC_RECLAIMCOMPL 50 #define NFSPROC_WRITEDS 51 #define NFSPROC_READDS 52 #define NFSPROC_COMMITDS 53 #define NFSPROC_OPENLAYGET 54 #define NFSPROC_CREATELAYGET 55 /* * Must be defined as one higher than the last NFSv4.1 Proc# above. */ #define NFSV41_NPROCS 56 /* Additional procedures for NFSv4.2. */ #define NFSPROC_IOADVISE 56 #define NFSPROC_ALLOCATE 57 #define NFSPROC_COPY 58 #define NFSPROC_SEEK 59 #define NFSPROC_SEEKDS 60 /* and the ones for the optional Extended attribute support (RFC-8276). */ #define NFSPROC_GETEXTATTR 61 #define NFSPROC_SETEXTATTR 62 #define NFSPROC_RMEXTATTR 63 #define NFSPROC_LISTEXTATTR 64 /* BindConnectionToSession, done by the krpc for a new connection. */ #define NFSPROC_BINDCONNTOSESS 65 /* * Must be defined as one higher than the last NFSv4.2 Proc# above. */ #define NFSV42_NPROCS 66 #endif /* NFS_V3NPROCS */ /* * Define NFS_NPROCS as NFSV4_NPROCS for the experimental kernel code. */ #ifndef NFS_NPROCS #define NFS_NPROCS NFSV4_NPROCS #endif /* * NFSPROC_NOOP is a fake op# that can't be the same as any V2/3/4 Procedure * or Operation#. Since the NFS V4 Op #s go higher, use NFSV42_NOPS, which * is one greater than the highest Op#. */ #define NFSPROC_NOOP NFSV42_NOPS /* Actual Version 2 procedure numbers */ #define NFSV2PROC_NULL 0 #define NFSV2PROC_GETATTR 1 #define NFSV2PROC_SETATTR 2 #define NFSV2PROC_NOOP 3 #define NFSV2PROC_ROOT NFSV2PROC_NOOP /* Obsolete */ #define NFSV2PROC_LOOKUP 4 #define NFSV2PROC_READLINK 5 #define NFSV2PROC_READ 6 #define NFSV2PROC_WRITECACHE NFSV2PROC_NOOP /* Obsolete */ #define NFSV2PROC_WRITE 8 #define NFSV2PROC_CREATE 9 #define NFSV2PROC_REMOVE 10 #define NFSV2PROC_RENAME 11 #define NFSV2PROC_LINK 12 #define NFSV2PROC_SYMLINK 13 #define NFSV2PROC_MKDIR 14 #define NFSV2PROC_RMDIR 15 #define NFSV2PROC_READDIR 16 #define NFSV2PROC_STATFS 17 /* * V4 Procedure numbers */ #define NFSV4PROC_COMPOUND 1 #define NFSV4PROC_CBNULL 0 #define NFSV4PROC_CBCOMPOUND 1 /* * Constants used by the Version 3 and 4 protocols for various RPCs */ #define NFSV3SATTRTIME_DONTCHANGE 0 #define NFSV3SATTRTIME_TOSERVER 1 #define NFSV3SATTRTIME_TOCLIENT 2 #define NFSV4SATTRTIME_TOSERVER 0 #define NFSV4SATTRTIME_TOCLIENT 1 #define NFSV4LOCKT_READ 1 #define NFSV4LOCKT_WRITE 2 #define NFSV4LOCKT_READW 3 #define NFSV4LOCKT_WRITEW 4 #define NFSV4LOCKT_RELEASE 5 #define NFSV4OPEN_NOCREATE 0 #define NFSV4OPEN_CREATE 1 #define NFSV4OPEN_CLAIMNULL 0 #define NFSV4OPEN_CLAIMPREVIOUS 1 #define NFSV4OPEN_CLAIMDELEGATECUR 2 #define NFSV4OPEN_CLAIMDELEGATEPREV 3 #define NFSV4OPEN_CLAIMFH 4 #define NFSV4OPEN_CLAIMDELEGATECURFH 5 #define NFSV4OPEN_CLAIMDELEGATEPREVFH 6 #define NFSV4OPEN_DELEGATENONE 0 #define NFSV4OPEN_DELEGATEREAD 1 #define NFSV4OPEN_DELEGATEWRITE 2 #define NFSV4OPEN_DELEGATENONEEXT 3 #define NFSV4OPEN_LIMITSIZE 1 #define NFSV4OPEN_LIMITBLOCKS 2 /* * Nfs V4 ACE stuff */ #define NFSV4ACE_ALLOWEDTYPE 0x00000000 #define NFSV4ACE_DENIEDTYPE 0x00000001 #define NFSV4ACE_AUDITTYPE 0x00000002 #define NFSV4ACE_ALARMTYPE 0x00000003 #define NFSV4ACE_SUPALLOWED 0x00000001 #define NFSV4ACE_SUPDENIED 0x00000002 #define NFSV4ACE_SUPAUDIT 0x00000004 #define NFSV4ACE_SUPALARM 0x00000008 #define NFSV4ACE_SUPTYPES (NFSV4ACE_SUPALLOWED | NFSV4ACE_SUPDENIED) #define NFSV4ACE_FILEINHERIT 0x00000001 #define NFSV4ACE_DIRECTORYINHERIT 0x00000002 #define NFSV4ACE_NOPROPAGATEINHERIT 0x00000004 #define NFSV4ACE_INHERITONLY 0x00000008 #define NFSV4ACE_SUCCESSFULACCESS 0x00000010 #define NFSV4ACE_FAILEDACCESS 0x00000020 #define NFSV4ACE_IDENTIFIERGROUP 0x00000040 #define NFSV4ACE_READDATA 0x00000001 #define NFSV4ACE_LISTDIRECTORY 0x00000001 #define NFSV4ACE_WRITEDATA 0x00000002 #define NFSV4ACE_ADDFILE 0x00000002 #define NFSV4ACE_APPENDDATA 0x00000004 #define NFSV4ACE_ADDSUBDIRECTORY 0x00000004 #define NFSV4ACE_READNAMEDATTR 0x00000008 #define NFSV4ACE_WRITENAMEDATTR 0x00000010 #define NFSV4ACE_EXECUTE 0x00000020 #define NFSV4ACE_SEARCH 0x00000020 #define NFSV4ACE_DELETECHILD 0x00000040 #define NFSV4ACE_READATTRIBUTES 0x00000080 #define NFSV4ACE_WRITEATTRIBUTES 0x00000100 #define NFSV4ACE_DELETE 0x00010000 #define NFSV4ACE_READACL 0x00020000 #define NFSV4ACE_WRITEACL 0x00040000 #define NFSV4ACE_WRITEOWNER 0x00080000 #define NFSV4ACE_SYNCHRONIZE 0x00100000 /* * Here are the mappings between mode bits and acl mask bits for * directories and other files. * (Named attributes have not been included, since named attributes are * not yet supported.) * The mailing list seems to indicate that NFSV4ACE_EXECUTE refers to * searching a directory, although I can't find a statement of that in * the RFC. */ #define NFSV4ACE_ALLFILESMASK (NFSV4ACE_READATTRIBUTES | NFSV4ACE_READACL) #define NFSV4ACE_OWNERMASK (NFSV4ACE_WRITEATTRIBUTES | NFSV4ACE_WRITEACL) #define NFSV4ACE_DIRREADMASK NFSV4ACE_LISTDIRECTORY #define NFSV4ACE_DIREXECUTEMASK NFSV4ACE_EXECUTE #define NFSV4ACE_DIRWRITEMASK (NFSV4ACE_ADDFILE | \ NFSV4ACE_ADDSUBDIRECTORY | NFSV4ACE_DELETECHILD) #define NFSV4ACE_READMASK NFSV4ACE_READDATA #define NFSV4ACE_WRITEMASK (NFSV4ACE_WRITEDATA | NFSV4ACE_APPENDDATA) #define NFSV4ACE_EXECUTEMASK NFSV4ACE_EXECUTE #define NFSV4ACE_ALLFILEBITS (NFSV4ACE_READMASK | NFSV4ACE_WRITEMASK | \ NFSV4ACE_EXECUTEMASK | NFSV4ACE_SYNCHRONIZE) #define NFSV4ACE_ALLDIRBITS (NFSV4ACE_DIRREADMASK | \ NFSV4ACE_DIRWRITEMASK | NFSV4ACE_DIREXECUTEMASK) #define NFSV4ACE_AUDITMASK 0x0 /* * These GENERIC masks are not used and are no longer believed to be useful. */ #define NFSV4ACE_GENERICREAD 0x00120081 #define NFSV4ACE_GENERICWRITE 0x00160106 #define NFSV4ACE_GENERICEXECUTE 0x001200a0 #define NFSSTATEID_PUTALLZERO 0 #define NFSSTATEID_PUTALLONE 1 #define NFSSTATEID_PUTSTATEID 2 #define NFSSTATEID_PUTSEQIDZERO 3 /* * Bits for share access and deny. */ #define NFSV4OPEN_ACCESSREAD 0x00000001 #define NFSV4OPEN_ACCESSWRITE 0x00000002 #define NFSV4OPEN_ACCESSBOTH 0x00000003 #define NFSV4OPEN_WANTDELEGMASK 0x0000ff00 #define NFSV4OPEN_WANTREADDELEG 0x00000100 #define NFSV4OPEN_WANTWRITEDELEG 0x00000200 #define NFSV4OPEN_WANTANYDELEG 0x00000300 #define NFSV4OPEN_WANTNODELEG 0x00000400 #define NFSV4OPEN_WANTCANCEL 0x00000500 #define NFSV4OPEN_WANTSIGNALDELEG 0x00010000 #define NFSV4OPEN_WANTPUSHDELEG 0x00020000 #define NFSV4OPEN_DENYNONE 0x00000000 #define NFSV4OPEN_DENYREAD 0x00000001 #define NFSV4OPEN_DENYWRITE 0x00000002 #define NFSV4OPEN_DENYBOTH 0x00000003 /* * Delegate_none_ext reply values. */ #define NFSV4OPEN_NOTWANTED 0 #define NFSV4OPEN_CONTENTION 1 #define NFSV4OPEN_RESOURCE 2 #define NFSV4OPEN_NOTSUPPFTYPE 3 #define NFSV4OPEN_NOTSUPPWRITEFTYPE 4 #define NFSV4OPEN_NOTSUPPUPGRADE 5 #define NFSV4OPEN_NOTSUPPDOWNGRADE 6 #define NFSV4OPEN_CANCELLED 7 #define NFSV4OPEN_ISDIR 8 /* * Open result flags * (The first four are in the spec. The rest are used internally.) */ #define NFSV4OPEN_RESULTCONFIRM 0x00000002 #define NFSV4OPEN_LOCKTYPEPOSIX 0x00000004 #define NFSV4OPEN_PRESERVEUNLINKED 0x00000008 #define NFSV4OPEN_MAYNOTIFYLOCK 0x00000020 #define NFSV4OPEN_RFLAGS \ (NFSV4OPEN_RESULTCONFIRM | NFSV4OPEN_LOCKTYPEPOSIX | \ NFSV4OPEN_PRESERVEUNLINKED | NFSV4OPEN_MAYNOTIFYLOCK) #define NFSV4OPEN_RECALL 0x00010000 #define NFSV4OPEN_READDELEGATE 0x00020000 #define NFSV4OPEN_WRITEDELEGATE 0x00040000 #define NFSV4OPEN_WDRESOURCE 0x00080000 #define NFSV4OPEN_WDCONTENTION 0x00100000 #define NFSV4OPEN_WDNOTWANTED 0x00200000 #define NFSV4OPEN_WDSUPPFTYPE 0x00400000 /* * NFS V4 File Handle types */ #define NFSV4FHTYPE_PERSISTENT 0x0 #define NFSV4FHTYPE_NOEXPIREWITHOPEN 0x1 #define NFSV4FHTYPE_VOLATILEANY 0x2 #define NFSV4FHTYPE_VOLATILEMIGRATE 0x4 #define NFSV4FHTYPE_VOLATILERENAME 0x8 /* * Maximum size of V4 opaque strings. */ #define NFSV4_OPAQUELIMIT 1024 /* * These are the same for V3 and V4. */ #define NFSACCESS_READ 0x01 #define NFSACCESS_LOOKUP 0x02 #define NFSACCESS_MODIFY 0x04 #define NFSACCESS_EXTEND 0x08 #define NFSACCESS_DELETE 0x10 #define NFSACCESS_EXECUTE 0x20 /* Additional Extended Attribute access bits RFC-8276. */ #define NFSACCESS_XAREAD 0x40 #define NFSACCESS_XAWRITE 0x80 #define NFSACCESS_XALIST 0x100 #define NFSWRITE_UNSTABLE 0 #define NFSWRITE_DATASYNC 1 #define NFSWRITE_FILESYNC 2 #define NFSCREATE_UNCHECKED 0 #define NFSCREATE_GUARDED 1 #define NFSCREATE_EXCLUSIVE 2 #define NFSCREATE_EXCLUSIVE41 3 #define NFSV3FSINFO_LINK 0x01 #define NFSV3FSINFO_SYMLINK 0x02 #define NFSV3FSINFO_HOMOGENEOUS 0x08 #define NFSV3FSINFO_CANSETTIME 0x10 /* Flags for Exchange ID */ #define NFSV4EXCH_SUPPMOVEDREFER 0x00000001 #define NFSV4EXCH_SUPPMOVEDMIGR 0x00000002 #define NFSV4EXCH_BINDPRINCSTATEID 0x00000100 #define NFSV4EXCH_USENONPNFS 0x00010000 #define NFSV4EXCH_USEPNFSMDS 0x00020000 #define NFSV4EXCH_USEPNFSDS 0x00040000 #define NFSV4EXCH_MASKPNFS 0x00070000 #define NFSV4EXCH_UPDCONFIRMEDRECA 0x40000000 #define NFSV4EXCH_CONFIRMEDR 0x80000000 /* State Protects */ #define NFSV4EXCH_SP4NONE 0 #define NFSV4EXCH_SP4MACHCRED 1 #define NFSV4EXCH_SP4SSV 2 /* Flags for Create Session */ #define NFSV4CRSESS_PERSIST 0x00000001 #define NFSV4CRSESS_CONNBACKCHAN 0x00000002 #define NFSV4CRSESS_CONNRDMA 0x00000004 /* Flags for Sequence */ #define NFSV4SEQ_CBPATHDOWN 0x00000001 #define NFSV4SEQ_CBGSSCONTEXPIRING 0x00000002 #define NFSV4SEQ_CBGSSCONTEXPIRED 0x00000004 #define NFSV4SEQ_EXPIREDALLSTATEREVOKED 0x00000008 #define NFSV4SEQ_EXPIREDSOMESTATEREVOKED 0x00000010 #define NFSV4SEQ_ADMINSTATEREVOKED 0x00000020 #define NFSV4SEQ_RECALLABLESTATEREVOKED 0x00000040 #define NFSV4SEQ_LEASEMOVED 0x00000080 #define NFSV4SEQ_RESTARTRECLAIMNEEDED 0x00000100 #define NFSV4SEQ_CBPATHDOWNSESSION 0x00000200 #define NFSV4SEQ_BACKCHANNELFAULT 0x00000400 #define NFSV4SEQ_DEVIDCHANGED 0x00000800 #define NFSV4SEQ_DEVIDDELETED 0x00001000 /* Flags for Layout. */ #define NFSLAYOUTRETURN_FILE 1 #define NFSLAYOUTRETURN_FSID 2 #define NFSLAYOUTRETURN_ALL 3 #define NFSLAYOUT_NFSV4_1_FILES 0x1 #define NFSLAYOUT_OSD2_OBJECTS 0x2 #define NFSLAYOUT_BLOCK_VOLUME 0x3 #define NFSLAYOUT_FLEXFILE 0x4 #define NFSLAYOUTIOMODE_READ 1 #define NFSLAYOUTIOMODE_RW 2 #define NFSLAYOUTIOMODE_ANY 3 /* Flags for Get Device Info. */ #define NFSDEVICEIDNOTIFY_CHANGEBIT 0x1 #define NFSDEVICEIDNOTIFY_DELETEBIT 0x2 /* Flags for File Layout. */ #define NFSFLAYUTIL_DENSE 0x1 #define NFSFLAYUTIL_COMMIT_THRU_MDS 0x2 #define NFSFLAYUTIL_IOADVISE_THRU_MDS 0x4 #define NFSFLAYUTIL_STRIPE_MASK 0xffffffc0 /* Flags for Flex File Layout. */ #define NFSFLEXFLAG_NO_LAYOUTCOMMIT 0x00000001 #define NFSFLEXFLAG_NOIO_MDS 0x00000002 #define NFSFLEXFLAG_NO_READIO 0x00000004 #define NFSFLEXFLAG_WRITE_ONEMIRROR 0x00000008 /* Enum values for Bind Connection to Session. */ #define NFSCDFC4_FORE 0x1 #define NFSCDFC4_BACK 0x2 #define NFSCDFC4_FORE_OR_BOTH 0x3 #define NFSCDFC4_BACK_OR_BOTH 0x7 #define NFSCDFS4_FORE 0x1 #define NFSCDFS4_BACK 0x2 #define NFSCDFS4_BOTH 0x3 /* Enum values for Secinfo_no_name. */ #define NFSSECINFONONAME_CURFH 0 #define NFSSECINFONONAME_PARENT 1 #if defined(_KERNEL) || defined(KERNEL) /* Conversion macros */ #define vtonfsv2_mode(t,m) \ txdr_unsigned(((t) == VFIFO) ? MAKEIMODE(VCHR, (m)) : \ MAKEIMODE((t), (m))) #define vtonfsv34_mode(m) txdr_unsigned((m) & 07777) #define nfstov_mode(a) (fxdr_unsigned(u_int16_t, (a))&07777) #define vtonfsv2_type(a) (((u_int32_t)(a)) >= 9 ? txdr_unsigned(NFNON) : \ txdr_unsigned(newnfsv2_type[((u_int32_t)(a))])) #define vtonfsv34_type(a) (((u_int32_t)(a)) >= 9 ? txdr_unsigned(NFNON) : \ txdr_unsigned(nfsv34_type[((u_int32_t)(a))])) #define nfsv2tov_type(a) newnv2tov_type[fxdr_unsigned(u_int32_t,(a))&0x7] #define nfsv34tov_type(a) nv34tov_type[fxdr_unsigned(u_int32_t,(a))&0x7] #define vtonfs_dtype(a) (((u_int32_t)(a)) >= 9 ? IFTODT(VTTOIF(VNON)) : \ IFTODT(VTTOIF(a))) /* File types */ typedef enum { NFNON=0, NFREG=1, NFDIR=2, NFBLK=3, NFCHR=4, NFLNK=5, NFSOCK=6, NFFIFO=7, NFATTRDIR=8, NFNAMEDATTR=9 } nfstype; /* Structs for common parts of the rpc's */ struct nfsv2_time { u_int32_t nfsv2_sec; u_int32_t nfsv2_usec; }; typedef struct nfsv2_time nfstime2; struct nfsv3_time { u_int32_t nfsv3_sec; u_int32_t nfsv3_nsec; }; typedef struct nfsv3_time nfstime3; struct nfsv4_time { u_int32_t nfsv4_highsec; u_int32_t nfsv4_sec; u_int32_t nfsv4_nsec; }; typedef struct nfsv4_time nfstime4; /* * Quads are defined as arrays of 2 longs to ensure dense packing for the * protocol and to facilitate xdr conversion. */ struct nfs_uquad { u_int32_t nfsuquad[2]; }; typedef struct nfs_uquad nfsuint64; /* * Used to convert between two u_longs and a u_quad_t. */ union nfs_quadconvert { u_int32_t lval[2]; u_quad_t qval; }; typedef union nfs_quadconvert nfsquad_t; /* * NFS Version 3 special file number. */ struct nfsv3_spec { u_int32_t specdata1; u_int32_t specdata2; }; typedef struct nfsv3_spec nfsv3spec; /* * File attributes and setable attributes. These structures cover both * NFS version 2 and the version 3 protocol. Note that the union is only * used so that one pointer can refer to both variants. These structures * go out on the wire and must be densely packed, so no quad data types * are used. (all fields are longs or u_longs or structures of same) * NB: You can't do sizeof(struct nfs_fattr), you must use the * NFSX_FATTR(v3) macro. */ struct nfs_fattr { u_int32_t fa_type; u_int32_t fa_mode; u_int32_t fa_nlink; u_int32_t fa_uid; u_int32_t fa_gid; union { struct { u_int32_t nfsv2fa_size; u_int32_t nfsv2fa_blocksize; u_int32_t nfsv2fa_rdev; u_int32_t nfsv2fa_blocks; u_int32_t nfsv2fa_fsid; u_int32_t nfsv2fa_fileid; nfstime2 nfsv2fa_atime; nfstime2 nfsv2fa_mtime; nfstime2 nfsv2fa_ctime; } fa_nfsv2; struct { nfsuint64 nfsv3fa_size; nfsuint64 nfsv3fa_used; nfsv3spec nfsv3fa_rdev; nfsuint64 nfsv3fa_fsid; nfsuint64 nfsv3fa_fileid; nfstime3 nfsv3fa_atime; nfstime3 nfsv3fa_mtime; nfstime3 nfsv3fa_ctime; } fa_nfsv3; } fa_un; }; /* and some ugly defines for accessing union components */ #define fa2_size fa_un.fa_nfsv2.nfsv2fa_size #define fa2_blocksize fa_un.fa_nfsv2.nfsv2fa_blocksize #define fa2_rdev fa_un.fa_nfsv2.nfsv2fa_rdev #define fa2_blocks fa_un.fa_nfsv2.nfsv2fa_blocks #define fa2_fsid fa_un.fa_nfsv2.nfsv2fa_fsid #define fa2_fileid fa_un.fa_nfsv2.nfsv2fa_fileid #define fa2_atime fa_un.fa_nfsv2.nfsv2fa_atime #define fa2_mtime fa_un.fa_nfsv2.nfsv2fa_mtime #define fa2_ctime fa_un.fa_nfsv2.nfsv2fa_ctime #define fa3_size fa_un.fa_nfsv3.nfsv3fa_size #define fa3_used fa_un.fa_nfsv3.nfsv3fa_used #define fa3_rdev fa_un.fa_nfsv3.nfsv3fa_rdev #define fa3_fsid fa_un.fa_nfsv3.nfsv3fa_fsid #define fa3_fileid fa_un.fa_nfsv3.nfsv3fa_fileid #define fa3_atime fa_un.fa_nfsv3.nfsv3fa_atime #define fa3_mtime fa_un.fa_nfsv3.nfsv3fa_mtime #define fa3_ctime fa_un.fa_nfsv3.nfsv3fa_ctime #define NFS_LINK_MAX UINT32_MAX struct nfsv2_sattr { u_int32_t sa_mode; u_int32_t sa_uid; u_int32_t sa_gid; u_int32_t sa_size; nfstime2 sa_atime; nfstime2 sa_mtime; }; /* * NFS Version 3 sattr structure for the new node creation case. */ struct nfsv3_sattr { u_int32_t sa_modetrue; u_int32_t sa_mode; u_int32_t sa_uidfalse; u_int32_t sa_gidfalse; u_int32_t sa_sizefalse; u_int32_t sa_atimetype; nfstime3 sa_atime; u_int32_t sa_mtimetype; nfstime3 sa_mtime; }; /* * IO Advise hint bits for NFSv4.2. * Since these go on the wire as a bitmap, the NFSATTRBIT macros are * used to manipulate these bits. */ #define NFSV4IOHINT_NORMAL 0 #define NFSV4IOHINT_SEQUENTIAL 1 #define NFSV4IOHINT_SEQUENTIALBACK 2 #define NFSV4IOHINT_RANDOM 3 #define NFSV4IOHINT_WILLNEED 4 #define NFSV4IOHINT_WILLNEEDOPTUN 5 #define NFSV4IOHINT_DONTNEED 6 #define NFSV4IOHINT_NOREUSE 7 #define NFSV4IOHINT_READ 8 #define NFSV4IOHINT_WRITE 9 #define NFSV4IOHINT_INITPROXIMITY 10 #endif /* _KERNEL */ /* * The attribute bits used for V4. * NFSATTRBIT_xxx defines the attribute# (and its bit position) * NFSATTRBM_xxx is a 32bit mask with the correct bit set within the * appropriate 32bit word. * NFSATTRBIT_MAX is one greater than the largest NFSATTRBIT_xxx */ #define NFSATTRBIT_SUPPORTEDATTRS 0 #define NFSATTRBIT_TYPE 1 #define NFSATTRBIT_FHEXPIRETYPE 2 #define NFSATTRBIT_CHANGE 3 #define NFSATTRBIT_SIZE 4 #define NFSATTRBIT_LINKSUPPORT 5 #define NFSATTRBIT_SYMLINKSUPPORT 6 #define NFSATTRBIT_NAMEDATTR 7 #define NFSATTRBIT_FSID 8 #define NFSATTRBIT_UNIQUEHANDLES 9 #define NFSATTRBIT_LEASETIME 10 #define NFSATTRBIT_RDATTRERROR 11 #define NFSATTRBIT_ACL 12 #define NFSATTRBIT_ACLSUPPORT 13 #define NFSATTRBIT_ARCHIVE 14 #define NFSATTRBIT_CANSETTIME 15 #define NFSATTRBIT_CASEINSENSITIVE 16 #define NFSATTRBIT_CASEPRESERVING 17 #define NFSATTRBIT_CHOWNRESTRICTED 18 #define NFSATTRBIT_FILEHANDLE 19 #define NFSATTRBIT_FILEID 20 #define NFSATTRBIT_FILESAVAIL 21 #define NFSATTRBIT_FILESFREE 22 #define NFSATTRBIT_FILESTOTAL 23 #define NFSATTRBIT_FSLOCATIONS 24 #define NFSATTRBIT_HIDDEN 25 #define NFSATTRBIT_HOMOGENEOUS 26 #define NFSATTRBIT_MAXFILESIZE 27 #define NFSATTRBIT_MAXLINK 28 #define NFSATTRBIT_MAXNAME 29 #define NFSATTRBIT_MAXREAD 30 #define NFSATTRBIT_MAXWRITE 31 #define NFSATTRBIT_MIMETYPE 32 #define NFSATTRBIT_MODE 33 #define NFSATTRBIT_NOTRUNC 34 #define NFSATTRBIT_NUMLINKS 35 #define NFSATTRBIT_OWNER 36 #define NFSATTRBIT_OWNERGROUP 37 #define NFSATTRBIT_QUOTAHARD 38 #define NFSATTRBIT_QUOTASOFT 39 #define NFSATTRBIT_QUOTAUSED 40 #define NFSATTRBIT_RAWDEV 41 #define NFSATTRBIT_SPACEAVAIL 42 #define NFSATTRBIT_SPACEFREE 43 #define NFSATTRBIT_SPACETOTAL 44 #define NFSATTRBIT_SPACEUSED 45 #define NFSATTRBIT_SYSTEM 46 #define NFSATTRBIT_TIMEACCESS 47 #define NFSATTRBIT_TIMEACCESSSET 48 #define NFSATTRBIT_TIMEBACKUP 49 #define NFSATTRBIT_TIMECREATE 50 #define NFSATTRBIT_TIMEDELTA 51 #define NFSATTRBIT_TIMEMETADATA 52 #define NFSATTRBIT_TIMEMODIFY 53 #define NFSATTRBIT_TIMEMODIFYSET 54 #define NFSATTRBIT_MOUNTEDONFILEID 55 #define NFSATTRBIT_DIRNOTIFDELAY 56 #define NFSATTRBIT_DIRENTNOTIFDELAY 57 #define NFSATTRBIT_DACL 58 #define NFSATTRBIT_SACL 59 #define NFSATTRBIT_CHANGEPOLICY 60 #define NFSATTRBIT_FSSTATUS 61 #define NFSATTRBIT_FSLAYOUTTYPE 62 #define NFSATTRBIT_LAYOUTHINT 63 #define NFSATTRBIT_LAYOUTTYPE 64 #define NFSATTRBIT_LAYOUTBLKSIZE 65 #define NFSATTRBIT_LAYOUTALIGNMENT 66 #define NFSATTRBIT_FSLOCATIONSINFO 67 #define NFSATTRBIT_MDSTHRESHOLD 68 #define NFSATTRBIT_RETENTIONGET 69 #define NFSATTRBIT_RETENTIONSET 70 #define NFSATTRBIT_RETENTEVTGET 71 #define NFSATTRBIT_RETENTEVTSET 72 #define NFSATTRBIT_RETENTIONHOLD 73 #define NFSATTRBIT_MODESETMASKED 74 #define NFSATTRBIT_SUPPATTREXCLCREAT 75 #define NFSATTRBIT_FSCHARSETCAP 76 #define NFSATTRBIT_CLONEBLKSIZE 77 #define NFSATTRBIT_SPACEFREED 78 #define NFSATTRBIT_CHANGEATTRTYPE 79 #define NFSATTRBIT_SECLABEL 80 /* Not sure what attribute bit #81 is? */ #define NFSATTRBIT_XATTRSUPPORT 82 #define NFSATTRBM_SUPPORTEDATTRS 0x00000001 #define NFSATTRBM_TYPE 0x00000002 #define NFSATTRBM_FHEXPIRETYPE 0x00000004 #define NFSATTRBM_CHANGE 0x00000008 #define NFSATTRBM_SIZE 0x00000010 #define NFSATTRBM_LINKSUPPORT 0x00000020 #define NFSATTRBM_SYMLINKSUPPORT 0x00000040 #define NFSATTRBM_NAMEDATTR 0x00000080 #define NFSATTRBM_FSID 0x00000100 #define NFSATTRBM_UNIQUEHANDLES 0x00000200 #define NFSATTRBM_LEASETIME 0x00000400 #define NFSATTRBM_RDATTRERROR 0x00000800 #define NFSATTRBM_ACL 0x00001000 #define NFSATTRBM_ACLSUPPORT 0x00002000 #define NFSATTRBM_ARCHIVE 0x00004000 #define NFSATTRBM_CANSETTIME 0x00008000 #define NFSATTRBM_CASEINSENSITIVE 0x00010000 #define NFSATTRBM_CASEPRESERVING 0x00020000 #define NFSATTRBM_CHOWNRESTRICTED 0x00040000 #define NFSATTRBM_FILEHANDLE 0x00080000 #define NFSATTRBM_FILEID 0x00100000 #define NFSATTRBM_FILESAVAIL 0x00200000 #define NFSATTRBM_FILESFREE 0x00400000 #define NFSATTRBM_FILESTOTAL 0x00800000 #define NFSATTRBM_FSLOCATIONS 0x01000000 #define NFSATTRBM_HIDDEN 0x02000000 #define NFSATTRBM_HOMOGENEOUS 0x04000000 #define NFSATTRBM_MAXFILESIZE 0x08000000 #define NFSATTRBM_MAXLINK 0x10000000 #define NFSATTRBM_MAXNAME 0x20000000 #define NFSATTRBM_MAXREAD 0x40000000 #define NFSATTRBM_MAXWRITE 0x80000000 #define NFSATTRBM_MIMETYPE 0x00000001 #define NFSATTRBM_MODE 0x00000002 #define NFSATTRBM_NOTRUNC 0x00000004 #define NFSATTRBM_NUMLINKS 0x00000008 #define NFSATTRBM_OWNER 0x00000010 #define NFSATTRBM_OWNERGROUP 0x00000020 #define NFSATTRBM_QUOTAHARD 0x00000040 #define NFSATTRBM_QUOTASOFT 0x00000080 #define NFSATTRBM_QUOTAUSED 0x00000100 #define NFSATTRBM_RAWDEV 0x00000200 #define NFSATTRBM_SPACEAVAIL 0x00000400 #define NFSATTRBM_SPACEFREE 0x00000800 #define NFSATTRBM_SPACETOTAL 0x00001000 #define NFSATTRBM_SPACEUSED 0x00002000 #define NFSATTRBM_SYSTEM 0x00004000 #define NFSATTRBM_TIMEACCESS 0x00008000 #define NFSATTRBM_TIMEACCESSSET 0x00010000 #define NFSATTRBM_TIMEBACKUP 0x00020000 #define NFSATTRBM_TIMECREATE 0x00040000 #define NFSATTRBM_TIMEDELTA 0x00080000 #define NFSATTRBM_TIMEMETADATA 0x00100000 #define NFSATTRBM_TIMEMODIFY 0x00200000 #define NFSATTRBM_TIMEMODIFYSET 0x00400000 #define NFSATTRBM_MOUNTEDONFILEID 0x00800000 #define NFSATTRBM_DIRNOTIFDELAY 0x01000000 #define NFSATTRBM_DIRENTNOTIFDELAY 0x02000000 #define NFSATTRBM_DACL 0x04000000 #define NFSATTRBM_SACL 0x08000000 #define NFSATTRBM_CHANGEPOLICY 0x10000000 #define NFSATTRBM_FSSTATUS 0x20000000 #define NFSATTRBM_FSLAYOUTTYPE 0x40000000 #define NFSATTRBM_LAYOUTHINT 0x80000000 #define NFSATTRBM_LAYOUTTYPE 0x00000001 #define NFSATTRBM_LAYOUTBLKSIZE 0x00000002 #define NFSATTRBM_LAYOUTALIGNMENT 0x00000004 #define NFSATTRBM_FSLOCATIONSINFO 0x00000008 #define NFSATTRBM_MDSTHRESHOLD 0x00000010 #define NFSATTRBM_RETENTIONGET 0x00000020 #define NFSATTRBM_RETENTIONSET 0x00000040 #define NFSATTRBM_RETENTEVTGET 0x00000080 #define NFSATTRBM_RETENTEVTSET 0x00000100 #define NFSATTRBM_RETENTIONHOLD 0x00000200 #define NFSATTRBM_MODESETMASKED 0x00000400 #define NFSATTRBM_SUPPATTREXCLCREAT 0x00000800 #define NFSATTRBM_FSCHARSETCAP 0x00001000 #define NFSATTRBM_CLONEBLKSIZE 0x00002000 #define NFSATTRBM_SPACEFREED 0x00004000 #define NFSATTRBM_CHANGEATTRTYPE 0x00008000 #define NFSATTRBM_SECLABEL 0x00010000 /* Not sure what attribute bit#81/0x00020000 is? */ #define NFSATTRBM_XATTRSUPPORT 0x00040000 #define NFSATTRBIT_MAX 83 /* * Sets of attributes that are supported, by words in the bitmap. */ /* * NFSATTRBIT_SUPPORTED - SUPP0 - bits 0<->31 * SUPP1 - bits 32<->63 * SUPP2 - bits 64<->95 */ #define NFSATTRBIT_SUPP0 \ (NFSATTRBM_SUPPORTEDATTRS | \ NFSATTRBM_TYPE | \ NFSATTRBM_FHEXPIRETYPE | \ NFSATTRBM_CHANGE | \ NFSATTRBM_SIZE | \ NFSATTRBM_LINKSUPPORT | \ NFSATTRBM_SYMLINKSUPPORT | \ NFSATTRBM_NAMEDATTR | \ NFSATTRBM_FSID | \ NFSATTRBM_UNIQUEHANDLES | \ NFSATTRBM_LEASETIME | \ NFSATTRBM_RDATTRERROR | \ NFSATTRBM_ACL | \ NFSATTRBM_ACLSUPPORT | \ NFSATTRBM_CANSETTIME | \ NFSATTRBM_CASEINSENSITIVE | \ NFSATTRBM_CASEPRESERVING | \ NFSATTRBM_CHOWNRESTRICTED | \ NFSATTRBM_FILEHANDLE | \ NFSATTRBM_FILEID | \ NFSATTRBM_FILESAVAIL | \ NFSATTRBM_FILESFREE | \ NFSATTRBM_FILESTOTAL | \ NFSATTRBM_FSLOCATIONS | \ NFSATTRBM_HOMOGENEOUS | \ NFSATTRBM_MAXFILESIZE | \ NFSATTRBM_MAXLINK | \ NFSATTRBM_MAXNAME | \ NFSATTRBM_MAXREAD | \ NFSATTRBM_MAXWRITE) /* * NFSATTRBIT_S1 - subset of SUPP1 - OR of the following bits: */ #define NFSATTRBIT_S1 \ (NFSATTRBM_MODE | \ NFSATTRBM_NOTRUNC | \ NFSATTRBM_NUMLINKS | \ NFSATTRBM_OWNER | \ NFSATTRBM_OWNERGROUP | \ NFSATTRBM_RAWDEV | \ NFSATTRBM_SPACEAVAIL | \ NFSATTRBM_SPACEFREE | \ NFSATTRBM_SPACETOTAL | \ NFSATTRBM_SPACEUSED | \ NFSATTRBM_TIMEACCESS | \ NFSATTRBM_TIMECREATE | \ NFSATTRBM_TIMEDELTA | \ NFSATTRBM_TIMEMETADATA | \ NFSATTRBM_TIMEMODIFY | \ NFSATTRBM_MOUNTEDONFILEID | \ NFSATTRBM_QUOTAHARD | \ NFSATTRBM_QUOTASOFT | \ NFSATTRBM_QUOTAUSED | \ NFSATTRBM_FSLAYOUTTYPE) #ifdef QUOTA /* * If QUOTA OR in NFSATTRBIT_QUOTAHARD, NFSATTRBIT_QUOTASOFT and * NFSATTRBIT_QUOTAUSED. */ #define NFSATTRBIT_SUPP1 (NFSATTRBIT_S1 | \ NFSATTRBM_QUOTAHARD | \ NFSATTRBM_QUOTASOFT | \ NFSATTRBM_QUOTAUSED) #else #define NFSATTRBIT_SUPP1 NFSATTRBIT_S1 #endif #define NFSATTRBIT_SUPP2 \ (NFSATTRBM_LAYOUTTYPE | \ NFSATTRBM_LAYOUTBLKSIZE | \ NFSATTRBM_LAYOUTALIGNMENT | \ NFSATTRBM_SUPPATTREXCLCREAT | \ NFSATTRBM_XATTRSUPPORT) /* * These are the set only attributes. */ #define NFSATTRBIT_SUPPSETONLY1 (NFSATTRBM_TIMEACCESSSET | \ NFSATTRBM_TIMEMODIFYSET) #define NFSATTRBIT_SUPPSETONLY2 (NFSATTRBM_MODESETMASKED) /* * NFSATTRBIT_SETABLE - SETABLE0 - bits 0<->31 * SETABLE1 - bits 32<->63 * SETABLE2 - bits 64<->95 */ #define NFSATTRBIT_SETABLE0 \ (NFSATTRBM_SIZE | \ NFSATTRBM_ACL) #define NFSATTRBIT_SETABLE1 \ (NFSATTRBM_MODE | \ NFSATTRBM_OWNER | \ NFSATTRBM_OWNERGROUP | \ NFSATTRBM_TIMECREATE | \ NFSATTRBM_TIMEACCESSSET | \ NFSATTRBM_TIMEMODIFYSET) #define NFSATTRBIT_SETABLE2 \ (NFSATTRBM_MODESETMASKED) /* * NFSATTRBIT_NFSV41 - Attributes only supported by NFSv4.1. */ #define NFSATTRBIT_NFSV41_1 \ (NFSATTRBM_FSLAYOUTTYPE) #define NFSATTRBIT_NFSV41_2 \ (NFSATTRBM_LAYOUTTYPE | \ NFSATTRBM_LAYOUTBLKSIZE | \ NFSATTRBM_LAYOUTALIGNMENT | \ NFSATTRBM_MODESETMASKED | \ NFSATTRBM_SUPPATTREXCLCREAT) /* * NFSATTRBIT_NFSV42 - Attributes only supported by NFSv4.2. */ #define NFSATTRBIT_NFSV42_2 NFSATTRBM_XATTRSUPPORT /* * Set of attributes that the getattr vnode op needs. * OR of the following bits. * NFSATTRBIT_GETATTR0 - bits 0<->31 */ #define NFSATTRBIT_GETATTR0 \ (NFSATTRBM_SUPPORTEDATTRS | \ NFSATTRBM_TYPE | \ NFSATTRBM_CHANGE | \ NFSATTRBM_SIZE | \ NFSATTRBM_FSID | \ NFSATTRBM_FILEID | \ NFSATTRBM_MAXREAD) /* * NFSATTRBIT_GETATTR1 - bits 32<->63 */ #define NFSATTRBIT_GETATTR1 \ (NFSATTRBM_MODE | \ NFSATTRBM_NUMLINKS | \ NFSATTRBM_OWNER | \ NFSATTRBM_OWNERGROUP | \ NFSATTRBM_RAWDEV | \ NFSATTRBM_SPACEUSED | \ NFSATTRBM_TIMEACCESS | \ NFSATTRBM_TIMECREATE | \ NFSATTRBM_TIMEMETADATA | \ NFSATTRBM_TIMEMODIFY) /* * NFSATTRBIT_GETATTR2 - bits 64<->95 */ #define NFSATTRBIT_GETATTR2 0 /* * Subset of the above that the Write RPC gets. * OR of the following bits. * NFSATTRBIT_WRITEGETATTR0 - bits 0<->31 */ #define NFSATTRBIT_WRITEGETATTR0 \ (NFSATTRBM_SUPPORTEDATTRS | \ NFSATTRBM_TYPE | \ NFSATTRBM_CHANGE | \ NFSATTRBM_SIZE | \ NFSATTRBM_FSID | \ NFSATTRBM_FILEID | \ NFSATTRBM_MAXREAD) /* * NFSATTRBIT_WRITEGETATTR1 - bits 32<->63 */ #define NFSATTRBIT_WRITEGETATTR1 \ (NFSATTRBM_MODE | \ NFSATTRBM_NUMLINKS | \ NFSATTRBM_RAWDEV | \ NFSATTRBM_SPACEUSED | \ NFSATTRBM_TIMEACCESS | \ NFSATTRBM_TIMECREATE | \ NFSATTRBM_TIMEMETADATA | \ NFSATTRBM_TIMEMODIFY) /* * NFSATTRBIT_WRITEGETATTR2 - bits 64<->95 */ #define NFSATTRBIT_WRITEGETATTR2 0 /* * Set of attributes that the wccattr operation op needs. * OR of the following bits. * NFSATTRBIT_WCCATTR0 - bits 0<->31 */ #define NFSATTRBIT_WCCATTR0 0 /* * NFSATTRBIT_WCCATTR1 - bits 32<->63 */ #define NFSATTRBIT_WCCATTR1 \ (NFSATTRBM_TIMEMODIFY) /* * NFSATTRBIT_WCCATTR2 - bits 64<->95 */ #define NFSATTRBIT_WCCATTR2 0 /* * NFSATTRBIT_CBGETATTR0 - bits 0<->31 */ #define NFSATTRBIT_CBGETATTR0 (NFSATTRBM_CHANGE | NFSATTRBM_SIZE) /* * NFSATTRBIT_CBGETATTR1 - bits 32<->63 */ #define NFSATTRBIT_CBGETATTR1 0x0 /* * NFSATTRBIT_CBGETATTR2 - bits 64<->95 */ #define NFSATTRBIT_CBGETATTR2 0x0 /* * Sets of attributes that require a VFS_STATFS() call to get the * values of. * NFSATTRBIT_STATFS0 - bits 0<->31 */ #define NFSATTRBIT_STATFS0 \ (NFSATTRBM_LINKSUPPORT | \ NFSATTRBM_SYMLINKSUPPORT | \ NFSATTRBM_CANSETTIME | \ NFSATTRBM_FILESAVAIL | \ NFSATTRBM_FILESFREE | \ NFSATTRBM_FILESTOTAL | \ NFSATTRBM_HOMOGENEOUS | \ NFSATTRBM_MAXFILESIZE | \ NFSATTRBM_MAXNAME | \ NFSATTRBM_MAXREAD | \ NFSATTRBM_MAXWRITE) /* * NFSATTRBIT_STATFS1 - bits 32<->63 */ #define NFSATTRBIT_STATFS1 \ (NFSATTRBM_QUOTAHARD | \ NFSATTRBM_QUOTASOFT | \ NFSATTRBM_QUOTAUSED | \ NFSATTRBM_SPACEAVAIL | \ NFSATTRBM_SPACEFREE | \ NFSATTRBM_SPACETOTAL | \ NFSATTRBM_SPACEUSED | \ NFSATTRBM_TIMEDELTA) /* * NFSATTRBIT_STATFS2 - bits 64<->95 */ #define NFSATTRBIT_STATFS2 0 /* * These are the bits that are needed by the nfs_statfs() call. * (The regular getattr bits are or'd in so the vnode gets the correct * type, etc.) * NFSGETATTRBIT_STATFS0 - bits 0<->31 */ #define NFSGETATTRBIT_STATFS0 (NFSATTRBIT_GETATTR0 | \ NFSATTRBM_LINKSUPPORT | \ NFSATTRBM_SYMLINKSUPPORT | \ NFSATTRBM_CANSETTIME | \ NFSATTRBM_FILESFREE | \ NFSATTRBM_FILESTOTAL | \ NFSATTRBM_HOMOGENEOUS | \ NFSATTRBM_MAXFILESIZE | \ NFSATTRBM_MAXNAME | \ NFSATTRBM_MAXREAD | \ NFSATTRBM_MAXWRITE) /* * NFSGETATTRBIT_STATFS1 - bits 32<->63 */ #define NFSGETATTRBIT_STATFS1 (NFSATTRBIT_GETATTR1 | \ NFSATTRBM_SPACEAVAIL | \ NFSATTRBM_SPACEFREE | \ NFSATTRBM_SPACETOTAL | \ NFSATTRBM_TIMEDELTA) /* * NFSGETATTRBIT_STATFS2 - bits 64<->95 */ #define NFSGETATTRBIT_STATFS2 0 /* * Set of attributes for the equivalent of an nfsv3 pathconf rpc. * NFSGETATTRBIT_PATHCONF0 - bits 0<->31 */ #define NFSGETATTRBIT_PATHCONF0 (NFSATTRBIT_GETATTR0 | \ NFSATTRBM_CASEINSENSITIVE | \ NFSATTRBM_CASEPRESERVING | \ NFSATTRBM_CHOWNRESTRICTED | \ NFSATTRBM_MAXLINK | \ NFSATTRBM_MAXNAME) /* * NFSGETATTRBIT_PATHCONF1 - bits 32<->63 */ #define NFSGETATTRBIT_PATHCONF1 (NFSATTRBIT_GETATTR1 | \ NFSATTRBM_NOTRUNC) /* * NFSGETATTRBIT_PATHCONF2 - bits 64<->95 */ #define NFSGETATTRBIT_PATHCONF2 0 /* * Sets of attributes required by readdir and readdirplus. * NFSATTRBIT_READDIRPLUS0 (NFSATTRBIT_GETATTR0 | NFSATTRBIT_FILEHANDLE | * NFSATTRBIT_RDATTRERROR) */ #define NFSATTRBIT_READDIRPLUS0 (NFSATTRBIT_GETATTR0 | NFSATTRBM_FILEHANDLE | \ NFSATTRBM_RDATTRERROR) #define NFSATTRBIT_READDIRPLUS1 NFSATTRBIT_GETATTR1 #define NFSATTRBIT_READDIRPLUS2 0 /* * Set of attributes supported by Referral vnodes. */ #define NFSATTRBIT_REFERRAL0 (NFSATTRBM_TYPE | NFSATTRBM_FSID | \ NFSATTRBM_RDATTRERROR | NFSATTRBM_FSLOCATIONS) #define NFSATTRBIT_REFERRAL1 NFSATTRBM_MOUNTEDONFILEID #define NFSATTRBIT_REFERRAL2 0 /* * Structure for data handled by the statfs rpc. Since some fields are * u_int64_t, this cannot be used for copying data on/off the wire, due * to alignment concerns. */ struct nfsstatfs { union { struct { u_int32_t nfsv2sf_tsize; u_int32_t nfsv2sf_bsize; u_int32_t nfsv2sf_blocks; u_int32_t nfsv2sf_bfree; u_int32_t nfsv2sf_bavail; } sf_nfsv2; struct { u_int64_t nfsv3sf_tbytes; u_int64_t nfsv3sf_fbytes; u_int64_t nfsv3sf_abytes; u_int64_t nfsv3sf_tfiles; u_int64_t nfsv3sf_ffiles; u_int64_t nfsv3sf_afiles; u_int32_t nfsv3sf_invarsec; } sf_nfsv3; } sf_un; }; #define sf_tsize sf_un.sf_nfsv2.nfsv2sf_tsize #define sf_bsize sf_un.sf_nfsv2.nfsv2sf_bsize #define sf_blocks sf_un.sf_nfsv2.nfsv2sf_blocks #define sf_bfree sf_un.sf_nfsv2.nfsv2sf_bfree #define sf_bavail sf_un.sf_nfsv2.nfsv2sf_bavail #define sf_tbytes sf_un.sf_nfsv3.nfsv3sf_tbytes #define sf_fbytes sf_un.sf_nfsv3.nfsv3sf_fbytes #define sf_abytes sf_un.sf_nfsv3.nfsv3sf_abytes #define sf_tfiles sf_un.sf_nfsv3.nfsv3sf_tfiles #define sf_ffiles sf_un.sf_nfsv3.nfsv3sf_ffiles #define sf_afiles sf_un.sf_nfsv3.nfsv3sf_afiles #define sf_invarsec sf_un.sf_nfsv3.nfsv3sf_invarsec /* * Now defined using u_int64_t for the 64 bit field(s). * (Cannot be used to move data on/off the wire, due to alignment concerns.) */ struct nfsfsinfo { u_int32_t fs_rtmax; u_int32_t fs_rtpref; u_int32_t fs_rtmult; u_int32_t fs_wtmax; u_int32_t fs_wtpref; u_int32_t fs_wtmult; u_int32_t fs_dtpref; u_int64_t fs_maxfilesize; struct timespec fs_timedelta; u_int32_t fs_properties; }; /* * Bits for fs_properties */ #define NFSV3_FSFLINK 0x1 #define NFSV3_FSFSYMLINK 0x2 #define NFSV3_FSFHOMOGENEOUS 0x4 #define NFSV3_FSFCANSETTIME 0x8 /* * Yikes, overload fs_rtmult as fs_maxname for V4. */ #define fs_maxname fs_rtmult struct nfsv3_pathconf { u_int32_t pc_linkmax; u_int32_t pc_namemax; u_int32_t pc_notrunc; u_int32_t pc_chownrestricted; u_int32_t pc_caseinsensitive; u_int32_t pc_casepreserving; }; /* * NFS V4 data structures. */ struct nfsv4stateid { u_int32_t seqid; u_int32_t other[NFSX_STATEIDOTHER / NFSX_UNSIGNED]; }; typedef struct nfsv4stateid nfsv4stateid_t; /* Notify bits and notify bitmap size. */ #define NFSV4NOTIFY_CHANGE 1 #define NFSV4NOTIFY_DELETE 2 #define NFSV4_NOTIFYBITMAP 1 /* # of 32bit values needed for bits */ /* Layoutreturn kinds. */ #define NFSV4LAYOUTRET_FILE 1 #define NFSV4LAYOUTRET_FSID 2 #define NFSV4LAYOUTRET_ALL 3 /* Seek Contents. */ #define NFSV4CONTENT_DATA 0 #define NFSV4CONTENT_HOLE 1 /* Options for Set Extended attribute (RFC-8276). */ #define NFSV4SXATTR_EITHER 0 #define NFSV4SXATTR_CREATE 1 #define NFSV4SXATTR_REPLACE 2 #endif /* _NFS_NFSPROTO_H_ */ diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c index 7bcbc738d61b..efe9aac7a136 100644 --- a/sys/fs/nfsserver/nfs_nfsdport.c +++ b/sys/fs/nfsserver/nfs_nfsdport.c @@ -1,6762 +1,6843 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include /* * Functions that perform the vfs operations required by the routines in * nfsd_serv.c. It is hoped that this change will make the server more * portable. */ #include #include #include #include #include #include #include FEATURE(nfsd, "NFSv4 server"); extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1; extern int nfsrv_useacl; extern int newnfs_numnfsd; extern struct mount nfsv4root_mnt; extern struct nfsrv_stablefirst nfsrv_stablefirst; extern void (*nfsd_call_servertimer)(void); extern SVCPOOL *nfsrvd_pool; extern struct nfsv4lock nfsd_suspend_lock; extern struct nfsclienthashhead *nfsclienthash; extern struct nfslockhashhead *nfslockhash; extern struct nfssessionhash *nfssessionhash; extern int nfsrv_sessionhashsize; extern struct nfsstatsv1 nfsstatsv1; extern struct nfslayouthash *nfslayouthash; extern int nfsrv_layouthashsize; extern struct mtx nfsrv_dslock_mtx; extern int nfs_pnfsiothreads; extern struct nfsdontlisthead nfsrv_dontlisthead; extern volatile int nfsrv_dontlistlen; extern volatile int nfsrv_devidcnt; extern int nfsrv_maxpnfsmirror; +extern uint32_t nfs_srvmaxio; +extern int nfs_bufpackets; +extern u_long sb_max_adj; struct vfsoptlist nfsv4root_opt, nfsv4root_newopt; NFSDLOCKMUTEX; NFSSTATESPINLOCK; struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE]; struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE]; struct mtx nfsrc_udpmtx; struct mtx nfs_v4root_mutex; struct mtx nfsrv_dontlistlock_mtx; struct mtx nfsrv_recalllock_mtx; struct nfsrvfh nfs_rootfh, nfs_pubfh; int nfs_pubfhset = 0, nfs_rootfhset = 0; struct proc *nfsd_master_proc = NULL; int nfsd_debuglevel = 0; static pid_t nfsd_master_pid = (pid_t)-1; static char nfsd_master_comm[MAXCOMLEN + 1]; static struct timeval nfsd_master_start; static uint32_t nfsv4_sysid = 0; static fhandle_t zerofh; static int nfssvc_srvcall(struct thread *, struct nfssvc_args *, struct ucred *); int nfsrv_enable_crossmntpt = 1; static int nfs_commit_blks; static int nfs_commit_miss; extern int nfsrv_issuedelegs; extern int nfsrv_dolocallocks; extern int nfsd_enable_stringtouid; extern struct nfsdevicehead nfsrv_devidhead; static int nfsrv_createiovec(int, struct mbuf **, struct mbuf **, struct iovec **); static int nfsrv_createiovec_extpgs(int, int, struct mbuf **, struct mbuf **, struct iovec **); static int nfsrv_createiovecw(int, struct mbuf *, char *, struct iovec **, int *); static void nfsrv_pnfscreate(struct vnode *, struct vattr *, struct ucred *, NFSPROC_T *); static void nfsrv_pnfsremovesetup(struct vnode *, NFSPROC_T *, struct vnode **, int *, char *, fhandle_t *); static void nfsrv_pnfsremove(struct vnode **, int, char *, fhandle_t *, NFSPROC_T *); static int nfsrv_proxyds(struct vnode *, off_t, int, struct ucred *, struct thread *, int, struct mbuf **, char *, struct mbuf **, struct nfsvattr *, struct acl *, off_t *, int, bool *); static int nfsrv_setextattr(struct vnode *, struct nfsvattr *, NFSPROC_T *); static int nfsrv_readdsrpc(fhandle_t *, off_t, int, struct ucred *, NFSPROC_T *, struct nfsmount *, struct mbuf **, struct mbuf **); static int nfsrv_writedsrpc(fhandle_t *, off_t, int, struct ucred *, NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct mbuf **, char *, int *); static int nfsrv_allocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *, NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *); static int nfsrv_setacldsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct acl *, int *); static int nfsrv_setattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct nfsvattr *, int *); static int nfsrv_getattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, struct vnode *, struct nfsmount *, struct nfsvattr *); static int nfsrv_seekdsrpc(fhandle_t *, off_t *, int, bool *, struct ucred *, NFSPROC_T *, struct nfsmount *); static int nfsrv_putfhname(fhandle_t *, char *); static int nfsrv_pnfslookupds(struct vnode *, struct vnode *, struct pnfsdsfile *, struct vnode **, NFSPROC_T *); static void nfsrv_pnfssetfh(struct vnode *, struct pnfsdsfile *, char *, char *, struct vnode *, NFSPROC_T *); static int nfsrv_dsremove(struct vnode *, char *, struct ucred *, NFSPROC_T *); static int nfsrv_dssetacl(struct vnode *, struct acl *, struct ucred *, NFSPROC_T *); static int nfsrv_pnfsstatfs(struct statfs *, struct mount *); int nfs_pnfsio(task_fn_t *, void *); SYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "NFS server"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW, &nfsrv_enable_crossmntpt, 0, "Enable nfsd to cross mount points"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0, ""); SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0, ""); SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW, &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW, &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, debuglevel, CTLFLAG_RW, &nfsd_debuglevel, 0, "Debug level for NFS server"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_stringtouid, CTLFLAG_RW, &nfsd_enable_stringtouid, 0, "Enable nfsd to accept numeric owner_names"); static int nfsrv_pnfsgetdsattr = 1; SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsgetdsattr, CTLFLAG_RW, &nfsrv_pnfsgetdsattr, 0, "When set getattr gets DS attributes via RPC"); /* * nfsrv_dsdirsize can only be increased and only when the nfsd threads are * not running. * The dsN subdirectories for the increased values must have been created * on all DS servers before this increase is done. */ u_int nfsrv_dsdirsize = 20; static int sysctl_dsdirsize(SYSCTL_HANDLER_ARGS) { int error, newdsdirsize; newdsdirsize = nfsrv_dsdirsize; error = sysctl_handle_int(oidp, &newdsdirsize, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (newdsdirsize <= nfsrv_dsdirsize || newdsdirsize > 10000 || newnfs_numnfsd != 0) return (EINVAL); nfsrv_dsdirsize = newdsdirsize; return (0); } SYSCTL_PROC(_vfs_nfsd, OID_AUTO, dsdirsize, CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(nfsrv_dsdirsize), sysctl_dsdirsize, "IU", "Number of dsN subdirs on the DS servers"); +/* + * nfs_srvmaxio can only be increased and only when the nfsd threads are + * not running. The setting must be a power of 2, with the current limit of + * 1Mbyte. + */ +static int +sysctl_srvmaxio(SYSCTL_HANDLER_ARGS) +{ + int error; + u_int newsrvmaxio; + uint64_t tval; + + newsrvmaxio = nfs_srvmaxio; + error = sysctl_handle_int(oidp, &newsrvmaxio, 0, req); + if (error != 0 || req->newptr == NULL) + return (error); + if (newsrvmaxio == nfs_srvmaxio) + return (0); + if (newsrvmaxio < nfs_srvmaxio) { + printf("nfsd: vfs.nfsd.srvmaxio can only be increased\n"); + return (EINVAL); + } + if (newsrvmaxio > 1048576) { + printf("nfsd: vfs.nfsd.srvmaxio cannot be > 1Mbyte\n"); + return (EINVAL); + } + if ((newsrvmaxio & (newsrvmaxio - 1)) != 0) { + printf("nfsd: vfs.nfsd.srvmaxio must be a power of 2\n"); + return (EINVAL); + } + + /* + * Check that kern.ipc.maxsockbuf is large enough for + * newsrviomax, given the setting of vfs.nfs.bufpackets. + */ + if ((newsrvmaxio + NFS_MAXXDR) * nfs_bufpackets > + sb_max_adj) { + /* + * Suggest vfs.nfs.bufpackets * maximum RPC message for + * sb_max_adj. + */ + tval = (newsrvmaxio + NFS_MAXXDR) * nfs_bufpackets; + + /* + * Convert suggested sb_max_adj value to a suggested + * sb_max value, which is what is set via kern.ipc.maxsockbuf. + * Perform the inverse calculation of (from uipc_sockbuf.c): + * sb_max_adj = (u_quad_t)sb_max * MCLBYTES / + * (MSIZE + MCLBYTES); + * XXX If the calculation of sb_max_adj from sb_max changes, + * this calculation must be changed as well. + */ + tval *= (MSIZE + MCLBYTES); /* Brackets for readability. */ + tval += MCLBYTES - 1; /* Round up divide. */ + tval /= MCLBYTES; + printf("nfsd: set kern.ipc.maxsockbuf to a minimum of " + "%ju to support %ubyte NFS I/O\n", (uintmax_t)tval, + newsrvmaxio); + return (EINVAL); + } + + NFSD_LOCK(); + if (newnfs_numnfsd != 0) { + NFSD_UNLOCK(); + printf("nfsd: cannot set vfs.nfsd.srvmaxio when nfsd " + "threads are running\n"); + return (EINVAL); + } + + + nfs_srvmaxio = newsrvmaxio; + NFSD_UNLOCK(); + return (0); +} +SYSCTL_PROC(_vfs_nfsd, OID_AUTO, srvmaxio, + CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0, + sysctl_srvmaxio, "IU", "Maximum I/O size in bytes"); + #define MAX_REORDERED_RPC 16 #define NUM_HEURISTIC 1031 #define NHUSE_INIT 64 #define NHUSE_INC 16 #define NHUSE_MAX 2048 static struct nfsheur { struct vnode *nh_vp; /* vp to match (unreferenced pointer) */ off_t nh_nextoff; /* next offset for sequential detection */ int nh_use; /* use count for selection */ int nh_seqcount; /* heuristic */ } nfsheur[NUM_HEURISTIC]; /* * Heuristic to detect sequential operation. */ static struct nfsheur * nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp) { struct nfsheur *nh; int hi, try; /* Locate best candidate. */ try = 32; hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC; nh = &nfsheur[hi]; while (try--) { if (nfsheur[hi].nh_vp == vp) { nh = &nfsheur[hi]; break; } if (nfsheur[hi].nh_use > 0) --nfsheur[hi].nh_use; hi = (hi + 1) % NUM_HEURISTIC; if (nfsheur[hi].nh_use < nh->nh_use) nh = &nfsheur[hi]; } /* Initialize hint if this is a new file. */ if (nh->nh_vp != vp) { nh->nh_vp = vp; nh->nh_nextoff = uio->uio_offset; nh->nh_use = NHUSE_INIT; if (uio->uio_offset == 0) nh->nh_seqcount = 4; else nh->nh_seqcount = 1; } /* Calculate heuristic. */ if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) || uio->uio_offset == nh->nh_nextoff) { /* See comments in vfs_vnops.c:sequential_heuristic(). */ nh->nh_seqcount += howmany(uio->uio_resid, 16384); if (nh->nh_seqcount > IO_SEQMAX) nh->nh_seqcount = IO_SEQMAX; } else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC * imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) { /* Probably a reordered RPC, leave seqcount alone. */ } else if (nh->nh_seqcount > 1) { nh->nh_seqcount /= 2; } else { nh->nh_seqcount = 0; } nh->nh_use += NHUSE_INC; if (nh->nh_use > NHUSE_MAX) nh->nh_use = NHUSE_MAX; return (nh); } /* * Get attributes into nfsvattr structure. */ int nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, struct nfsrv_descript *nd, struct thread *p, int vpislocked, nfsattrbit_t *attrbitp) { int error, gotattr, lockedit = 0; struct nfsvattr na; if (vpislocked == 0) { /* * When vpislocked == 0, the vnode is either exclusively * locked by this thread or not locked by this thread. * As such, shared lock it, if not exclusively locked. */ if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { lockedit = 1; NFSVOPLOCK(vp, LK_SHARED | LK_RETRY); } } /* * Acquire the Change, Size, TimeAccess, TimeModify and SpaceUsed * attributes, as required. * This needs to be done for regular files if: * - non-NFSv4 RPCs or * - when attrbitp == NULL or * - an NFSv4 RPC with any of the above attributes in attrbitp. * A return of 0 for nfsrv_proxyds() indicates that it has acquired * these attributes. nfsrv_proxyds() will return an error if the * server is not a pNFS one. */ gotattr = 0; if (vp->v_type == VREG && nfsrv_devidcnt > 0 && (attrbitp == NULL || (nd->nd_flag & ND_NFSV4) == 0 || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_CHANGE) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEACCESS) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEMODIFY) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEUSED))) { error = nfsrv_proxyds(vp, 0, 0, nd->nd_cred, p, NFSPROC_GETATTR, NULL, NULL, NULL, &na, NULL, NULL, 0, NULL); if (error == 0) gotattr = 1; } error = VOP_GETATTR(vp, &nvap->na_vattr, nd->nd_cred); if (lockedit != 0) NFSVOPUNLOCK(vp); /* * If we got the Change, Size and Modify Time from the DS, * replace them. */ if (gotattr != 0) { nvap->na_atime = na.na_atime; nvap->na_mtime = na.na_mtime; nvap->na_filerev = na.na_filerev; nvap->na_size = na.na_size; nvap->na_bytes = na.na_bytes; } NFSD_DEBUG(4, "nfsvno_getattr: gotattr=%d err=%d chg=%ju\n", gotattr, error, (uintmax_t)na.na_filerev); NFSEXITCODE(error); return (error); } /* * Get a file handle for a vnode. */ int nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p) { int error; NFSBZERO((caddr_t)fhp, sizeof(fhandle_t)); fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VOP_VPTOFH(vp, &fhp->fh_fid); NFSEXITCODE(error); return (error); } /* * Perform access checking for vnodes obtained from file handles that would * refer to files already opened by a Unix client. You cannot just use * vn_writechk() and VOP_ACCESSX() for two reasons. * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write * case. * 2 - The owner is to be given access irrespective of mode bits for some * operations, so that processes that chmod after opening a file don't * break. */ int nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred, struct nfsexstuff *exp, struct thread *p, int override, int vpislocked, u_int32_t *supportedtypep) { struct vattr vattr; int error = 0, getret = 0; if (vpislocked == 0) { if (NFSVOPLOCK(vp, LK_SHARED) != 0) { error = EPERM; goto out; } } if (accmode & VWRITE) { /* Just vn_writechk() changed to check rdonly */ /* * Disallow write attempts on read-only file systems; * unless the file is a socket or a block or character * device resident on the file system. */ if (NFSVNO_EXRDONLY(exp) || (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: error = EROFS; default: break; } } /* * If there's shared text associated with * the inode, try to free it up once. If * we fail, we can't allow writing. */ if (VOP_IS_TEXT(vp) && error == 0) error = ETXTBSY; } if (error != 0) { if (vpislocked == 0) NFSVOPUNLOCK(vp); goto out; } /* * Should the override still be applied when ACLs are enabled? */ error = VOP_ACCESSX(vp, accmode, cred, p); if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) { /* * Try again with VEXPLICIT_DENY, to see if the test for * deletion is supported. */ error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p); if (error == 0) { if (vp->v_type == VDIR) { accmode &= ~(VDELETE | VDELETE_CHILD); accmode |= VWRITE; error = VOP_ACCESSX(vp, accmode, cred, p); } else if (supportedtypep != NULL) { *supportedtypep &= ~NFSACCESS_DELETE; } } } /* * Allow certain operations for the owner (reads and writes * on files that are already open). */ if (override != NFSACCCHK_NOOVERRIDE && (error == EPERM || error == EACCES)) { if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT)) error = 0; else if (override & NFSACCCHK_ALLOWOWNER) { getret = VOP_GETATTR(vp, &vattr, cred); if (getret == 0 && cred->cr_uid == vattr.va_uid) error = 0; } } if (vpislocked == 0) NFSVOPUNLOCK(vp); out: NFSEXITCODE(error); return (error); } /* * Set attribute(s) vnop. */ int nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { u_quad_t savsize = 0; int error, savedit; time_t savbtime; /* * If this is an exported file system and a pNFS service is running, * don't VOP_SETATTR() of size for the MDS file system. */ savedit = 0; error = 0; if (vp->v_type == VREG && (vp->v_mount->mnt_flag & MNT_EXPORTED) != 0 && nfsrv_devidcnt != 0 && nvap->na_vattr.va_size != VNOVAL && nvap->na_vattr.va_size > 0) { savsize = nvap->na_vattr.va_size; nvap->na_vattr.va_size = VNOVAL; if (nvap->na_vattr.va_uid != (uid_t)VNOVAL || nvap->na_vattr.va_gid != (gid_t)VNOVAL || nvap->na_vattr.va_mode != (mode_t)VNOVAL || nvap->na_vattr.va_atime.tv_sec != VNOVAL || nvap->na_vattr.va_mtime.tv_sec != VNOVAL) savedit = 1; else savedit = 2; } if (savedit != 2) error = VOP_SETATTR(vp, &nvap->na_vattr, cred); if (savedit != 0) nvap->na_vattr.va_size = savsize; if (error == 0 && (nvap->na_vattr.va_uid != (uid_t)VNOVAL || nvap->na_vattr.va_gid != (gid_t)VNOVAL || nvap->na_vattr.va_size != VNOVAL || nvap->na_vattr.va_mode != (mode_t)VNOVAL || nvap->na_vattr.va_atime.tv_sec != VNOVAL || nvap->na_vattr.va_mtime.tv_sec != VNOVAL)) { /* Never modify birthtime on a DS file. */ savbtime = nvap->na_vattr.va_birthtime.tv_sec; nvap->na_vattr.va_birthtime.tv_sec = VNOVAL; /* For a pNFS server, set the attributes on the DS file. */ error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETATTR, NULL, NULL, NULL, nvap, NULL, NULL, 0, NULL); nvap->na_vattr.va_birthtime.tv_sec = savbtime; if (error == ENOENT) error = 0; } NFSEXITCODE(error); return (error); } /* * Set up nameidata for a lookup() call and do it. */ int nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp, struct vnode *dp, int islocked, struct nfsexstuff *exp, struct thread *p, struct vnode **retdirp) { struct componentname *cnp = &ndp->ni_cnd; int i; struct iovec aiov; struct uio auio; int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen; int error = 0; char *cp; *retdirp = NULL; cnp->cn_nameptr = cnp->cn_pnbuf; ndp->ni_lcf = 0; /* * Extract and set starting directory. */ if (dp->v_type != VDIR) { if (islocked) vput(dp); else vrele(dp); nfsvno_relpathbuf(ndp); error = ENOTDIR; goto out1; } if (islocked) NFSVOPUNLOCK(dp); VREF(dp); *retdirp = dp; if (NFSVNO_EXRDONLY(exp)) cnp->cn_flags |= RDONLY; ndp->ni_segflg = UIO_SYSSPACE; if (nd->nd_flag & ND_PUBLOOKUP) { ndp->ni_loopcnt = 0; if (cnp->cn_pnbuf[0] == '/') { vrele(dp); /* * Check for degenerate pathnames here, since lookup() * panics on them. */ for (i = 1; i < ndp->ni_pathlen; i++) if (cnp->cn_pnbuf[i] != '/') break; if (i == ndp->ni_pathlen) { error = NFSERR_ACCES; goto out; } dp = rootvnode; VREF(dp); } } else if ((nfsrv_enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) || (nd->nd_flag & ND_NFSV4) == 0) { /* * Only cross mount points for NFSv4 when doing a * mount while traversing the file system above * the mount point, unless nfsrv_enable_crossmntpt is set. */ cnp->cn_flags |= NOCROSSMOUNT; } /* * Initialize for scan, set ni_startdir and bump ref on dp again * because lookup() will dereference ni_startdir. */ cnp->cn_thread = p; ndp->ni_startdir = dp; ndp->ni_rootdir = rootvnode; ndp->ni_topdir = NULL; if (!lockleaf) cnp->cn_flags |= LOCKLEAF; for (;;) { cnp->cn_nameptr = cnp->cn_pnbuf; /* * Call lookup() to do the real work. If an error occurs, * ndp->ni_vp and ni_dvp are left uninitialized or NULL and * we do not have to dereference anything before returning. * In either case ni_startdir will be dereferenced and NULLed * out. */ error = lookup(ndp); if (error) break; /* * Check for encountering a symbolic link. Trivial * termination occurs if no symlink encountered. */ if ((cnp->cn_flags & ISSYMLINK) == 0) { if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) nfsvno_relpathbuf(ndp); if (ndp->ni_vp && !lockleaf) NFSVOPUNLOCK(ndp->ni_vp); break; } /* * Validate symlink */ if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) NFSVOPUNLOCK(ndp->ni_dvp); if (!(nd->nd_flag & ND_PUBLOOKUP)) { error = EINVAL; goto badlink2; } if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { error = ELOOP; goto badlink2; } if (ndp->ni_pathlen > 1) cp = uma_zalloc(namei_zone, M_WAITOK); else cp = cnp->cn_pnbuf; aiov.iov_base = cp; aiov.iov_len = MAXPATHLEN; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = NULL; auio.uio_resid = MAXPATHLEN; error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); if (error) { badlink1: if (ndp->ni_pathlen > 1) uma_zfree(namei_zone, cp); badlink2: vrele(ndp->ni_dvp); vput(ndp->ni_vp); break; } linklen = MAXPATHLEN - auio.uio_resid; if (linklen == 0) { error = ENOENT; goto badlink1; } if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { error = ENAMETOOLONG; goto badlink1; } /* * Adjust or replace path */ if (ndp->ni_pathlen > 1) { NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen); uma_zfree(namei_zone, cnp->cn_pnbuf); cnp->cn_pnbuf = cp; } else cnp->cn_pnbuf[linklen] = '\0'; ndp->ni_pathlen += linklen; /* * Cleanup refs for next loop and check if root directory * should replace current directory. Normally ni_dvp * becomes the new base directory and is cleaned up when * we loop. Explicitly null pointers after invalidation * to clarify operation. */ vput(ndp->ni_vp); ndp->ni_vp = NULL; if (cnp->cn_pnbuf[0] == '/') { vrele(ndp->ni_dvp); ndp->ni_dvp = ndp->ni_rootdir; VREF(ndp->ni_dvp); } ndp->ni_startdir = ndp->ni_dvp; ndp->ni_dvp = NULL; } if (!lockleaf) cnp->cn_flags &= ~LOCKLEAF; out: if (error) { nfsvno_relpathbuf(ndp); ndp->ni_vp = NULL; ndp->ni_dvp = NULL; ndp->ni_startdir = NULL; } else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) { ndp->ni_dvp = NULL; } out1: NFSEXITCODE2(error, nd); return (error); } /* * Set up a pathname buffer and return a pointer to it and, optionally * set a hash pointer. */ void nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp) { struct componentname *cnp = &ndp->ni_cnd; cnp->cn_flags |= (NOMACCHECK | HASBUF); cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); if (hashpp != NULL) *hashpp = NULL; *bufpp = cnp->cn_pnbuf; } /* * Release the above path buffer, if not released by nfsvno_namei(). */ void nfsvno_relpathbuf(struct nameidata *ndp) { if ((ndp->ni_cnd.cn_flags & HASBUF) == 0) panic("nfsrelpath"); uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf); ndp->ni_cnd.cn_flags &= ~HASBUF; } /* * Readlink vnode op into an mbuf list. */ int nfsvno_readlink(struct vnode *vp, struct ucred *cred, int maxextsiz, struct thread *p, struct mbuf **mpp, struct mbuf **mpendp, int *lenp) { struct iovec *iv; struct uio io, *uiop = &io; struct mbuf *mp, *mp3; int len, tlen, error = 0; len = NFS_MAXPATHLEN; if (maxextsiz > 0) uiop->uio_iovcnt = nfsrv_createiovec_extpgs(len, maxextsiz, &mp3, &mp, &iv); else uiop->uio_iovcnt = nfsrv_createiovec(len, &mp3, &mp, &iv); uiop->uio_iov = iv; uiop->uio_offset = 0; uiop->uio_resid = len; uiop->uio_rw = UIO_READ; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = NULL; error = VOP_READLINK(vp, uiop, cred); free(iv, M_TEMP); if (error) { m_freem(mp3); *lenp = 0; goto out; } if (uiop->uio_resid > 0) { len -= uiop->uio_resid; tlen = NFSM_RNDUP(len); if (tlen == 0) { m_freem(mp3); mp3 = mp = NULL; } else if (tlen != NFS_MAXPATHLEN || tlen != len) mp = nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen, tlen - len); } *lenp = len; *mpp = mp3; *mpendp = mp; out: NFSEXITCODE(error); return (error); } /* * Create an mbuf chain and an associated iovec that can be used to Read * or Getextattr of data. * Upon success, return pointers to the first and last mbufs in the chain * plus the malloc'd iovec and its iovlen. */ static int nfsrv_createiovec(int len, struct mbuf **mpp, struct mbuf **mpendp, struct iovec **ivp) { struct mbuf *m, *m2 = NULL, *m3; struct iovec *iv; int i, left, siz; left = len; m3 = NULL; /* * Generate the mbuf list with the uio_iov ref. to it. */ i = 0; while (left > 0) { NFSMGET(m); MCLGET(m, M_WAITOK); m->m_len = 0; siz = min(M_TRAILINGSPACE(m), left); left -= siz; i++; if (m3) m2->m_next = m; else m3 = m; m2 = m; } *ivp = iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK); m = m3; left = len; i = 0; while (left > 0) { if (m == NULL) panic("nfsrv_createiovec iov"); siz = min(M_TRAILINGSPACE(m), left); if (siz > 0) { iv->iov_base = mtod(m, caddr_t) + m->m_len; iv->iov_len = siz; m->m_len += siz; left -= siz; iv++; i++; } m = m->m_next; } *mpp = m3; *mpendp = m2; return (i); } /* * Create an mbuf chain and an associated iovec that can be used to Read * or Getextattr of data. * Upon success, return pointers to the first and last mbufs in the chain * plus the malloc'd iovec and its iovlen. * Same as above, but creates ext_pgs mbuf(s). */ static int nfsrv_createiovec_extpgs(int len, int maxextsiz, struct mbuf **mpp, struct mbuf **mpendp, struct iovec **ivp) { struct mbuf *m, *m2 = NULL, *m3; struct iovec *iv; int i, left, pgno, siz; left = len; m3 = NULL; /* * Generate the mbuf list with the uio_iov ref. to it. */ i = 0; while (left > 0) { siz = min(left, maxextsiz); m = mb_alloc_ext_plus_pages(siz, M_WAITOK); left -= siz; i += m->m_epg_npgs; if (m3 != NULL) m2->m_next = m; else m3 = m; m2 = m; } *ivp = iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK); m = m3; left = len; i = 0; pgno = 0; while (left > 0) { if (m == NULL) panic("nfsvno_createiovec_extpgs iov"); siz = min(PAGE_SIZE, left); if (siz > 0) { iv->iov_base = (void *)PHYS_TO_DMAP(m->m_epg_pa[pgno]); iv->iov_len = siz; m->m_len += siz; if (pgno == m->m_epg_npgs - 1) m->m_epg_last_len = siz; left -= siz; iv++; i++; pgno++; } if (pgno == m->m_epg_npgs && left > 0) { m = m->m_next; if (m == NULL) panic("nfsvno_createiovec_extpgs iov"); pgno = 0; } } *mpp = m3; *mpendp = m2; return (i); } /* * Read vnode op call into mbuf list. */ int nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, int maxextsiz, struct thread *p, struct mbuf **mpp, struct mbuf **mpendp) { struct mbuf *m; struct iovec *iv; int error = 0, len, tlen, ioflag = 0; struct mbuf *m3; struct uio io, *uiop = &io; struct nfsheur *nh; /* * Attempt to read from a DS file. A return of ENOENT implies * there is no DS file to read. */ error = nfsrv_proxyds(vp, off, cnt, cred, p, NFSPROC_READDS, mpp, NULL, mpendp, NULL, NULL, NULL, 0, NULL); if (error != ENOENT) return (error); len = NFSM_RNDUP(cnt); if (maxextsiz > 0) uiop->uio_iovcnt = nfsrv_createiovec_extpgs(len, maxextsiz, &m3, &m, &iv); else uiop->uio_iovcnt = nfsrv_createiovec(len, &m3, &m, &iv); uiop->uio_iov = iv; uiop->uio_offset = off; uiop->uio_resid = len; uiop->uio_rw = UIO_READ; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = NULL; nh = nfsrv_sequential_heuristic(uiop, vp); ioflag |= nh->nh_seqcount << IO_SEQSHIFT; /* XXX KDM make this more systematic? */ nfsstatsv1.srvbytes[NFSV4OP_READ] += uiop->uio_resid; error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred); free(iv, M_TEMP); if (error) { m_freem(m3); *mpp = NULL; goto out; } nh->nh_nextoff = uiop->uio_offset; tlen = len - uiop->uio_resid; cnt = cnt < tlen ? cnt : tlen; tlen = NFSM_RNDUP(cnt); if (tlen == 0) { m_freem(m3); m3 = m = NULL; } else if (len != tlen || tlen != cnt) m = nfsrv_adj(m3, len - tlen, tlen - cnt); *mpp = m3; *mpendp = m; out: NFSEXITCODE(error); return (error); } /* * Create the iovec for the mbuf chain passed in as an argument. * The "cp" argument is where the data starts within the first mbuf in * the chain. It returns the iovec and the iovcnt. */ static int nfsrv_createiovecw(int retlen, struct mbuf *m, char *cp, struct iovec **ivpp, int *iovcntp) { struct mbuf *mp; struct iovec *ivp; int cnt, i, len; /* * Loop through the mbuf chain, counting how many mbufs are a * part of this write operation, so the iovec size is known. */ cnt = 0; len = retlen; mp = m; i = mtod(mp, caddr_t) + mp->m_len - cp; while (len > 0) { if (i > 0) { len -= i; cnt++; } mp = mp->m_next; if (!mp) { if (len > 0) return (EBADRPC); } else i = mp->m_len; } /* Now, create the iovec. */ mp = m; *ivpp = ivp = malloc(cnt * sizeof (struct iovec), M_TEMP, M_WAITOK); *iovcntp = cnt; i = mtod(mp, caddr_t) + mp->m_len - cp; len = retlen; while (len > 0) { if (mp == NULL) panic("nfsrv_createiovecw"); if (i > 0) { i = min(i, len); ivp->iov_base = cp; ivp->iov_len = i; ivp++; len -= i; } mp = mp->m_next; if (mp) { i = mp->m_len; cp = mtod(mp, caddr_t); } } return (0); } /* * Write vnode op from an mbuf list. */ int nfsvno_write(struct vnode *vp, off_t off, int retlen, int *stable, struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p) { struct iovec *iv; int cnt, ioflags, error; struct uio io, *uiop = &io; struct nfsheur *nh; /* * Attempt to write to a DS file. A return of ENOENT implies * there is no DS file to write. */ error = nfsrv_proxyds(vp, off, retlen, cred, p, NFSPROC_WRITEDS, &mp, cp, NULL, NULL, NULL, NULL, 0, NULL); if (error != ENOENT) { *stable = NFSWRITE_FILESYNC; return (error); } if (*stable == NFSWRITE_UNSTABLE) ioflags = IO_NODELOCKED; else ioflags = (IO_SYNC | IO_NODELOCKED); error = nfsrv_createiovecw(retlen, mp, cp, &iv, &cnt); if (error != 0) return (error); uiop->uio_iov = iv; uiop->uio_iovcnt = cnt; uiop->uio_resid = retlen; uiop->uio_rw = UIO_WRITE; uiop->uio_segflg = UIO_SYSSPACE; NFSUIOPROC(uiop, p); uiop->uio_offset = off; nh = nfsrv_sequential_heuristic(uiop, vp); ioflags |= nh->nh_seqcount << IO_SEQSHIFT; /* XXX KDM make this more systematic? */ nfsstatsv1.srvbytes[NFSV4OP_WRITE] += uiop->uio_resid; error = VOP_WRITE(vp, uiop, ioflags, cred); if (error == 0) nh->nh_nextoff = uiop->uio_offset; free(iv, M_TEMP); NFSEXITCODE(error); return (error); } /* * Common code for creating a regular file (plus special files for V2). */ int nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp, struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp, int32_t *cverf, NFSDEV_T rdev, struct nfsexstuff *exp) { u_quad_t tempsize; int error; struct thread *p = curthread; error = nd->nd_repstat; if (!error && ndp->ni_vp == NULL) { if (nvap->na_type == VREG || nvap->na_type == VSOCK) { vrele(ndp->ni_startdir); error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); /* For a pNFS server, create the data file on a DS. */ if (error == 0 && nvap->na_type == VREG) { /* * Create a data file on a DS for a pNFS server. * This function just returns if not * running a pNFS DS or the creation fails. */ nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr, nd->nd_cred, p); } VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : NULL, false); nfsvno_relpathbuf(ndp); if (!error) { if (*exclusive_flagp) { *exclusive_flagp = 0; NFSVNO_ATTRINIT(nvap); nvap->na_atime.tv_sec = cverf[0]; nvap->na_atime.tv_nsec = cverf[1]; error = VOP_SETATTR(ndp->ni_vp, &nvap->na_vattr, nd->nd_cred); if (error != 0) { vput(ndp->ni_vp); ndp->ni_vp = NULL; error = NFSERR_NOTSUPP; } } } /* * NFS V2 Only. nfsrvd_mknod() does this for V3. * (This implies, just get out on an error.) */ } else if (nvap->na_type == VCHR || nvap->na_type == VBLK || nvap->na_type == VFIFO) { if (nvap->na_type == VCHR && rdev == 0xffffffff) nvap->na_type = VFIFO; if (nvap->na_type != VFIFO && (error = priv_check_cred(nd->nd_cred, PRIV_VFS_MKNOD_DEV))) { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vput(ndp->ni_dvp); goto out; } nvap->na_rdev = rdev; error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : NULL, false); nfsvno_relpathbuf(ndp); vrele(ndp->ni_startdir); if (error) goto out; } else { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vput(ndp->ni_dvp); error = ENXIO; goto out; } *vpp = ndp->ni_vp; } else { /* * Handle cases where error is already set and/or * the file exists. * 1 - clean up the lookup * 2 - iff !error and na_size set, truncate it */ vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); *vpp = ndp->ni_vp; if (ndp->ni_dvp == *vpp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); if (!error && nvap->na_size != VNOVAL) { error = nfsvno_accchk(*vpp, VWRITE, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); if (!error) { tempsize = nvap->na_size; NFSVNO_ATTRINIT(nvap); nvap->na_size = tempsize; error = VOP_SETATTR(*vpp, &nvap->na_vattr, nd->nd_cred); } } if (error) vput(*vpp); } out: NFSEXITCODE(error); return (error); } /* * Do a mknod vnode op. */ int nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred, struct thread *p) { int error = 0; enum vtype vtyp; vtyp = nvap->na_type; /* * Iff doesn't exist, create it. */ if (ndp->ni_vp) { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vput(ndp->ni_dvp); vrele(ndp->ni_vp); error = EEXIST; goto out; } if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vput(ndp->ni_dvp); error = NFSERR_BADTYPE; goto out; } if (vtyp == VSOCK) { vrele(ndp->ni_startdir); error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : NULL, false); nfsvno_relpathbuf(ndp); } else { if (nvap->na_type != VFIFO && (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV))) { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vput(ndp->ni_dvp); goto out; } error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : NULL, false); nfsvno_relpathbuf(ndp); vrele(ndp->ni_startdir); /* * Since VOP_MKNOD returns the ni_vp, I can't * see any reason to do the lookup. */ } out: NFSEXITCODE(error); return (error); } /* * Mkdir vnode op. */ int nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { int error = 0; if (ndp->ni_vp != NULL) { if (ndp->ni_dvp == ndp->ni_vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); vrele(ndp->ni_vp); nfsvno_relpathbuf(ndp); error = EEXIST; goto out; } error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : NULL, false); nfsvno_relpathbuf(ndp); out: NFSEXITCODE(error); return (error); } /* * symlink vnode op. */ int nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp, int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { int error = 0; if (ndp->ni_vp) { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); if (ndp->ni_dvp == ndp->ni_vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); vrele(ndp->ni_vp); error = EEXIST; goto out; } error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr, pathcp); /* * Although FreeBSD still had the lookup code in * it for 7/current, there doesn't seem to be any * point, since VOP_SYMLINK() returns the ni_vp. * Just vput it for v2. */ VOP_VPUT_PAIR(ndp->ni_dvp, &ndp->ni_vp, !not_v2 && error == 0); vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); out: NFSEXITCODE(error); return (error); } /* * Parse symbolic link arguments. * This function has an ugly side effect. It will malloc() an area for * the symlink and set iov_base to point to it, only if it succeeds. * So, if it returns with uiop->uio_iov->iov_base != NULL, that must * be FREE'd later. */ int nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap, struct thread *p, char **pathcpp, int *lenp) { u_int32_t *tl; char *pathcp = NULL; int error = 0, len; struct nfsv2_sattr *sp; *pathcpp = NULL; *lenp = 0; if ((nd->nd_flag & ND_NFSV3) && (error = nfsrv_sattr(nd, NULL, nvap, NULL, NULL, p))) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len > NFS_MAXPATHLEN || len <= 0) { error = EBADRPC; goto nfsmout; } pathcp = malloc(len + 1, M_TEMP, M_WAITOK); error = nfsrv_mtostr(nd, pathcp, len); if (error) goto nfsmout; if (nd->nd_flag & ND_NFSV2) { NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode); } *pathcpp = pathcp; *lenp = len; NFSEXITCODE2(0, nd); return (0); nfsmout: if (pathcp) free(pathcp, M_TEMP); NFSEXITCODE2(error, nd); return (error); } /* * Remove a non-directory object. */ int nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { struct vnode *vp, *dsdvp[NFSDEV_MAXMIRRORS]; int error = 0, mirrorcnt; char fname[PNFS_FILENAME_LEN + 1]; fhandle_t fh; vp = ndp->ni_vp; dsdvp[0] = NULL; if (vp->v_type == VDIR) error = NFSERR_ISDIR; else if (is_v4) error = nfsrv_checkremove(vp, 1, NULL, (nfsquad_t)((u_quad_t)0), p); if (error == 0) nfsrv_pnfsremovesetup(vp, p, dsdvp, &mirrorcnt, fname, &fh); if (!error) error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd); if (error == 0 && dsdvp[0] != NULL) nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p); if (ndp->ni_dvp == vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); vput(vp); if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0) nfsvno_relpathbuf(ndp); NFSEXITCODE(error); return (error); } /* * Remove a directory. */ int nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { struct vnode *vp; int error = 0; vp = ndp->ni_vp; if (vp->v_type != VDIR) { error = ENOTDIR; goto out; } /* * No rmdir "." please. */ if (ndp->ni_dvp == vp) { error = EINVAL; goto out; } /* * The root of a mounted filesystem cannot be deleted. */ if (vp->v_vflag & VV_ROOT) error = EBUSY; out: if (!error) error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd); if (ndp->ni_dvp == vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); vput(vp); if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0) nfsvno_relpathbuf(ndp); NFSEXITCODE(error); return (error); } /* * Rename vnode op. */ int nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp, u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p) { struct vnode *fvp, *tvp, *tdvp, *dsdvp[NFSDEV_MAXMIRRORS]; int error = 0, mirrorcnt; char fname[PNFS_FILENAME_LEN + 1]; fhandle_t fh; dsdvp[0] = NULL; fvp = fromndp->ni_vp; if (ndstat) { vrele(fromndp->ni_dvp); vrele(fvp); error = ndstat; goto out1; } tdvp = tondp->ni_dvp; tvp = tondp->ni_vp; if (tvp != NULL) { if (fvp->v_type == VDIR && tvp->v_type != VDIR) { error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST; goto out; } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST; goto out; } if (tvp->v_type == VDIR && tvp->v_mountedhere) { error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; goto out; } /* * A rename to '.' or '..' results in a prematurely * unlocked vnode on FreeBSD5, so I'm just going to fail that * here. */ if ((tondp->ni_cnd.cn_namelen == 1 && tondp->ni_cnd.cn_nameptr[0] == '.') || (tondp->ni_cnd.cn_namelen == 2 && tondp->ni_cnd.cn_nameptr[0] == '.' && tondp->ni_cnd.cn_nameptr[1] == '.')) { error = EINVAL; goto out; } } if (fvp->v_type == VDIR && fvp->v_mountedhere) { error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; goto out; } if (fvp->v_mount != tdvp->v_mount) { error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; goto out; } if (fvp == tdvp) { error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL; goto out; } if (fvp == tvp) { /* * If source and destination are the same, there is nothing to * do. Set error to -1 to indicate this. */ error = -1; goto out; } if (ndflag & ND_NFSV4) { if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) { error = nfsrv_checkremove(fvp, 0, NULL, (nfsquad_t)((u_quad_t)0), p); NFSVOPUNLOCK(fvp); } else error = EPERM; if (tvp && !error) error = nfsrv_checkremove(tvp, 1, NULL, (nfsquad_t)((u_quad_t)0), p); } else { /* * For NFSv2 and NFSv3, try to get rid of the delegation, so * that the NFSv4 client won't be confused by the rename. * Since nfsd_recalldelegation() can only be called on an * unlocked vnode at this point and fvp is the file that will * still exist after the rename, just do fvp. */ nfsd_recalldelegation(fvp, p); } if (error == 0 && tvp != NULL) { nfsrv_pnfsremovesetup(tvp, p, dsdvp, &mirrorcnt, fname, &fh); NFSD_DEBUG(4, "nfsvno_rename: pnfsremovesetup" " dsdvp=%p\n", dsdvp[0]); } out: if (!error) { error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp, &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp, &tondp->ni_cnd); } else { if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); vrele(fromndp->ni_dvp); vrele(fvp); if (error == -1) error = 0; } /* * If dsdvp[0] != NULL, it was set up by nfsrv_pnfsremovesetup() and * if the rename succeeded, the DS file for the tvp needs to be * removed. */ if (error == 0 && dsdvp[0] != NULL) { nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p); NFSD_DEBUG(4, "nfsvno_rename: pnfsremove\n"); } vrele(tondp->ni_startdir); nfsvno_relpathbuf(tondp); out1: vrele(fromndp->ni_startdir); nfsvno_relpathbuf(fromndp); NFSEXITCODE(error); return (error); } /* * Link vnode op. */ int nfsvno_link(struct nameidata *ndp, struct vnode *vp, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { struct vnode *xp; int error = 0; xp = ndp->ni_vp; if (xp != NULL) { error = EEXIST; } else { xp = ndp->ni_dvp; if (vp->v_mount != xp->v_mount) error = EXDEV; } if (!error) { NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); if (!VN_IS_DOOMED(vp)) error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd); else error = EPERM; if (ndp->ni_dvp == vp) { vrele(ndp->ni_dvp); NFSVOPUNLOCK(vp); } else { vref(vp); VOP_VPUT_PAIR(ndp->ni_dvp, &vp, true); } } else { if (ndp->ni_dvp == ndp->ni_vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); if (ndp->ni_vp) vrele(ndp->ni_vp); } nfsvno_relpathbuf(ndp); NFSEXITCODE(error); return (error); } /* * Do the fsync() appropriate for the commit. */ int nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred, struct thread *td) { int error = 0; /* * RFC 1813 3.3.21: if count is 0, a flush from offset to the end of * file is done. At this time VOP_FSYNC does not accept offset and * byte count parameters so call VOP_FSYNC the whole file for now. * The same is true for NFSv4: RFC 3530 Sec. 14.2.3. * File systems that do not use the buffer cache (as indicated * by MNTK_USES_BCACHE not being set) must use VOP_FSYNC(). */ if (cnt == 0 || cnt > MAX_COMMIT_COUNT || (vp->v_mount->mnt_kern_flag & MNTK_USES_BCACHE) == 0) { /* * Give up and do the whole thing */ if (vp->v_object && vm_object_mightbedirty(vp->v_object)) { VM_OBJECT_WLOCK(vp->v_object); vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC); VM_OBJECT_WUNLOCK(vp->v_object); } error = VOP_FSYNC(vp, MNT_WAIT, td); } else { /* * Locate and synchronously write any buffers that fall * into the requested range. Note: we are assuming that * f_iosize is a power of 2. */ int iosize = vp->v_mount->mnt_stat.f_iosize; int iomask = iosize - 1; struct bufobj *bo; daddr_t lblkno; /* * Align to iosize boundary, super-align to page boundary. */ if (off & iomask) { cnt += off & iomask; off &= ~(u_quad_t)iomask; } if (off & PAGE_MASK) { cnt += off & PAGE_MASK; off &= ~(u_quad_t)PAGE_MASK; } lblkno = off / iosize; if (vp->v_object && vm_object_mightbedirty(vp->v_object)) { VM_OBJECT_WLOCK(vp->v_object); vm_object_page_clean(vp->v_object, off, off + cnt, OBJPC_SYNC); VM_OBJECT_WUNLOCK(vp->v_object); } bo = &vp->v_bufobj; BO_LOCK(bo); while (cnt > 0) { struct buf *bp; /* * If we have a buffer and it is marked B_DELWRI we * have to lock and write it. Otherwise the prior * write is assumed to have already been committed. * * gbincore() can return invalid buffers now so we * have to check that bit as well (though B_DELWRI * should not be set if B_INVAL is set there could be * a race here since we haven't locked the buffer). */ if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) { if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) { BO_LOCK(bo); continue; /* retry */ } if ((bp->b_flags & (B_DELWRI|B_INVAL)) == B_DELWRI) { bremfree(bp); bp->b_flags &= ~B_ASYNC; bwrite(bp); ++nfs_commit_miss; } else BUF_UNLOCK(bp); BO_LOCK(bo); } ++nfs_commit_blks; if (cnt < iosize) break; cnt -= iosize; ++lblkno; } BO_UNLOCK(bo); } NFSEXITCODE(error); return (error); } /* * Statfs vnode op. */ int nfsvno_statfs(struct vnode *vp, struct statfs *sf) { struct statfs *tsf; int error; tsf = NULL; if (nfsrv_devidcnt > 0) { /* For a pNFS service, get the DS numbers. */ tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK | M_ZERO); error = nfsrv_pnfsstatfs(tsf, vp->v_mount); if (error != 0) { free(tsf, M_TEMP); tsf = NULL; } } error = VFS_STATFS(vp->v_mount, sf); if (error == 0) { if (tsf != NULL) { sf->f_blocks = tsf->f_blocks; sf->f_bavail = tsf->f_bavail; sf->f_bfree = tsf->f_bfree; sf->f_bsize = tsf->f_bsize; } /* * Since NFS handles these values as unsigned on the * wire, there is no way to represent negative values, * so set them to 0. Without this, they will appear * to be very large positive values for clients like * Solaris10. */ if (sf->f_bavail < 0) sf->f_bavail = 0; if (sf->f_ffree < 0) sf->f_ffree = 0; } free(tsf, M_TEMP); NFSEXITCODE(error); return (error); } /* * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but * must handle nfsrv_opencheck() calls after any other access checks. */ void nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp, nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp, int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create, NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred, struct nfsexstuff *exp, struct vnode **vpp) { struct vnode *vp = NULL; u_quad_t tempsize; struct nfsexstuff nes; struct thread *p = curthread; if (ndp->ni_vp == NULL) nd->nd_repstat = nfsrv_opencheck(clientid, stateidp, stp, NULL, nd, p, nd->nd_repstat); if (!nd->nd_repstat) { if (ndp->ni_vp == NULL) { vrele(ndp->ni_startdir); nd->nd_repstat = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); /* For a pNFS server, create the data file on a DS. */ if (nd->nd_repstat == 0) { /* * Create a data file on a DS for a pNFS server. * This function just returns if not * running a pNFS DS or the creation fails. */ nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr, cred, p); } VOP_VPUT_PAIR(ndp->ni_dvp, nd->nd_repstat == 0 ? &ndp->ni_vp : NULL, false); nfsvno_relpathbuf(ndp); if (!nd->nd_repstat) { if (*exclusive_flagp) { *exclusive_flagp = 0; NFSVNO_ATTRINIT(nvap); nvap->na_atime.tv_sec = cverf[0]; nvap->na_atime.tv_nsec = cverf[1]; nd->nd_repstat = VOP_SETATTR(ndp->ni_vp, &nvap->na_vattr, cred); if (nd->nd_repstat != 0) { vput(ndp->ni_vp); ndp->ni_vp = NULL; nd->nd_repstat = NFSERR_NOTSUPP; } else NFSSETBIT_ATTRBIT(attrbitp, NFSATTRBIT_TIMEACCESS); } else { nfsrv_fixattr(nd, ndp->ni_vp, nvap, aclp, p, attrbitp, exp); } } vp = ndp->ni_vp; } else { if (ndp->ni_startdir) vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vp = ndp->ni_vp; if (create == NFSV4OPEN_CREATE) { if (ndp->ni_dvp == vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); } if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) { if (ndp->ni_cnd.cn_flags & RDONLY) NFSVNO_SETEXRDONLY(&nes); else NFSVNO_EXINIT(&nes); nd->nd_repstat = nfsvno_accchk(vp, VWRITE, cred, &nes, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); nd->nd_repstat = nfsrv_opencheck(clientid, stateidp, stp, vp, nd, p, nd->nd_repstat); if (!nd->nd_repstat) { tempsize = nvap->na_size; NFSVNO_ATTRINIT(nvap); nvap->na_size = tempsize; nd->nd_repstat = VOP_SETATTR(vp, &nvap->na_vattr, cred); } } else if (vp->v_type == VREG) { nd->nd_repstat = nfsrv_opencheck(clientid, stateidp, stp, vp, nd, p, nd->nd_repstat); } } } else { if (ndp->ni_cnd.cn_flags & HASBUF) nfsvno_relpathbuf(ndp); if (ndp->ni_startdir && create == NFSV4OPEN_CREATE) { vrele(ndp->ni_startdir); if (ndp->ni_dvp == ndp->ni_vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); if (ndp->ni_vp) vput(ndp->ni_vp); } } *vpp = vp; NFSEXITCODE2(0, nd); } /* * Updates the file rev and sets the mtime and ctime * to the current clock time, returning the va_filerev and va_Xtime * values. * Return ESTALE to indicate the vnode is VIRF_DOOMED. */ int nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap, struct nfsrv_descript *nd, struct thread *p) { struct vattr va; VATTR_NULL(&va); vfs_timestamp(&va.va_mtime); if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); if (VN_IS_DOOMED(vp)) return (ESTALE); } (void) VOP_SETATTR(vp, &va, nd->nd_cred); (void) nfsvno_getattr(vp, nvap, nd, p, 1, NULL); return (0); } /* * Glue routine to nfsv4_fillattr(). */ int nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp, struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp, struct ucred *cred, struct thread *p, int isdgram, int reterr, int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno) { struct statfs *sf; int error; sf = NULL; if (nfsrv_devidcnt > 0 && (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEAVAIL) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEFREE) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACETOTAL))) { sf = malloc(sizeof(*sf), M_TEMP, M_WAITOK | M_ZERO); error = nfsrv_pnfsstatfs(sf, mp); if (error != 0) { free(sf, M_TEMP); sf = NULL; } } error = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp, rderror, attrbitp, cred, p, isdgram, reterr, supports_nfsv4acls, at_root, mounted_on_fileno, sf); free(sf, M_TEMP); NFSEXITCODE2(0, nd); return (error); } /* Since the Readdir vnode ops vary, put the entire functions in here. */ /* * nfs readdir service * - mallocs what it thinks is enough to read * count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR * - calls VOP_READDIR() * - loops around building the reply * if the output generated exceeds count break out of loop * The NFSM_CLGET macro is used here so that the reply will be packed * tightly in mbuf clusters. * - it trims out records with d_fileno == 0 * this doesn't matter for Unix clients, but they might confuse clients * for other os'. * - it trims out records with d_type == DT_WHT * these cannot be seen through NFS (unless we extend the protocol) * The alternate call nfsrvd_readdirplus() does lookups as well. * PS: The NFS protocol spec. does not clarify what the "count" byte * argument is a count of.. just name strings and file id's or the * entire reply rpc or ... * I tried just file name and id sizes and it confused the Sun client, * so I am using the full rpc size now. The "paranoia.." comment refers * to including the status longwords that are not a part of the dir. * "entry" structures, but are in the rpc. */ int nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram, struct vnode *vp, struct nfsexstuff *exp) { struct dirent *dp; u_int32_t *tl; int dirlen; char *cpos, *cend, *rbuf; struct nfsvattr at; int nlen, error = 0, getret = 1; int siz, cnt, fullsiz, eofflag, ncookies; u_int64_t off, toff, verf __unused; u_long *cookies = NULL, *cookiep; struct uio io; struct iovec iv; int is_ufs; struct thread *p = curthread; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); goto out; } if (nd->nd_flag & ND_NFSV2) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); off = fxdr_unsigned(u_quad_t, *tl++); } else { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); off = fxdr_hyper(tl); tl += 2; verf = fxdr_hyper(tl); tl += 2; } toff = off; cnt = fxdr_unsigned(int, *tl); if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0) cnt = NFS_SRVMAXDATA(nd); siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); fullsiz = siz; if (nd->nd_flag & ND_NFSV3) { nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); #if 0 /* * va_filerev is not sufficient as a cookie verifier, * since it is not supposed to change when entries are * removed/added unless that offset cookies returned to * the client are no longer valid. */ if (!nd->nd_repstat && toff && verf != at.na_filerev) nd->nd_repstat = NFSERR_BAD_COOKIE; #endif } if (!nd->nd_repstat && vp->v_type != VDIR) nd->nd_repstat = NFSERR_NOTDIR; if (nd->nd_repstat == 0 && cnt == 0) { if (nd->nd_flag & ND_NFSV2) /* NFSv2 does not have NFSERR_TOOSMALL */ nd->nd_repstat = EPERM; else nd->nd_repstat = NFSERR_TOOSMALL; } if (!nd->nd_repstat) nd->nd_repstat = nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat) { vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); goto out; } is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0; rbuf = malloc(siz, M_TEMP, M_WAITOK); again: eofflag = 0; if (cookies) { free(cookies, M_TEMP); cookies = NULL; } iv.iov_base = rbuf; iv.iov_len = siz; io.uio_iov = &iv; io.uio_iovcnt = 1; io.uio_offset = (off_t)off; io.uio_resid = siz; io.uio_segflg = UIO_SYSSPACE; io.uio_rw = UIO_READ; io.uio_td = NULL; nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies, &cookies); off = (u_int64_t)io.uio_offset; if (io.uio_resid) siz -= io.uio_resid; if (!cookies && !nd->nd_repstat) nd->nd_repstat = NFSERR_PERM; if (nd->nd_flag & ND_NFSV3) { getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); if (!nd->nd_repstat) nd->nd_repstat = getret; } /* * Handles the failed cases. nd->nd_repstat == 0 past here. */ if (nd->nd_repstat) { vput(vp); free(rbuf, M_TEMP); if (cookies) free(cookies, M_TEMP); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); goto out; } /* * If nothing read, return eof * rpc reply */ if (siz == 0) { vput(vp); if (nd->nd_flag & ND_NFSV2) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); } else { nfsrv_postopattr(nd, getret, &at); NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); txdr_hyper(at.na_filerev, tl); tl += 2; } *tl++ = newnfs_false; *tl = newnfs_true; free(rbuf, M_TEMP); free(cookies, M_TEMP); goto out; } /* * Check for degenerate cases of nothing useful read. * If so go try again */ cpos = rbuf; cend = rbuf + siz; dp = (struct dirent *)cpos; cookiep = cookies; /* * For some reason FreeBSD's ufs_readdir() chooses to back the * directory offset up to a block boundary, so it is necessary to * skip over the records that precede the requested offset. This * requires the assumption that file offset cookies monotonically * increase. */ while (cpos < cend && ncookies > 0 && (dp->d_fileno == 0 || dp->d_type == DT_WHT || (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff))) { cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } if (cpos >= cend || ncookies == 0) { siz = fullsiz; toff = off; goto again; } vput(vp); /* * If cnt > MCLBYTES and the reply will not be saved, use * ext_pgs mbufs for TLS. * For NFSv4.0, we do not know for sure if the reply will * be saved, so do not use ext_pgs mbufs for NFSv4.0. */ if (cnt > MCLBYTES && siz > MCLBYTES && (nd->nd_flag & (ND_TLS | ND_EXTPG | ND_SAVEREPLY)) == ND_TLS && (nd->nd_flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4) nd->nd_flag |= ND_EXTPG; /* * dirlen is the size of the reply, including all XDR and must * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate * if the XDR should be included in "count", but to be safe, we do. * (Include the two booleans at the end of the reply in dirlen now.) */ if (nd->nd_flag & ND_NFSV3) { nfsrv_postopattr(nd, getret, &at); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); txdr_hyper(at.na_filerev, tl); dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED; } else { dirlen = 2 * NFSX_UNSIGNED; } /* Loop through the records and build reply */ while (cpos < cend && ncookies > 0) { nlen = dp->d_namlen; if (dp->d_fileno != 0 && dp->d_type != DT_WHT && nlen <= NFS_MAXNAMLEN) { if (nd->nd_flag & ND_NFSV3) dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen)); else dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen)); if (dirlen > cnt) { eofflag = 0; break; } /* * Build the directory record xdr from * the dirent entry. */ if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl++ = 0; } else { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_true; } *tl = txdr_unsigned(dp->d_fileno); (void) nfsm_strtom(nd, dp->d_name, nlen); if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = 0; } else NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(*cookiep); } cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } if (cpos < cend) eofflag = 0; NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_false; if (eofflag) *tl = newnfs_true; else *tl = newnfs_false; free(rbuf, M_TEMP); free(cookies, M_TEMP); out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * Readdirplus for V3 and Readdir for V4. */ int nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram, struct vnode *vp, struct nfsexstuff *exp) { struct dirent *dp; u_int32_t *tl; int dirlen; char *cpos, *cend, *rbuf; struct vnode *nvp; fhandle_t nfh; struct nfsvattr nva, at, *nvap = &nva; struct mbuf *mb0, *mb1; struct nfsreferral *refp; int nlen, r, error = 0, getret = 1, usevget = 1; int siz, cnt, fullsiz, eofflag, ncookies, entrycnt; caddr_t bpos0, bpos1; u_int64_t off, toff, verf; u_long *cookies = NULL, *cookiep; nfsattrbit_t attrbits, rderrbits, savbits; struct uio io; struct iovec iv; struct componentname cn; int at_root, is_ufs, is_zfs, needs_unbusy, supports_nfsv4acls; struct mount *mp, *new_mp; uint64_t mounted_on_fileno; struct thread *p = curthread; int bextpg0, bextpg1, bextpgsiz0, bextpgsiz1; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); goto out; } NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED); off = fxdr_hyper(tl); toff = off; tl += 2; verf = fxdr_hyper(tl); tl += 2; siz = fxdr_unsigned(int, *tl++); cnt = fxdr_unsigned(int, *tl); /* * Use the server's maximum data transfer size as the upper bound * on reply datalen. */ if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0) cnt = NFS_SRVMAXDATA(nd); /* * siz is a "hint" of how much directory information (name, fileid, * cookie) should be in the reply. At least one client "hints" 0, * so I set it to cnt for that case. I also round it up to the * next multiple of DIRBLKSIZ. * Since the size of a Readdirplus directory entry reply will always * be greater than a directory entry returned by VOP_READDIR(), it * does not make sense to read more than NFS_SRVMAXDATA() via * VOP_READDIR(). */ if (siz <= 0) siz = cnt; else if (siz > NFS_SRVMAXDATA(nd)) siz = NFS_SRVMAXDATA(nd); siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); if (nd->nd_flag & ND_NFSV4) { error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (error) goto nfsmout; NFSSET_ATTRBIT(&savbits, &attrbits); NFSCLRNOTFILLABLE_ATTRBIT(&attrbits, nd); NFSZERO_ATTRBIT(&rderrbits); NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR); } else { NFSZERO_ATTRBIT(&attrbits); } fullsiz = siz; nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); #if 0 if (!nd->nd_repstat) { if (off && verf != at.na_filerev) { /* * va_filerev is not sufficient as a cookie verifier, * since it is not supposed to change when entries are * removed/added unless that offset cookies returned to * the client are no longer valid. */ if (nd->nd_flag & ND_NFSV4) { nd->nd_repstat = NFSERR_NOTSAME; } else { nd->nd_repstat = NFSERR_BAD_COOKIE; } } } #endif if (!nd->nd_repstat && vp->v_type != VDIR) nd->nd_repstat = NFSERR_NOTDIR; if (!nd->nd_repstat && cnt == 0) nd->nd_repstat = NFSERR_TOOSMALL; if (!nd->nd_repstat) nd->nd_repstat = nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat) { vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); goto out; } is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0; is_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs") == 0; rbuf = malloc(siz, M_TEMP, M_WAITOK); again: eofflag = 0; if (cookies) { free(cookies, M_TEMP); cookies = NULL; } iv.iov_base = rbuf; iv.iov_len = siz; io.uio_iov = &iv; io.uio_iovcnt = 1; io.uio_offset = (off_t)off; io.uio_resid = siz; io.uio_segflg = UIO_SYSSPACE; io.uio_rw = UIO_READ; io.uio_td = NULL; nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies, &cookies); off = (u_int64_t)io.uio_offset; if (io.uio_resid) siz -= io.uio_resid; getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); if (!cookies && !nd->nd_repstat) nd->nd_repstat = NFSERR_PERM; if (!nd->nd_repstat) nd->nd_repstat = getret; if (nd->nd_repstat) { vput(vp); if (cookies) free(cookies, M_TEMP); free(rbuf, M_TEMP); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); goto out; } /* * If nothing read, return eof * rpc reply */ if (siz == 0) { vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); txdr_hyper(at.na_filerev, tl); tl += 2; *tl++ = newnfs_false; *tl = newnfs_true; free(cookies, M_TEMP); free(rbuf, M_TEMP); goto out; } /* * Check for degenerate cases of nothing useful read. * If so go try again */ cpos = rbuf; cend = rbuf + siz; dp = (struct dirent *)cpos; cookiep = cookies; /* * For some reason FreeBSD's ufs_readdir() chooses to back the * directory offset up to a block boundary, so it is necessary to * skip over the records that precede the requested offset. This * requires the assumption that file offset cookies monotonically * increase. */ while (cpos < cend && ncookies > 0 && (dp->d_fileno == 0 || dp->d_type == DT_WHT || (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff) || ((nd->nd_flag & ND_NFSV4) && ((dp->d_namlen == 1 && dp->d_name[0] == '.') || (dp->d_namlen==2 && dp->d_name[0]=='.' && dp->d_name[1]=='.'))))) { cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } if (cpos >= cend || ncookies == 0) { siz = fullsiz; toff = off; goto again; } /* * Busy the file system so that the mount point won't go away * and, as such, VFS_VGET() can be used safely. */ mp = vp->v_mount; vfs_ref(mp); NFSVOPUNLOCK(vp); nd->nd_repstat = vfs_busy(mp, 0); vfs_rel(mp); if (nd->nd_repstat != 0) { vrele(vp); free(cookies, M_TEMP); free(rbuf, M_TEMP); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); goto out; } /* * Check to see if entries in this directory can be safely acquired * via VFS_VGET() or if a switch to VOP_LOOKUP() is required. * ZFS snapshot directories need VOP_LOOKUP(), so that any * automount of the snapshot directory that is required will * be done. * This needs to be done here for NFSv4, since NFSv4 never does * a VFS_VGET() for "." or "..". */ if (is_zfs == 1) { r = VFS_VGET(mp, at.na_fileid, LK_SHARED, &nvp); if (r == EOPNOTSUPP) { usevget = 0; cn.cn_nameiop = LOOKUP; cn.cn_lkflags = LK_SHARED | LK_RETRY; cn.cn_cred = nd->nd_cred; cn.cn_thread = p; } else if (r == 0) vput(nvp); } /* * If the reply is likely to exceed MCLBYTES and the reply will * not be saved, use ext_pgs mbufs for TLS. * It is difficult to predict how large each entry will be and * how many entries have been read, so just assume the directory * entries grow by a factor of 4 when attributes are included. * For NFSv4.0, we do not know for sure if the reply will * be saved, so do not use ext_pgs mbufs for NFSv4.0. */ if (cnt > MCLBYTES && siz > MCLBYTES / 4 && (nd->nd_flag & (ND_TLS | ND_EXTPG | ND_SAVEREPLY)) == ND_TLS && (nd->nd_flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4) nd->nd_flag |= ND_EXTPG; /* * Save this position, in case there is an error before one entry * is created. */ mb0 = nd->nd_mb; bpos0 = nd->nd_bpos; bextpg0 = nd->nd_bextpg; bextpgsiz0 = nd->nd_bextpgsiz; /* * Fill in the first part of the reply. * dirlen is the reply length in bytes and cannot exceed cnt. * (Include the two booleans at the end of the reply in dirlen now, * so we recognize when we have exceeded cnt.) */ if (nd->nd_flag & ND_NFSV3) { dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED; nfsrv_postopattr(nd, getret, &at); } else { dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED; } NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); txdr_hyper(at.na_filerev, tl); /* * Save this position, in case there is an empty reply needed. */ mb1 = nd->nd_mb; bpos1 = nd->nd_bpos; bextpg1 = nd->nd_bextpg; bextpgsiz1 = nd->nd_bextpgsiz; /* Loop through the records and build reply */ entrycnt = 0; while (cpos < cend && ncookies > 0 && dirlen < cnt) { nlen = dp->d_namlen; if (dp->d_fileno != 0 && dp->d_type != DT_WHT && nlen <= NFS_MAXNAMLEN && ((nd->nd_flag & ND_NFSV3) || nlen > 2 || (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.')) || (nlen == 1 && dp->d_name[0] != '.'))) { /* * Save the current position in the reply, in case * this entry exceeds cnt. */ mb1 = nd->nd_mb; bpos1 = nd->nd_bpos; bextpg1 = nd->nd_bextpg; bextpgsiz1 = nd->nd_bextpgsiz; /* * For readdir_and_lookup get the vnode using * the file number. */ nvp = NULL; refp = NULL; r = 0; at_root = 0; needs_unbusy = 0; new_mp = mp; mounted_on_fileno = (uint64_t)dp->d_fileno; if ((nd->nd_flag & ND_NFSV3) || NFSNONZERO_ATTRBIT(&savbits)) { if (nd->nd_flag & ND_NFSV4) refp = nfsv4root_getreferral(NULL, vp, dp->d_fileno); if (refp == NULL) { if (usevget) r = VFS_VGET(mp, dp->d_fileno, LK_SHARED, &nvp); else r = EOPNOTSUPP; if (r == EOPNOTSUPP) { if (usevget) { usevget = 0; cn.cn_nameiop = LOOKUP; cn.cn_lkflags = LK_SHARED | LK_RETRY; cn.cn_cred = nd->nd_cred; cn.cn_thread = p; } cn.cn_nameptr = dp->d_name; cn.cn_namelen = nlen; cn.cn_flags = ISLASTCN | NOFOLLOW | LOCKLEAF; if (nlen == 2 && dp->d_name[0] == '.' && dp->d_name[1] == '.') cn.cn_flags |= ISDOTDOT; if (NFSVOPLOCK(vp, LK_SHARED) != 0) { nd->nd_repstat = EPERM; break; } if ((vp->v_vflag & VV_ROOT) != 0 && (cn.cn_flags & ISDOTDOT) != 0) { vref(vp); nvp = vp; r = 0; } else { r = VOP_LOOKUP(vp, &nvp, &cn); if (vp != nvp) NFSVOPUNLOCK(vp); } } /* * For NFSv4, check to see if nvp is * a mount point and get the mount * point vnode, as required. */ if (r == 0 && nfsrv_enable_crossmntpt != 0 && (nd->nd_flag & ND_NFSV4) != 0 && nvp->v_type == VDIR && nvp->v_mountedhere != NULL) { new_mp = nvp->v_mountedhere; r = vfs_busy(new_mp, 0); vput(nvp); nvp = NULL; if (r == 0) { r = VFS_ROOT(new_mp, LK_SHARED, &nvp); needs_unbusy = 1; if (r == 0) at_root = 1; } } } /* * If we failed to look up the entry, then it * has become invalid, most likely removed. */ if (r != 0) { if (needs_unbusy) vfs_unbusy(new_mp); goto invalid; } KASSERT(refp != NULL || nvp != NULL, ("%s: undetected lookup error", __func__)); if (refp == NULL && ((nd->nd_flag & ND_NFSV3) || NFSNONZERO_ATTRBIT(&attrbits))) { r = nfsvno_getfh(nvp, &nfh, p); if (!r) r = nfsvno_getattr(nvp, nvap, nd, p, 1, &attrbits); if (r == 0 && is_zfs == 1 && nfsrv_enable_crossmntpt != 0 && (nd->nd_flag & ND_NFSV4) != 0 && nvp->v_type == VDIR && vp->v_mount != nvp->v_mount) { /* * For a ZFS snapshot, there is a * pseudo mount that does not set * v_mountedhere, so it needs to * be detected via a different * mount structure. */ at_root = 1; if (new_mp == mp) new_mp = nvp->v_mount; } } /* * If we failed to get attributes of the entry, * then just skip it for NFSv3 (the traditional * behavior in the old NFS server). * For NFSv4 the behavior is controlled by * RDATTRERROR: we either ignore the error or * fail the request. * Note that RDATTRERROR is never set for NFSv3. */ if (r != 0) { if (!NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_RDATTRERROR)) { vput(nvp); if (needs_unbusy != 0) vfs_unbusy(new_mp); if ((nd->nd_flag & ND_NFSV3)) goto invalid; nd->nd_repstat = r; break; } } } /* * Build the directory record xdr */ if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl++ = 0; *tl = txdr_unsigned(dp->d_fileno); dirlen += nfsm_strtom(nd, dp->d_name, nlen); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = 0; *tl = txdr_unsigned(*cookiep); nfsrv_postopattr(nd, 0, nvap); dirlen += nfsm_fhtom(nd,(u_int8_t *)&nfh,0,1); dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR); if (nvp != NULL) vput(nvp); } else { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl++ = 0; *tl = txdr_unsigned(*cookiep); dirlen += nfsm_strtom(nd, dp->d_name, nlen); if (nvp != NULL) { supports_nfsv4acls = nfs_supportsnfsv4acls(nvp); NFSVOPUNLOCK(nvp); } else supports_nfsv4acls = 0; if (refp != NULL) { dirlen += nfsrv_putreferralattr(nd, &savbits, refp, 0, &nd->nd_repstat); if (nd->nd_repstat) { if (nvp != NULL) vrele(nvp); if (needs_unbusy != 0) vfs_unbusy(new_mp); break; } } else if (r) { dirlen += nfsvno_fillattr(nd, new_mp, nvp, nvap, &nfh, r, &rderrbits, nd->nd_cred, p, isdgram, 0, supports_nfsv4acls, at_root, mounted_on_fileno); } else { dirlen += nfsvno_fillattr(nd, new_mp, nvp, nvap, &nfh, r, &attrbits, nd->nd_cred, p, isdgram, 0, supports_nfsv4acls, at_root, mounted_on_fileno); } if (nvp != NULL) vrele(nvp); dirlen += (3 * NFSX_UNSIGNED); } if (needs_unbusy != 0) vfs_unbusy(new_mp); if (dirlen <= cnt) entrycnt++; } invalid: cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } vrele(vp); vfs_unbusy(mp); /* * If dirlen > cnt, we must strip off the last entry. If that * results in an empty reply, report NFSERR_TOOSMALL. */ if (dirlen > cnt || nd->nd_repstat) { if (!nd->nd_repstat && entrycnt == 0) nd->nd_repstat = NFSERR_TOOSMALL; if (nd->nd_repstat) { nfsm_trimtrailing(nd, mb0, bpos0, bextpg0, bextpgsiz0); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); } else nfsm_trimtrailing(nd, mb1, bpos1, bextpg1, bextpgsiz1); eofflag = 0; } else if (cpos < cend) eofflag = 0; if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_false; if (eofflag) *tl = newnfs_true; else *tl = newnfs_false; } free(cookies, M_TEMP); free(rbuf, M_TEMP); out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * Get the settable attributes out of the mbuf list. * (Return 0 or EBADRPC) */ int nfsrv_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p) { u_int32_t *tl; struct nfsv2_sattr *sp; int error = 0, toclient = 0; switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) { case ND_NFSV2: NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); /* * Some old clients didn't fill in the high order 16bits. * --> check the low order 2 bytes for 0xffff */ if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff) nvap->na_mode = nfstov_mode(sp->sa_mode); if (sp->sa_uid != newnfs_xdrneg1) nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid); if (sp->sa_gid != newnfs_xdrneg1) nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid); if (sp->sa_size != newnfs_xdrneg1) nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size); if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) { #ifdef notyet fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime); #else nvap->na_atime.tv_sec = fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec); nvap->na_atime.tv_nsec = 0; #endif } if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1) fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime); break; case ND_NFSV3: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nvap->na_mode = nfstov_mode(*tl); } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nvap->na_uid = fxdr_unsigned(uid_t, *tl); } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nvap->na_gid = fxdr_unsigned(gid_t, *tl); } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); nvap->na_size = fxdr_hyper(tl); } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); switch (fxdr_unsigned(int, *tl)) { case NFSV3SATTRTIME_TOCLIENT: NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); fxdr_nfsv3time(tl, &nvap->na_atime); toclient = 1; break; case NFSV3SATTRTIME_TOSERVER: vfs_timestamp(&nvap->na_atime); nvap->na_vaflags |= VA_UTIMES_NULL; break; } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); switch (fxdr_unsigned(int, *tl)) { case NFSV3SATTRTIME_TOCLIENT: NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); fxdr_nfsv3time(tl, &nvap->na_mtime); nvap->na_vaflags &= ~VA_UTIMES_NULL; break; case NFSV3SATTRTIME_TOSERVER: vfs_timestamp(&nvap->na_mtime); if (!toclient) nvap->na_vaflags |= VA_UTIMES_NULL; break; } break; case ND_NFSV4: error = nfsv4_sattr(nd, vp, nvap, attrbitp, aclp, p); } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Handle the setable attributes for V4. * Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise. */ int nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p) { u_int32_t *tl; int attrsum = 0; int i, j; int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0; int moderet, toclient = 0; u_char *cp, namestr[NFSV4_SMALLSTR + 1]; uid_t uid; gid_t gid; u_short mode, mask; /* Same type as va_mode. */ struct vattr va; error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup); if (error) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsize = fxdr_unsigned(int, *tl); /* * Loop around getting the setable attributes. If an unsupported * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return. */ if (retnotsup) { nd->nd_repstat = NFSERR_ATTRNOTSUPP; bitpos = NFSATTRBIT_MAX; } else { bitpos = 0; } moderet = 0; for (; bitpos < NFSATTRBIT_MAX; bitpos++) { if (attrsum > attrsize) { error = NFSERR_BADXDR; goto nfsmout; } if (NFSISSET_ATTRBIT(attrbitp, bitpos)) switch (bitpos) { case NFSATTRBIT_SIZE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (vp != NULL && vp->v_type != VREG) { error = (vp->v_type == VDIR) ? NFSERR_ISDIR : NFSERR_INVAL; goto nfsmout; } nvap->na_size = fxdr_hyper(tl); attrsum += NFSX_HYPER; break; case NFSATTRBIT_ACL: error = nfsrv_dissectacl(nd, aclp, &aceerr, &aclsize, p); if (error) goto nfsmout; if (aceerr && !nd->nd_repstat) nd->nd_repstat = aceerr; attrsum += aclsize; break; case NFSATTRBIT_ARCHIVE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_HIDDEN: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_MIMETYPE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i)); break; case NFSATTRBIT_MODE: moderet = NFSERR_INVAL; /* Can't do MODESETMASKED. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nvap->na_mode = nfstov_mode(*tl); attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_OWNER: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); j = fxdr_unsigned(int, *tl); if (j < 0) { error = NFSERR_BADXDR; goto nfsmout; } if (j > NFSV4_SMALLSTR) cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); else cp = namestr; error = nfsrv_mtostr(nd, cp, j); if (error) { if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); goto nfsmout; } if (!nd->nd_repstat) { nd->nd_repstat = nfsv4_strtouid(nd, cp, j, &uid); if (!nd->nd_repstat) nvap->na_uid = uid; } if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); break; case NFSATTRBIT_OWNERGROUP: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); j = fxdr_unsigned(int, *tl); if (j < 0) { error = NFSERR_BADXDR; goto nfsmout; } if (j > NFSV4_SMALLSTR) cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); else cp = namestr; error = nfsrv_mtostr(nd, cp, j); if (error) { if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); goto nfsmout; } if (!nd->nd_repstat) { nd->nd_repstat = nfsv4_strtogid(nd, cp, j, &gid); if (!nd->nd_repstat) nvap->na_gid = gid; } if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); break; case NFSATTRBIT_SYSTEM: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_TIMEACCESSSET: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsum += NFSX_UNSIGNED; if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); fxdr_nfsv4time(tl, &nvap->na_atime); toclient = 1; attrsum += NFSX_V4TIME; } else { vfs_timestamp(&nvap->na_atime); nvap->na_vaflags |= VA_UTIMES_NULL; } break; case NFSATTRBIT_TIMEBACKUP: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMECREATE: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); fxdr_nfsv4time(tl, &nvap->na_btime); attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMODIFYSET: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsum += NFSX_UNSIGNED; if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); fxdr_nfsv4time(tl, &nvap->na_mtime); nvap->na_vaflags &= ~VA_UTIMES_NULL; attrsum += NFSX_V4TIME; } else { vfs_timestamp(&nvap->na_mtime); if (!toclient) nvap->na_vaflags |= VA_UTIMES_NULL; } break; case NFSATTRBIT_MODESETMASKED: NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); mode = fxdr_unsigned(u_short, *tl++); mask = fxdr_unsigned(u_short, *tl); /* * vp == NULL implies an Open/Create operation. * This attribute can only be used for Setattr and * only for NFSv4.1 or higher. * If moderet != 0, a mode attribute has also been * specified and this attribute cannot be done in the * same Setattr operation. */ if ((nd->nd_flag & ND_NFSV41) == 0) nd->nd_repstat = NFSERR_ATTRNOTSUPP; else if ((mode & ~07777) != 0 || (mask & ~07777) != 0 || vp == NULL) nd->nd_repstat = NFSERR_INVAL; else if (moderet == 0) moderet = VOP_GETATTR(vp, &va, nd->nd_cred); if (moderet == 0) nvap->na_mode = (mode & mask) | (va.va_mode & ~mask); else nd->nd_repstat = moderet; attrsum += 2 * NFSX_UNSIGNED; break; default: nd->nd_repstat = NFSERR_ATTRNOTSUPP; /* * set bitpos so we drop out of the loop. */ bitpos = NFSATTRBIT_MAX; break; } } /* * some clients pad the attrlist, so we need to skip over the * padding. */ if (attrsum > attrsize) { error = NFSERR_BADXDR; } else { attrsize = NFSM_RNDUP(attrsize); if (attrsum < attrsize) error = nfsm_advance(nd, attrsize - attrsum, -1); } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Check/setup export credentials. */ int nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp, struct ucred *credanon, bool testsec) { int error; /* * Check/setup credentials. */ if (nd->nd_flag & ND_GSS) exp->nes_exflag &= ~MNT_EXPORTANON; /* * Check to see if the operation is allowed for this security flavor. */ error = 0; if (testsec) { error = nfsvno_testexp(nd, exp); if (error != 0) goto out; } /* * Check to see if the file system is exported V4 only. */ if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4)) { error = NFSERR_PROGNOTV4; goto out; } /* * Now, map the user credentials. * (Note that ND_AUTHNONE will only be set for an NFSv3 * Fsinfo RPC. If set for anything else, this code might need * to change.) */ if (NFSVNO_EXPORTED(exp)) { if (((nd->nd_flag & ND_GSS) == 0 && nd->nd_cred->cr_uid == 0) || NFSVNO_EXPORTANON(exp) || (nd->nd_flag & ND_AUTHNONE) != 0) { nd->nd_cred->cr_uid = credanon->cr_uid; nd->nd_cred->cr_gid = credanon->cr_gid; crsetgroups(nd->nd_cred, credanon->cr_ngroups, credanon->cr_groups); } else if ((nd->nd_flag & ND_GSS) == 0) { /* * If using AUTH_SYS, call nfsrv_getgrpscred() to see * if there is a replacement credential with a group * list set up by "nfsuserd -manage-gids". * If there is no replacement, nfsrv_getgrpscred() * simply returns its argument. */ nd->nd_cred = nfsrv_getgrpscred(nd->nd_cred); } } out: NFSEXITCODE2(error, nd); return (error); } /* * Check exports. */ int nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp, struct ucred **credp) { int error; error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, &exp->nes_numsecflavor, exp->nes_secflavors); if (error) { if (nfs_rootfhset) { exp->nes_exflag = 0; exp->nes_numsecflavor = 0; error = 0; } } else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor > MAXSECFLAVORS) { printf("nfsvno_checkexp: numsecflavors out of range\n"); exp->nes_numsecflavor = 0; error = EACCES; } NFSEXITCODE(error); return (error); } /* * Get a vnode for a file handle and export stuff. */ int nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam, int lktype, struct vnode **vpp, struct nfsexstuff *exp, struct ucred **credp) { int error; *credp = NULL; exp->nes_numsecflavor = 0; error = VFS_FHTOVP(mp, &fhp->fh_fid, lktype, vpp); if (error != 0) /* Make sure the server replies ESTALE to the client. */ error = ESTALE; if (nam && !error) { error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, &exp->nes_numsecflavor, exp->nes_secflavors); if (error) { if (nfs_rootfhset) { exp->nes_exflag = 0; exp->nes_numsecflavor = 0; error = 0; } else { vput(*vpp); } } else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor > MAXSECFLAVORS) { printf("nfsvno_fhtovp: numsecflavors out of range\n"); exp->nes_numsecflavor = 0; error = EACCES; vput(*vpp); } } NFSEXITCODE(error); return (error); } /* * nfsd_fhtovp() - convert a fh to a vnode ptr * - look up fsid in mount list (if not found ret error) * - get vp and export rights by calling nfsvno_fhtovp() * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon * for AUTH_SYS * - if mpp != NULL, return the mount point so that it can * be used for vn_finished_write() by the caller */ void nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype, struct vnode **vpp, struct nfsexstuff *exp, struct mount **mpp, int startwrite, int nextop) { struct mount *mp, *mpw; struct ucred *credanon; fhandle_t *fhp; int error; if (mpp != NULL) *mpp = NULL; *vpp = NULL; fhp = (fhandle_t *)nfp->nfsrvfh_data; mp = vfs_busyfs(&fhp->fh_fsid); if (mp == NULL) { nd->nd_repstat = ESTALE; goto out; } if (startwrite) { mpw = mp; error = vn_start_write(NULL, &mpw, V_WAIT); if (error != 0) { mpw = NULL; vfs_unbusy(mp); nd->nd_repstat = ESTALE; goto out; } if (lktype == LK_SHARED && !(MNT_SHARED_WRITES(mp))) lktype = LK_EXCLUSIVE; } else mpw = NULL; nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, lktype, vpp, exp, &credanon); vfs_unbusy(mp); /* * For NFSv4 without a pseudo root fs, unexported file handles * can be returned, so that Lookup works everywhere. */ if (!nd->nd_repstat && exp->nes_exflag == 0 && !(nd->nd_flag & ND_NFSV4)) { vput(*vpp); *vpp = NULL; nd->nd_repstat = EACCES; } /* * Personally, I've never seen any point in requiring a * reserved port#, since only in the rare case where the * clients are all boxes with secure system privileges, * does it provide any enhanced security, but... some people * believe it to be useful and keep putting this code back in. * (There is also some "security checker" out there that * complains if the nfs server doesn't enforce this.) * However, note the following: * RFC3530 (NFSv4) specifies that a reserved port# not be * required. * RFC2623 recommends that, if a reserved port# is checked for, * that there be a way to turn that off--> ifdef'd. */ #ifdef NFS_REQRSVPORT if (!nd->nd_repstat) { struct sockaddr_in *saddr; struct sockaddr_in6 *saddr6; saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *); saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *); if (!(nd->nd_flag & ND_NFSV4) && ((saddr->sin_family == AF_INET && ntohs(saddr->sin_port) >= IPPORT_RESERVED) || (saddr6->sin6_family == AF_INET6 && ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) { vput(*vpp); nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK); } } #endif /* NFS_REQRSVPORT */ /* * Check/setup credentials. */ if (!nd->nd_repstat) { nd->nd_saveduid = nd->nd_cred->cr_uid; nd->nd_repstat = nfsd_excred(nd, exp, credanon, nfsrv_checkwrongsec(nd, nextop, (*vpp)->v_type)); if (nd->nd_repstat) vput(*vpp); } if (credanon != NULL) crfree(credanon); if (nd->nd_repstat) { vn_finished_write(mpw); *vpp = NULL; } else if (mpp != NULL) { *mpp = mpw; } out: NFSEXITCODE2(0, nd); } /* * glue for fp. */ static int fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp) { struct filedesc *fdp; struct file *fp; int error = 0; fdp = p->td_proc->p_fd; if (fd < 0 || fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd].fde_file) == NULL) { error = EBADF; goto out; } *fpp = fp; out: NFSEXITCODE(error); return (error); } /* * Called from nfssvc() to update the exports list. Just call * vfs_export(). This has to be done, since the v4 root fake fs isn't * in the mount list. */ int nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p) { struct nfsex_args *nfsexargp = (struct nfsex_args *)argp; int error = 0; struct nameidata nd; fhandle_t fh; error = vfs_export(&nfsv4root_mnt, &nfsexargp->export); if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0) nfs_rootfhset = 0; else if (error == 0) { if (nfsexargp->fspec == NULL) { error = EPERM; goto out; } /* * If fspec != NULL, this is the v4root path. */ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, nfsexargp->fspec, p); if ((error = namei(&nd)) != 0) goto out; error = nfsvno_getfh(nd.ni_vp, &fh, p); vrele(nd.ni_vp); if (!error) { nfs_rootfh.nfsrvfh_len = NFSX_MYFH; NFSBCOPY((caddr_t)&fh, nfs_rootfh.nfsrvfh_data, sizeof (fhandle_t)); nfs_rootfhset = 1; } } out: NFSEXITCODE(error); return (error); } /* * This function needs to test to see if the system is near its limit * for memory allocation via malloc() or mget() and return True iff * either of these resources are near their limit. * XXX (For now, this is just a stub.) */ int nfsrv_testmalloclimit = 0; int nfsrv_mallocmget_limit(void) { static int printmesg = 0; static int testval = 1; if (nfsrv_testmalloclimit && (testval++ % 1000) == 0) { if ((printmesg++ % 100) == 0) printf("nfsd: malloc/mget near limit\n"); return (1); } return (0); } /* * BSD specific initialization of a mount point. */ void nfsd_mntinit(void) { static int inited = 0; if (inited) return; inited = 1; nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED); TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist); TAILQ_INIT(&nfsv4root_mnt.mnt_lazyvnodelist); nfsv4root_mnt.mnt_export = NULL; TAILQ_INIT(&nfsv4root_opt); TAILQ_INIT(&nfsv4root_newopt); nfsv4root_mnt.mnt_opt = &nfsv4root_opt; nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt; nfsv4root_mnt.mnt_nvnodelistsize = 0; nfsv4root_mnt.mnt_lazyvnodelistsize = 0; } /* * Get a vnode for a file handle, without checking exports, etc. */ struct vnode * nfsvno_getvp(fhandle_t *fhp) { struct mount *mp; struct vnode *vp; int error; mp = vfs_busyfs(&fhp->fh_fsid); if (mp == NULL) return (NULL); error = VFS_FHTOVP(mp, &fhp->fh_fid, LK_EXCLUSIVE, &vp); vfs_unbusy(mp); if (error) return (NULL); return (vp); } /* * Do a local VOP_ADVLOCK(). */ int nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first, u_int64_t end, struct thread *td) { int error = 0; struct flock fl; u_int64_t tlen; if (nfsrv_dolocallocks == 0) goto out; ASSERT_VOP_UNLOCKED(vp, "nfsvno_advlock: vp locked"); fl.l_whence = SEEK_SET; fl.l_type = ftype; fl.l_start = (off_t)first; if (end == NFS64BITSSET) { fl.l_len = 0; } else { tlen = end - first; fl.l_len = (off_t)tlen; } /* * For FreeBSD8, the l_pid and l_sysid must be set to the same * values for all calls, so that all locks will be held by the * nfsd server. (The nfsd server handles conflicts between the * various clients.) * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024 * bytes, so it can't be put in l_sysid. */ if (nfsv4_sysid == 0) nfsv4_sysid = nlm_acquire_next_sysid(); fl.l_pid = (pid_t)0; fl.l_sysid = (int)nfsv4_sysid; if (ftype == F_UNLCK) error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_UNLCK, &fl, (F_POSIX | F_REMOTE)); else error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl, (F_POSIX | F_REMOTE)); out: NFSEXITCODE(error); return (error); } /* * Check the nfsv4 root exports. */ int nfsvno_v4rootexport(struct nfsrv_descript *nd) { struct ucred *credanon; int error = 0, numsecflavor, secflavors[MAXSECFLAVORS], i; uint64_t exflags; error = vfs_stdcheckexp(&nfsv4root_mnt, nd->nd_nam, &exflags, &credanon, &numsecflavor, secflavors); if (error) { error = NFSERR_PROGUNAVAIL; goto out; } if (credanon != NULL) crfree(credanon); for (i = 0; i < numsecflavor; i++) { if (secflavors[i] == AUTH_SYS) nd->nd_flag |= ND_EXAUTHSYS; else if (secflavors[i] == RPCSEC_GSS_KRB5) nd->nd_flag |= ND_EXGSS; else if (secflavors[i] == RPCSEC_GSS_KRB5I) nd->nd_flag |= ND_EXGSSINTEGRITY; else if (secflavors[i] == RPCSEC_GSS_KRB5P) nd->nd_flag |= ND_EXGSSPRIVACY; } /* And set ND_EXxx flags for TLS. */ if ((exflags & MNT_EXTLS) != 0) { nd->nd_flag |= ND_EXTLS; if ((exflags & MNT_EXTLSCERT) != 0) nd->nd_flag |= ND_EXTLSCERT; if ((exflags & MNT_EXTLSCERTUSER) != 0) nd->nd_flag |= ND_EXTLSCERTUSER; } out: NFSEXITCODE(error); return (error); } /* * Nfs server pseudo system call for the nfsd's */ /* * MPSAFE */ static int nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap) { struct file *fp; struct nfsd_addsock_args sockarg; struct nfsd_nfsd_args nfsdarg; struct nfsd_nfsd_oargs onfsdarg; struct nfsd_pnfsd_args pnfsdarg; struct vnode *vp, *nvp, *curdvp; struct pnfsdsfile *pf; struct nfsdevice *ds, *fds; cap_rights_t rights; int buflen, error, ret; char *buf, *cp, *cp2, *cp3; char fname[PNFS_FILENAME_LEN + 1]; if (uap->flag & NFSSVC_NFSDADDSOCK) { error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg)); if (error) goto out; /* * Since we don't know what rights might be required, * pretend that we need them all. It is better to be too * careful than too reckless. */ error = fget(td, sockarg.sock, cap_rights_init_one(&rights, CAP_SOCK_SERVER), &fp); if (error != 0) goto out; if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, td); error = EPERM; goto out; } error = nfsrvd_addsock(fp); fdrop(fp, td); } else if (uap->flag & NFSSVC_NFSDNFSD) { if (uap->argp == NULL) { error = EINVAL; goto out; } if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) { error = copyin(uap->argp, &onfsdarg, sizeof(onfsdarg)); if (error == 0) { nfsdarg.principal = onfsdarg.principal; nfsdarg.minthreads = onfsdarg.minthreads; nfsdarg.maxthreads = onfsdarg.maxthreads; nfsdarg.version = 1; nfsdarg.addr = NULL; nfsdarg.addrlen = 0; nfsdarg.dnshost = NULL; nfsdarg.dnshostlen = 0; nfsdarg.dspath = NULL; nfsdarg.dspathlen = 0; nfsdarg.mdspath = NULL; nfsdarg.mdspathlen = 0; nfsdarg.mirrorcnt = 1; } } else error = copyin(uap->argp, &nfsdarg, sizeof(nfsdarg)); if (error) goto out; if (nfsdarg.addrlen > 0 && nfsdarg.addrlen < 10000 && nfsdarg.dnshostlen > 0 && nfsdarg.dnshostlen < 10000 && nfsdarg.dspathlen > 0 && nfsdarg.dspathlen < 10000 && nfsdarg.mdspathlen > 0 && nfsdarg.mdspathlen < 10000 && nfsdarg.mirrorcnt >= 1 && nfsdarg.mirrorcnt <= NFSDEV_MAXMIRRORS && nfsdarg.addr != NULL && nfsdarg.dnshost != NULL && nfsdarg.dspath != NULL && nfsdarg.mdspath != NULL) { NFSD_DEBUG(1, "addrlen=%d dspathlen=%d dnslen=%d" " mdspathlen=%d mirrorcnt=%d\n", nfsdarg.addrlen, nfsdarg.dspathlen, nfsdarg.dnshostlen, nfsdarg.mdspathlen, nfsdarg.mirrorcnt); cp = malloc(nfsdarg.addrlen + 1, M_TEMP, M_WAITOK); error = copyin(nfsdarg.addr, cp, nfsdarg.addrlen); if (error != 0) { free(cp, M_TEMP); goto out; } cp[nfsdarg.addrlen] = '\0'; /* Ensure nul term. */ nfsdarg.addr = cp; cp = malloc(nfsdarg.dnshostlen + 1, M_TEMP, M_WAITOK); error = copyin(nfsdarg.dnshost, cp, nfsdarg.dnshostlen); if (error != 0) { free(nfsdarg.addr, M_TEMP); free(cp, M_TEMP); goto out; } cp[nfsdarg.dnshostlen] = '\0'; /* Ensure nul term. */ nfsdarg.dnshost = cp; cp = malloc(nfsdarg.dspathlen + 1, M_TEMP, M_WAITOK); error = copyin(nfsdarg.dspath, cp, nfsdarg.dspathlen); if (error != 0) { free(nfsdarg.addr, M_TEMP); free(nfsdarg.dnshost, M_TEMP); free(cp, M_TEMP); goto out; } cp[nfsdarg.dspathlen] = '\0'; /* Ensure nul term. */ nfsdarg.dspath = cp; cp = malloc(nfsdarg.mdspathlen + 1, M_TEMP, M_WAITOK); error = copyin(nfsdarg.mdspath, cp, nfsdarg.mdspathlen); if (error != 0) { free(nfsdarg.addr, M_TEMP); free(nfsdarg.dnshost, M_TEMP); free(nfsdarg.dspath, M_TEMP); free(cp, M_TEMP); goto out; } cp[nfsdarg.mdspathlen] = '\0'; /* Ensure nul term. */ nfsdarg.mdspath = cp; } else { nfsdarg.addr = NULL; nfsdarg.addrlen = 0; nfsdarg.dnshost = NULL; nfsdarg.dnshostlen = 0; nfsdarg.dspath = NULL; nfsdarg.dspathlen = 0; nfsdarg.mdspath = NULL; nfsdarg.mdspathlen = 0; nfsdarg.mirrorcnt = 1; } error = nfsrvd_nfsd(td, &nfsdarg); free(nfsdarg.addr, M_TEMP); free(nfsdarg.dnshost, M_TEMP); free(nfsdarg.dspath, M_TEMP); free(nfsdarg.mdspath, M_TEMP); } else if (uap->flag & NFSSVC_PNFSDS) { error = copyin(uap->argp, &pnfsdarg, sizeof(pnfsdarg)); if (error == 0 && (pnfsdarg.op == PNFSDOP_DELDSSERVER || pnfsdarg.op == PNFSDOP_FORCEDELDS)) { cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); error = copyinstr(pnfsdarg.dspath, cp, PATH_MAX + 1, NULL); if (error == 0) error = nfsrv_deldsserver(pnfsdarg.op, cp, td); free(cp, M_TEMP); } else if (error == 0 && pnfsdarg.op == PNFSDOP_COPYMR) { cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); buflen = sizeof(*pf) * NFSDEV_MAXMIRRORS; buf = malloc(buflen, M_TEMP, M_WAITOK); error = copyinstr(pnfsdarg.mdspath, cp, PATH_MAX + 1, NULL); NFSD_DEBUG(4, "pnfsdcopymr cp mdspath=%d\n", error); if (error == 0 && pnfsdarg.dspath != NULL) { cp2 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); error = copyinstr(pnfsdarg.dspath, cp2, PATH_MAX + 1, NULL); NFSD_DEBUG(4, "pnfsdcopymr cp dspath=%d\n", error); } else cp2 = NULL; if (error == 0 && pnfsdarg.curdspath != NULL) { cp3 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); error = copyinstr(pnfsdarg.curdspath, cp3, PATH_MAX + 1, NULL); NFSD_DEBUG(4, "pnfsdcopymr cp curdspath=%d\n", error); } else cp3 = NULL; curdvp = NULL; fds = NULL; if (error == 0) error = nfsrv_mdscopymr(cp, cp2, cp3, buf, &buflen, fname, td, &vp, &nvp, &pf, &ds, &fds); NFSD_DEBUG(4, "nfsrv_mdscopymr=%d\n", error); if (error == 0) { if (pf->dsf_dir >= nfsrv_dsdirsize) { printf("copymr: dsdir out of range\n"); pf->dsf_dir = 0; } NFSD_DEBUG(4, "copymr: buflen=%d\n", buflen); error = nfsrv_copymr(vp, nvp, ds->nfsdev_dsdir[pf->dsf_dir], ds, pf, (struct pnfsdsfile *)buf, buflen / sizeof(*pf), td->td_ucred, td); vput(vp); vput(nvp); if (fds != NULL && error == 0) { curdvp = fds->nfsdev_dsdir[pf->dsf_dir]; ret = vn_lock(curdvp, LK_EXCLUSIVE); if (ret == 0) { nfsrv_dsremove(curdvp, fname, td->td_ucred, td); NFSVOPUNLOCK(curdvp); } } NFSD_DEBUG(4, "nfsrv_copymr=%d\n", error); } free(cp, M_TEMP); free(cp2, M_TEMP); free(cp3, M_TEMP); free(buf, M_TEMP); } } else { error = nfssvc_srvcall(td, uap, td->td_ucred); } out: NFSEXITCODE(error); return (error); } static int nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred) { struct nfsex_args export; struct nfsex_oldargs oexp; struct file *fp = NULL; int stablefd, i, len; struct nfsd_clid adminrevoke; struct nfsd_dumplist dumplist; struct nfsd_dumpclients *dumpclients; struct nfsd_dumplocklist dumplocklist; struct nfsd_dumplocks *dumplocks; struct nameidata nd; vnode_t vp; int error = EINVAL, igotlock; struct proc *procp; gid_t *grps; static int suspend_nfsd = 0; if (uap->flag & NFSSVC_PUBLICFH) { NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t)); error = copyin(uap->argp, &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t)); if (!error) nfs_pubfhset = 1; } else if ((uap->flag & (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) == (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) { error = copyin(uap->argp,(caddr_t)&export, sizeof (struct nfsex_args)); if (!error) { grps = NULL; if (export.export.ex_ngroups > NGROUPS_MAX || export.export.ex_ngroups < 0) error = EINVAL; else if (export.export.ex_ngroups > 0) { grps = malloc(export.export.ex_ngroups * sizeof(gid_t), M_TEMP, M_WAITOK); error = copyin(export.export.ex_groups, grps, export.export.ex_ngroups * sizeof(gid_t)); export.export.ex_groups = grps; } else export.export.ex_groups = NULL; if (!error) error = nfsrv_v4rootexport(&export, cred, p); free(grps, M_TEMP); } } else if ((uap->flag & (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) == NFSSVC_V4ROOTEXPORT) { error = copyin(uap->argp,(caddr_t)&oexp, sizeof (struct nfsex_oldargs)); if (!error) { memset(&export.export, 0, sizeof(export.export)); export.export.ex_flags = (uint64_t)oexp.export.ex_flags; export.export.ex_root = oexp.export.ex_root; export.export.ex_uid = oexp.export.ex_anon.cr_uid; export.export.ex_ngroups = oexp.export.ex_anon.cr_ngroups; export.export.ex_groups = NULL; if (export.export.ex_ngroups > XU_NGROUPS || export.export.ex_ngroups < 0) error = EINVAL; else if (export.export.ex_ngroups > 0) { export.export.ex_groups = malloc( export.export.ex_ngroups * sizeof(gid_t), M_TEMP, M_WAITOK); for (i = 0; i < export.export.ex_ngroups; i++) export.export.ex_groups[i] = oexp.export.ex_anon.cr_groups[i]; } export.export.ex_addr = oexp.export.ex_addr; export.export.ex_addrlen = oexp.export.ex_addrlen; export.export.ex_mask = oexp.export.ex_mask; export.export.ex_masklen = oexp.export.ex_masklen; export.export.ex_indexfile = oexp.export.ex_indexfile; export.export.ex_numsecflavors = oexp.export.ex_numsecflavors; if (export.export.ex_numsecflavors >= MAXSECFLAVORS || export.export.ex_numsecflavors < 0) error = EINVAL; else { for (i = 0; i < export.export.ex_numsecflavors; i++) export.export.ex_secflavors[i] = oexp.export.ex_secflavors[i]; } export.fspec = oexp.fspec; if (error == 0) error = nfsrv_v4rootexport(&export, cred, p); free(export.export.ex_groups, M_TEMP); } } else if (uap->flag & NFSSVC_NOPUBLICFH) { nfs_pubfhset = 0; error = 0; } else if (uap->flag & NFSSVC_STABLERESTART) { error = copyin(uap->argp, (caddr_t)&stablefd, sizeof (int)); if (!error) error = fp_getfvp(p, stablefd, &fp, &vp); if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE)) error = EBADF; if (!error && newnfs_numnfsd != 0) error = EPERM; if (!error) { nfsrv_stablefirst.nsf_fp = fp; nfsrv_setupstable(p); } } else if (uap->flag & NFSSVC_ADMINREVOKE) { error = copyin(uap->argp, (caddr_t)&adminrevoke, sizeof (struct nfsd_clid)); if (!error) error = nfsrv_adminrevoke(&adminrevoke, p); } else if (uap->flag & NFSSVC_DUMPCLIENTS) { error = copyin(uap->argp, (caddr_t)&dumplist, sizeof (struct nfsd_dumplist)); if (!error && (dumplist.ndl_size < 1 || dumplist.ndl_size > NFSRV_MAXDUMPLIST)) error = EPERM; if (!error) { len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size; dumpclients = malloc(len, M_TEMP, M_WAITOK | M_ZERO); nfsrv_dumpclients(dumpclients, dumplist.ndl_size); error = copyout(dumpclients, dumplist.ndl_list, len); free(dumpclients, M_TEMP); } } else if (uap->flag & NFSSVC_DUMPLOCKS) { error = copyin(uap->argp, (caddr_t)&dumplocklist, sizeof (struct nfsd_dumplocklist)); if (!error && (dumplocklist.ndllck_size < 1 || dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST)) error = EPERM; if (!error) error = nfsrv_lookupfilename(&nd, dumplocklist.ndllck_fname, p); if (!error) { len = sizeof (struct nfsd_dumplocks) * dumplocklist.ndllck_size; dumplocks = malloc(len, M_TEMP, M_WAITOK | M_ZERO); nfsrv_dumplocks(nd.ni_vp, dumplocks, dumplocklist.ndllck_size, p); vput(nd.ni_vp); error = copyout(dumplocks, dumplocklist.ndllck_list, len); free(dumplocks, M_TEMP); } } else if (uap->flag & NFSSVC_BACKUPSTABLE) { procp = p->td_proc; PROC_LOCK(procp); nfsd_master_pid = procp->p_pid; bcopy(procp->p_comm, nfsd_master_comm, MAXCOMLEN + 1); nfsd_master_start = procp->p_stats->p_start; nfsd_master_proc = procp; PROC_UNLOCK(procp); } else if ((uap->flag & NFSSVC_SUSPENDNFSD) != 0) { NFSLOCKV4ROOTMUTEX(); if (suspend_nfsd == 0) { /* Lock out all nfsd threads */ do { igotlock = nfsv4_lock(&nfsd_suspend_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (igotlock == 0 && suspend_nfsd == 0); suspend_nfsd = 1; } NFSUNLOCKV4ROOTMUTEX(); error = 0; } else if ((uap->flag & NFSSVC_RESUMENFSD) != 0) { NFSLOCKV4ROOTMUTEX(); if (suspend_nfsd != 0) { nfsv4_unlock(&nfsd_suspend_lock, 0); suspend_nfsd = 0; } NFSUNLOCKV4ROOTMUTEX(); error = 0; } NFSEXITCODE(error); return (error); } /* * Check exports. * Returns 0 if ok, 1 otherwise. */ int nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp) { int i; /* * Allow NFSv3 Fsinfo per RFC2623. */ if (((nd->nd_flag & ND_NFSV4) != 0 || nd->nd_procnum != NFSPROC_FSINFO) && ((NFSVNO_EXTLS(exp) && (nd->nd_flag & ND_TLS) == 0) || (NFSVNO_EXTLSCERT(exp) && (nd->nd_flag & ND_TLSCERT) == 0) || (NFSVNO_EXTLSCERTUSER(exp) && (nd->nd_flag & ND_TLSCERTUSER) == 0))) { if ((nd->nd_flag & ND_NFSV4) != 0) return (NFSERR_WRONGSEC); else if ((nd->nd_flag & ND_TLS) == 0) return (NFSERR_AUTHERR | AUTH_NEEDS_TLS); else return (NFSERR_AUTHERR | AUTH_NEEDS_TLS_MUTUAL_HOST); } /* * This seems odd, but allow the case where the security flavor * list is empty. This happens when NFSv4 is traversing non-exported * file systems. Exported file systems should always have a non-empty * security flavor list. */ if (exp->nes_numsecflavor == 0) return (0); for (i = 0; i < exp->nes_numsecflavor; i++) { /* * The tests for privacy and integrity must be first, * since ND_GSS is set for everything but AUTH_SYS. */ if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P && (nd->nd_flag & ND_GSSPRIVACY)) return (0); if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I && (nd->nd_flag & ND_GSSINTEGRITY)) return (0); if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 && (nd->nd_flag & ND_GSS)) return (0); if (exp->nes_secflavors[i] == AUTH_SYS && (nd->nd_flag & ND_GSS) == 0) return (0); } if ((nd->nd_flag & ND_NFSV4) != 0) return (NFSERR_WRONGSEC); return (NFSERR_AUTHERR | AUTH_TOOWEAK); } /* * Calculate a hash value for the fid in a file handle. */ uint32_t nfsrv_hashfh(fhandle_t *fhp) { uint32_t hashval; hashval = hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0); return (hashval); } /* * Calculate a hash value for the sessionid. */ uint32_t nfsrv_hashsessionid(uint8_t *sessionid) { uint32_t hashval; hashval = hash32_buf(sessionid, NFSX_V4SESSIONID, 0); return (hashval); } /* * Signal the userland master nfsd to backup the stable restart file. */ void nfsrv_backupstable(void) { struct proc *procp; if (nfsd_master_proc != NULL) { procp = pfind(nfsd_master_pid); /* Try to make sure it is the correct process. */ if (procp == nfsd_master_proc && procp->p_stats->p_start.tv_sec == nfsd_master_start.tv_sec && procp->p_stats->p_start.tv_usec == nfsd_master_start.tv_usec && strcmp(procp->p_comm, nfsd_master_comm) == 0) kern_psignal(procp, SIGUSR2); else nfsd_master_proc = NULL; if (procp != NULL) PROC_UNLOCK(procp); } } /* * Create a DS data file for nfsrv_pnfscreate(). Called for each mirror. * The arguments are in a structure, so that they can be passed through * taskqueue for a kernel process to execute this function. */ struct nfsrvdscreate { int done; int inprog; struct task tsk; struct ucred *tcred; struct vnode *dvp; NFSPROC_T *p; struct pnfsdsfile *pf; int err; fhandle_t fh; struct vattr va; struct vattr createva; }; int nfsrv_dscreate(struct vnode *dvp, struct vattr *vap, struct vattr *nvap, fhandle_t *fhp, struct pnfsdsfile *pf, struct pnfsdsattr *dsa, char *fnamep, struct ucred *tcred, NFSPROC_T *p, struct vnode **nvpp) { struct vnode *nvp; struct nameidata named; struct vattr va; char *bufp; u_long *hashp; struct nfsnode *np; struct nfsmount *nmp; int error; NFSNAMEICNDSET(&named.ni_cnd, tcred, CREATE, LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE); nfsvno_setpathbuf(&named, &bufp, &hashp); named.ni_cnd.cn_lkflags = LK_EXCLUSIVE; named.ni_cnd.cn_thread = p; named.ni_cnd.cn_nameptr = bufp; if (fnamep != NULL) { strlcpy(bufp, fnamep, PNFS_FILENAME_LEN + 1); named.ni_cnd.cn_namelen = strlen(bufp); } else named.ni_cnd.cn_namelen = nfsrv_putfhname(fhp, bufp); NFSD_DEBUG(4, "nfsrv_dscreate: dvp=%p fname=%s\n", dvp, bufp); /* Create the date file in the DS mount. */ error = NFSVOPLOCK(dvp, LK_EXCLUSIVE); if (error == 0) { error = VOP_CREATE(dvp, &nvp, &named.ni_cnd, vap); vref(dvp); VOP_VPUT_PAIR(dvp, error == 0 ? &nvp : NULL, false); if (error == 0) { /* Set the ownership of the file. */ error = VOP_SETATTR(nvp, nvap, tcred); NFSD_DEBUG(4, "nfsrv_dscreate:" " setattr-uid=%d\n", error); if (error != 0) vput(nvp); } if (error != 0) printf("pNFS: pnfscreate failed=%d\n", error); } else printf("pNFS: pnfscreate vnlock=%d\n", error); if (error == 0) { np = VTONFS(nvp); nmp = VFSTONFS(nvp->v_mount); if (strcmp(nvp->v_mount->mnt_vfc->vfc_name, "nfs") != 0 || nmp->nm_nam->sa_len > sizeof( struct sockaddr_in6) || np->n_fhp->nfh_len != NFSX_MYFH) { printf("Bad DS file: fstype=%s salen=%d" " fhlen=%d\n", nvp->v_mount->mnt_vfc->vfc_name, nmp->nm_nam->sa_len, np->n_fhp->nfh_len); error = ENOENT; } /* Set extattrs for the DS on the MDS file. */ if (error == 0) { if (dsa != NULL) { error = VOP_GETATTR(nvp, &va, tcred); if (error == 0) { dsa->dsa_filerev = va.va_filerev; dsa->dsa_size = va.va_size; dsa->dsa_atime = va.va_atime; dsa->dsa_mtime = va.va_mtime; dsa->dsa_bytes = va.va_bytes; } } if (error == 0) { NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh, NFSX_MYFH); NFSBCOPY(nmp->nm_nam, &pf->dsf_sin, nmp->nm_nam->sa_len); NFSBCOPY(named.ni_cnd.cn_nameptr, pf->dsf_filename, sizeof(pf->dsf_filename)); } } else printf("pNFS: pnfscreate can't get DS" " attr=%d\n", error); if (nvpp != NULL && error == 0) *nvpp = nvp; else vput(nvp); } nfsvno_relpathbuf(&named); return (error); } /* * Start up the thread that will execute nfsrv_dscreate(). */ static void start_dscreate(void *arg, int pending) { struct nfsrvdscreate *dsc; dsc = (struct nfsrvdscreate *)arg; dsc->err = nfsrv_dscreate(dsc->dvp, &dsc->createva, &dsc->va, &dsc->fh, dsc->pf, NULL, NULL, dsc->tcred, dsc->p, NULL); dsc->done = 1; NFSD_DEBUG(4, "start_dscreate: err=%d\n", dsc->err); } /* * Create a pNFS data file on the Data Server(s). */ static void nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap, struct ucred *cred, NFSPROC_T *p) { struct nfsrvdscreate *dsc, *tdsc = NULL; struct nfsdevice *ds, *tds, *fds; struct mount *mp; struct pnfsdsfile *pf, *tpf; struct pnfsdsattr dsattr; struct vattr va; struct vnode *dvp[NFSDEV_MAXMIRRORS]; struct nfsmount *nmp; fhandle_t fh; uid_t vauid; gid_t vagid; u_short vamode; struct ucred *tcred; int dsdir[NFSDEV_MAXMIRRORS], error, i, mirrorcnt, ret; int failpos, timo; /* Get a DS server directory in a round-robin order. */ mirrorcnt = 1; mp = vp->v_mount; ds = fds = NULL; NFSDDSLOCK(); /* * Search for the first entry that handles this MDS fs, but use the * first entry for all MDS fs's otherwise. */ TAILQ_FOREACH(tds, &nfsrv_devidhead, nfsdev_list) { if (tds->nfsdev_nmp != NULL) { if (tds->nfsdev_mdsisset == 0 && ds == NULL) ds = tds; else if (tds->nfsdev_mdsisset != 0 && fsidcmp( &mp->mnt_stat.f_fsid, &tds->nfsdev_mdsfsid) == 0) { ds = fds = tds; break; } } } if (ds == NULL) { NFSDDSUNLOCK(); NFSD_DEBUG(4, "nfsrv_pnfscreate: no srv\n"); return; } i = dsdir[0] = ds->nfsdev_nextdir; ds->nfsdev_nextdir = (ds->nfsdev_nextdir + 1) % nfsrv_dsdirsize; dvp[0] = ds->nfsdev_dsdir[i]; tds = TAILQ_NEXT(ds, nfsdev_list); if (nfsrv_maxpnfsmirror > 1 && tds != NULL) { TAILQ_FOREACH_FROM(tds, &nfsrv_devidhead, nfsdev_list) { if (tds->nfsdev_nmp != NULL && ((tds->nfsdev_mdsisset == 0 && fds == NULL) || (tds->nfsdev_mdsisset != 0 && fds != NULL && fsidcmp(&mp->mnt_stat.f_fsid, &tds->nfsdev_mdsfsid) == 0))) { dsdir[mirrorcnt] = i; dvp[mirrorcnt] = tds->nfsdev_dsdir[i]; mirrorcnt++; if (mirrorcnt >= nfsrv_maxpnfsmirror) break; } } } /* Put at end of list to implement round-robin usage. */ TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list); TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list); NFSDDSUNLOCK(); dsc = NULL; if (mirrorcnt > 1) tdsc = dsc = malloc(sizeof(*dsc) * (mirrorcnt - 1), M_TEMP, M_WAITOK | M_ZERO); tpf = pf = malloc(sizeof(*pf) * nfsrv_maxpnfsmirror, M_TEMP, M_WAITOK | M_ZERO); error = nfsvno_getfh(vp, &fh, p); if (error == 0) error = VOP_GETATTR(vp, &va, cred); if (error == 0) { /* Set the attributes for "vp" to Setattr the DS vp. */ vauid = va.va_uid; vagid = va.va_gid; vamode = va.va_mode; VATTR_NULL(&va); va.va_uid = vauid; va.va_gid = vagid; va.va_mode = vamode; va.va_size = 0; } else printf("pNFS: pnfscreate getfh+attr=%d\n", error); NFSD_DEBUG(4, "nfsrv_pnfscreate: cruid=%d crgid=%d\n", cred->cr_uid, cred->cr_gid); /* Make data file name based on FH. */ tcred = newnfs_getcred(); /* * Create the file on each DS mirror, using kernel process(es) for the * additional mirrors. */ failpos = -1; for (i = 0; i < mirrorcnt - 1 && error == 0; i++, tpf++, tdsc++) { tpf->dsf_dir = dsdir[i]; tdsc->tcred = tcred; tdsc->p = p; tdsc->pf = tpf; tdsc->createva = *vap; NFSBCOPY(&fh, &tdsc->fh, sizeof(fh)); tdsc->va = va; tdsc->dvp = dvp[i]; tdsc->done = 0; tdsc->inprog = 0; tdsc->err = 0; ret = EIO; if (nfs_pnfsiothreads != 0) { ret = nfs_pnfsio(start_dscreate, tdsc); NFSD_DEBUG(4, "nfsrv_pnfscreate: nfs_pnfsio=%d\n", ret); } if (ret != 0) { ret = nfsrv_dscreate(dvp[i], vap, &va, &fh, tpf, NULL, NULL, tcred, p, NULL); if (ret != 0) { KASSERT(error == 0, ("nfsrv_dscreate err=%d", error)); if (failpos == -1 && nfsds_failerr(ret)) failpos = i; else error = ret; } } } if (error == 0) { tpf->dsf_dir = dsdir[mirrorcnt - 1]; error = nfsrv_dscreate(dvp[mirrorcnt - 1], vap, &va, &fh, tpf, &dsattr, NULL, tcred, p, NULL); if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(error)) { failpos = mirrorcnt - 1; error = 0; } } timo = hz / 50; /* Wait for 20msec. */ if (timo < 1) timo = 1; /* Wait for kernel task(s) to complete. */ for (tdsc = dsc, i = 0; i < mirrorcnt - 1; i++, tdsc++) { while (tdsc->inprog != 0 && tdsc->done == 0) tsleep(&tdsc->tsk, PVFS, "srvdcr", timo); if (tdsc->err != 0) { if (failpos == -1 && nfsds_failerr(tdsc->err)) failpos = i; else if (error == 0) error = tdsc->err; } } /* * If failpos has been set, that mirror has failed, so it needs * to be disabled. */ if (failpos >= 0) { nmp = VFSTONFS(dvp[failpos]->v_mount); NFSLOCKMNT(nmp); if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | NFSMNTP_CANCELRPCS)) == 0) { nmp->nm_privflag |= NFSMNTP_CANCELRPCS; NFSUNLOCKMNT(nmp); ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p); NFSD_DEBUG(4, "dscreatfail fail=%d ds=%p\n", failpos, ds); if (ds != NULL) nfsrv_killrpcs(nmp); NFSLOCKMNT(nmp); nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; wakeup(nmp); } NFSUNLOCKMNT(nmp); } NFSFREECRED(tcred); if (error == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_pnfscreate vp"); NFSD_DEBUG(4, "nfsrv_pnfscreate: mirrorcnt=%d maxmirror=%d\n", mirrorcnt, nfsrv_maxpnfsmirror); /* * For all mirrors that couldn't be created, fill in the * *pf structure, but with an IP address == 0.0.0.0. */ tpf = pf + mirrorcnt; for (i = mirrorcnt; i < nfsrv_maxpnfsmirror; i++, tpf++) { *tpf = *pf; tpf->dsf_sin.sin_family = AF_INET; tpf->dsf_sin.sin_len = sizeof(struct sockaddr_in); tpf->dsf_sin.sin_addr.s_addr = 0; tpf->dsf_sin.sin_port = 0; } error = vn_extattr_set(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", sizeof(*pf) * nfsrv_maxpnfsmirror, (char *)pf, p); if (error == 0) error = vn_extattr_set(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", sizeof(dsattr), (char *)&dsattr, p); if (error != 0) printf("pNFS: pnfscreate setextattr=%d\n", error); } else printf("pNFS: pnfscreate=%d\n", error); free(pf, M_TEMP); free(dsc, M_TEMP); } /* * Get the information needed to remove the pNFS Data Server file from the * Metadata file. Upon success, ddvp is set non-NULL to the locked * DS directory vnode. The caller must unlock *ddvp when done with it. */ static void nfsrv_pnfsremovesetup(struct vnode *vp, NFSPROC_T *p, struct vnode **dvpp, int *mirrorcntp, char *fname, fhandle_t *fhp) { struct vattr va; struct ucred *tcred; char *buf; int buflen, error; dvpp[0] = NULL; /* If not an exported regular file or not a pNFS server, just return. */ if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 || nfsrv_devidcnt == 0) return; /* Check to see if this is the last hard link. */ tcred = newnfs_getcred(); error = VOP_GETATTR(vp, &va, tcred); NFSFREECRED(tcred); if (error != 0) { printf("pNFS: nfsrv_pnfsremovesetup getattr=%d\n", error); return; } if (va.va_nlink > 1) return; error = nfsvno_getfh(vp, fhp, p); if (error != 0) { printf("pNFS: nfsrv_pnfsremovesetup getfh=%d\n", error); return; } buflen = 1024; buf = malloc(buflen, M_TEMP, M_WAITOK); /* Get the directory vnode for the DS mount and the file handle. */ error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, dvpp, NULL, NULL, fname, NULL, NULL, NULL, NULL, NULL); free(buf, M_TEMP); if (error != 0) printf("pNFS: nfsrv_pnfsremovesetup getsockmnt=%d\n", error); } /* * Remove a DS data file for nfsrv_pnfsremove(). Called for each mirror. * The arguments are in a structure, so that they can be passed through * taskqueue for a kernel process to execute this function. */ struct nfsrvdsremove { int done; int inprog; struct task tsk; struct ucred *tcred; struct vnode *dvp; NFSPROC_T *p; int err; char fname[PNFS_FILENAME_LEN + 1]; }; static int nfsrv_dsremove(struct vnode *dvp, char *fname, struct ucred *tcred, NFSPROC_T *p) { struct nameidata named; struct vnode *nvp; char *bufp; u_long *hashp; int error; error = NFSVOPLOCK(dvp, LK_EXCLUSIVE); if (error != 0) return (error); named.ni_cnd.cn_nameiop = DELETE; named.ni_cnd.cn_lkflags = LK_EXCLUSIVE | LK_RETRY; named.ni_cnd.cn_cred = tcred; named.ni_cnd.cn_thread = p; named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF | SAVENAME; nfsvno_setpathbuf(&named, &bufp, &hashp); named.ni_cnd.cn_nameptr = bufp; named.ni_cnd.cn_namelen = strlen(fname); strlcpy(bufp, fname, NAME_MAX); NFSD_DEBUG(4, "nfsrv_pnfsremove: filename=%s\n", bufp); error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd); NFSD_DEBUG(4, "nfsrv_pnfsremove: aft LOOKUP=%d\n", error); if (error == 0) { error = VOP_REMOVE(dvp, nvp, &named.ni_cnd); vput(nvp); } NFSVOPUNLOCK(dvp); nfsvno_relpathbuf(&named); if (error != 0) printf("pNFS: nfsrv_pnfsremove failed=%d\n", error); return (error); } /* * Start up the thread that will execute nfsrv_dsremove(). */ static void start_dsremove(void *arg, int pending) { struct nfsrvdsremove *dsrm; dsrm = (struct nfsrvdsremove *)arg; dsrm->err = nfsrv_dsremove(dsrm->dvp, dsrm->fname, dsrm->tcred, dsrm->p); dsrm->done = 1; NFSD_DEBUG(4, "start_dsremove: err=%d\n", dsrm->err); } /* * Remove a pNFS data file from a Data Server. * nfsrv_pnfsremovesetup() must have been called before the MDS file was * removed to set up the dvp and fill in the FH. */ static void nfsrv_pnfsremove(struct vnode **dvp, int mirrorcnt, char *fname, fhandle_t *fhp, NFSPROC_T *p) { struct ucred *tcred; struct nfsrvdsremove *dsrm, *tdsrm; struct nfsdevice *ds; struct nfsmount *nmp; int failpos, i, ret, timo; tcred = newnfs_getcred(); dsrm = NULL; if (mirrorcnt > 1) dsrm = malloc(sizeof(*dsrm) * mirrorcnt - 1, M_TEMP, M_WAITOK); /* * Remove the file on each DS mirror, using kernel process(es) for the * additional mirrors. */ failpos = -1; for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) { tdsrm->tcred = tcred; tdsrm->p = p; tdsrm->dvp = dvp[i]; strlcpy(tdsrm->fname, fname, PNFS_FILENAME_LEN + 1); tdsrm->inprog = 0; tdsrm->done = 0; tdsrm->err = 0; ret = EIO; if (nfs_pnfsiothreads != 0) { ret = nfs_pnfsio(start_dsremove, tdsrm); NFSD_DEBUG(4, "nfsrv_pnfsremove: nfs_pnfsio=%d\n", ret); } if (ret != 0) { ret = nfsrv_dsremove(dvp[i], fname, tcred, p); if (failpos == -1 && nfsds_failerr(ret)) failpos = i; } } ret = nfsrv_dsremove(dvp[mirrorcnt - 1], fname, tcred, p); if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(ret)) failpos = mirrorcnt - 1; timo = hz / 50; /* Wait for 20msec. */ if (timo < 1) timo = 1; /* Wait for kernel task(s) to complete. */ for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) { while (tdsrm->inprog != 0 && tdsrm->done == 0) tsleep(&tdsrm->tsk, PVFS, "srvdsrm", timo); if (failpos == -1 && nfsds_failerr(tdsrm->err)) failpos = i; } /* * If failpos has been set, that mirror has failed, so it needs * to be disabled. */ if (failpos >= 0) { nmp = VFSTONFS(dvp[failpos]->v_mount); NFSLOCKMNT(nmp); if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | NFSMNTP_CANCELRPCS)) == 0) { nmp->nm_privflag |= NFSMNTP_CANCELRPCS; NFSUNLOCKMNT(nmp); ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p); NFSD_DEBUG(4, "dsremovefail fail=%d ds=%p\n", failpos, ds); if (ds != NULL) nfsrv_killrpcs(nmp); NFSLOCKMNT(nmp); nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; wakeup(nmp); } NFSUNLOCKMNT(nmp); } /* Get rid all layouts for the file. */ nfsrv_freefilelayouts(fhp); NFSFREECRED(tcred); free(dsrm, M_TEMP); } /* * Generate a file name based on the file handle and put it in *bufp. * Return the number of bytes generated. */ static int nfsrv_putfhname(fhandle_t *fhp, char *bufp) { int i; uint8_t *cp; const uint8_t *hexdigits = "0123456789abcdef"; cp = (uint8_t *)fhp; for (i = 0; i < sizeof(*fhp); i++) { bufp[2 * i] = hexdigits[(*cp >> 4) & 0xf]; bufp[2 * i + 1] = hexdigits[*cp++ & 0xf]; } bufp[2 * i] = '\0'; return (2 * i); } /* * Update the Metadata file's attributes from the DS file when a Read/Write * layout is returned. * Basically just call nfsrv_proxyds() with procedure == NFSPROC_LAYOUTRETURN * so that it does a nfsrv_getattrdsrpc() and nfsrv_setextattr() on the DS file. */ int nfsrv_updatemdsattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p) { struct ucred *tcred; int error; /* Do this as root so that it won't fail with EACCES. */ tcred = newnfs_getcred(); error = nfsrv_proxyds(vp, 0, 0, tcred, p, NFSPROC_LAYOUTRETURN, NULL, NULL, NULL, nap, NULL, NULL, 0, NULL); NFSFREECRED(tcred); return (error); } /* * Set the NFSv4 ACL on the DS file to the same ACL as the MDS file. */ static int nfsrv_dssetacl(struct vnode *vp, struct acl *aclp, struct ucred *cred, NFSPROC_T *p) { int error; error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETACL, NULL, NULL, NULL, NULL, aclp, NULL, 0, NULL); return (error); } static int nfsrv_proxyds(struct vnode *vp, off_t off, int cnt, struct ucred *cred, struct thread *p, int ioproc, struct mbuf **mpp, char *cp, struct mbuf **mpp2, struct nfsvattr *nap, struct acl *aclp, off_t *offp, int content, bool *eofp) { struct nfsmount *nmp[NFSDEV_MAXMIRRORS], *failnmp; fhandle_t fh[NFSDEV_MAXMIRRORS]; struct vnode *dvp[NFSDEV_MAXMIRRORS]; struct nfsdevice *ds; struct pnfsdsattr dsattr; struct opnfsdsattr odsattr; char *buf; int buflen, error, failpos, i, mirrorcnt, origmircnt, trycnt; NFSD_DEBUG(4, "in nfsrv_proxyds\n"); /* * If not a regular file, not exported or not a pNFS server, * just return ENOENT. */ if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 || nfsrv_devidcnt == 0) return (ENOENT); buflen = 1024; buf = malloc(buflen, M_TEMP, M_WAITOK); error = 0; /* * For Getattr, get the Change attribute (va_filerev) and size (va_size) * from the MetaData file's extended attribute. */ if (ioproc == NFSPROC_GETATTR) { error = vn_extattr_get(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", &buflen, buf, p); if (error == 0) { if (buflen == sizeof(odsattr)) { NFSBCOPY(buf, &odsattr, buflen); nap->na_filerev = odsattr.dsa_filerev; nap->na_size = odsattr.dsa_size; nap->na_atime = odsattr.dsa_atime; nap->na_mtime = odsattr.dsa_mtime; /* * Fake na_bytes by rounding up na_size. * Since we don't know the block size, just * use BLKDEV_IOSIZE. */ nap->na_bytes = (odsattr.dsa_size + BLKDEV_IOSIZE - 1) & ~(BLKDEV_IOSIZE - 1); } else if (buflen == sizeof(dsattr)) { NFSBCOPY(buf, &dsattr, buflen); nap->na_filerev = dsattr.dsa_filerev; nap->na_size = dsattr.dsa_size; nap->na_atime = dsattr.dsa_atime; nap->na_mtime = dsattr.dsa_mtime; nap->na_bytes = dsattr.dsa_bytes; } else error = ENXIO; } if (error == 0) { /* * If nfsrv_pnfsgetdsattr is 0 or nfsrv_checkdsattr() * returns 0, just return now. nfsrv_checkdsattr() * returns 0 if there is no Read/Write layout * plus either an Open/Write_access or Write * delegation issued to a client for the file. */ if (nfsrv_pnfsgetdsattr == 0 || nfsrv_checkdsattr(vp, p) == 0) { free(buf, M_TEMP); return (error); } } /* * Clear ENOATTR so the code below will attempt to do a * nfsrv_getattrdsrpc() to get the attributes and (re)create * the extended attribute. */ if (error == ENOATTR) error = 0; } origmircnt = -1; trycnt = 0; tryagain: if (error == 0) { buflen = 1024; if (ioproc == NFSPROC_READDS && NFSVOPISLOCKED(vp) == LK_EXCLUSIVE) printf("nfsrv_proxyds: Readds vp exclusively locked\n"); error = nfsrv_dsgetsockmnt(vp, LK_SHARED, buf, &buflen, &mirrorcnt, p, dvp, fh, NULL, NULL, NULL, NULL, NULL, NULL, NULL); if (error == 0) { for (i = 0; i < mirrorcnt; i++) nmp[i] = VFSTONFS(dvp[i]->v_mount); } else printf("pNFS: proxy getextattr sockaddr=%d\n", error); } else printf("pNFS: nfsrv_dsgetsockmnt=%d\n", error); if (error == 0) { failpos = -1; if (origmircnt == -1) origmircnt = mirrorcnt; /* * If failpos is set to a mirror#, then that mirror has * failed and will be disabled. For Read, Getattr and Seek, the * function only tries one mirror, so if that mirror has * failed, it will need to be retried. As such, increment * tryitagain for these cases. * For Write, Setattr and Setacl, the function tries all * mirrors and will not return an error for the case where * one mirror has failed. For these cases, the functioning * mirror(s) will have been modified, so a retry isn't * necessary. These functions will set failpos for the * failed mirror#. */ if (ioproc == NFSPROC_READDS) { error = nfsrv_readdsrpc(fh, off, cnt, cred, p, nmp[0], mpp, mpp2); if (nfsds_failerr(error) && mirrorcnt > 1) { /* * Setting failpos will cause the mirror * to be disabled and then a retry of this * read is required. */ failpos = 0; error = 0; trycnt++; } } else if (ioproc == NFSPROC_WRITEDS) error = nfsrv_writedsrpc(fh, off, cnt, cred, p, vp, &nmp[0], mirrorcnt, mpp, cp, &failpos); else if (ioproc == NFSPROC_SETATTR) error = nfsrv_setattrdsrpc(fh, cred, p, vp, &nmp[0], mirrorcnt, nap, &failpos); else if (ioproc == NFSPROC_SETACL) error = nfsrv_setacldsrpc(fh, cred, p, vp, &nmp[0], mirrorcnt, aclp, &failpos); else if (ioproc == NFSPROC_SEEKDS) { error = nfsrv_seekdsrpc(fh, offp, content, eofp, cred, p, nmp[0]); if (nfsds_failerr(error) && mirrorcnt > 1) { /* * Setting failpos will cause the mirror * to be disabled and then a retry of this * read is required. */ failpos = 0; error = 0; trycnt++; } } else if (ioproc == NFSPROC_ALLOCATE) error = nfsrv_allocatedsrpc(fh, off, *offp, cred, p, vp, &nmp[0], mirrorcnt, &failpos); else { error = nfsrv_getattrdsrpc(&fh[mirrorcnt - 1], cred, p, vp, nmp[mirrorcnt - 1], nap); if (nfsds_failerr(error) && mirrorcnt > 1) { /* * Setting failpos will cause the mirror * to be disabled and then a retry of this * getattr is required. */ failpos = mirrorcnt - 1; error = 0; trycnt++; } } ds = NULL; if (failpos >= 0) { failnmp = nmp[failpos]; NFSLOCKMNT(failnmp); if ((failnmp->nm_privflag & (NFSMNTP_FORCEDISM | NFSMNTP_CANCELRPCS)) == 0) { failnmp->nm_privflag |= NFSMNTP_CANCELRPCS; NFSUNLOCKMNT(failnmp); ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, failnmp, p); NFSD_DEBUG(4, "dsldsnmp fail=%d ds=%p\n", failpos, ds); if (ds != NULL) nfsrv_killrpcs(failnmp); NFSLOCKMNT(failnmp); failnmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; wakeup(failnmp); } NFSUNLOCKMNT(failnmp); } for (i = 0; i < mirrorcnt; i++) NFSVOPUNLOCK(dvp[i]); NFSD_DEBUG(4, "nfsrv_proxyds: aft RPC=%d trya=%d\n", error, trycnt); /* Try the Read/Getattr again if a mirror was deleted. */ if (ds != NULL && trycnt > 0 && trycnt < origmircnt) goto tryagain; } else { /* Return ENOENT for any Extended Attribute error. */ error = ENOENT; } free(buf, M_TEMP); NFSD_DEBUG(4, "nfsrv_proxyds: error=%d\n", error); return (error); } /* * Get the DS mount point, fh and directory from the "pnfsd.dsfile" extended * attribute. * newnmpp - If it points to a non-NULL nmp, that is the destination and needs * to be checked. If it points to a NULL nmp, then it returns * a suitable destination. * curnmp - If non-NULL, it is the source mount for the copy. */ int nfsrv_dsgetsockmnt(struct vnode *vp, int lktype, char *buf, int *buflenp, int *mirrorcntp, NFSPROC_T *p, struct vnode **dvpp, fhandle_t *fhp, char *devid, char *fnamep, struct vnode **nvpp, struct nfsmount **newnmpp, struct nfsmount *curnmp, int *ippos, int *dsdirp) { struct vnode *dvp, *nvp = NULL, **tdvpp; struct mount *mp; struct nfsmount *nmp, *newnmp; struct sockaddr *sad; struct sockaddr_in *sin; struct nfsdevice *ds, *tds, *fndds; struct pnfsdsfile *pf; uint32_t dsdir; int error, fhiszero, fnd, gotone, i, mirrorcnt; ASSERT_VOP_LOCKED(vp, "nfsrv_dsgetsockmnt vp"); *mirrorcntp = 1; tdvpp = dvpp; if (nvpp != NULL) *nvpp = NULL; if (dvpp != NULL) *dvpp = NULL; if (ippos != NULL) *ippos = -1; if (newnmpp != NULL) newnmp = *newnmpp; else newnmp = NULL; mp = vp->v_mount; error = vn_extattr_get(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", buflenp, buf, p); mirrorcnt = *buflenp / sizeof(*pf); if (error == 0 && (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS || *buflenp != sizeof(*pf) * mirrorcnt)) error = ENOATTR; pf = (struct pnfsdsfile *)buf; /* If curnmp != NULL, check for a match in the mirror list. */ if (curnmp != NULL && error == 0) { fnd = 0; for (i = 0; i < mirrorcnt; i++, pf++) { sad = (struct sockaddr *)&pf->dsf_sin; if (nfsaddr2_match(sad, curnmp->nm_nam)) { if (ippos != NULL) *ippos = i; fnd = 1; break; } } if (fnd == 0) error = ENXIO; } gotone = 0; pf = (struct pnfsdsfile *)buf; NFSD_DEBUG(4, "nfsrv_dsgetsockmnt: mirrorcnt=%d err=%d\n", mirrorcnt, error); for (i = 0; i < mirrorcnt && error == 0; i++, pf++) { fhiszero = 0; sad = (struct sockaddr *)&pf->dsf_sin; sin = &pf->dsf_sin; dsdir = pf->dsf_dir; if (dsdir >= nfsrv_dsdirsize) { printf("nfsrv_dsgetsockmnt: dsdir=%d\n", dsdir); error = ENOATTR; } else if (nvpp != NULL && newnmp != NULL && nfsaddr2_match(sad, newnmp->nm_nam)) error = EEXIST; if (error == 0) { if (ippos != NULL && curnmp == NULL && sad->sa_family == AF_INET && sin->sin_addr.s_addr == 0) *ippos = i; if (NFSBCMP(&zerofh, &pf->dsf_fh, sizeof(zerofh)) == 0) fhiszero = 1; /* Use the socket address to find the mount point. */ fndds = NULL; NFSDDSLOCK(); /* Find a match for the IP address. */ TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { if (ds->nfsdev_nmp != NULL) { dvp = ds->nfsdev_dvp; nmp = VFSTONFS(dvp->v_mount); if (nmp != ds->nfsdev_nmp) printf("different2 nmp %p %p\n", nmp, ds->nfsdev_nmp); if (nfsaddr2_match(sad, nmp->nm_nam)) { fndds = ds; break; } } } if (fndds != NULL && newnmpp != NULL && newnmp == NULL) { /* Search for a place to make a mirror copy. */ TAILQ_FOREACH(tds, &nfsrv_devidhead, nfsdev_list) { if (tds->nfsdev_nmp != NULL && fndds != tds && ((tds->nfsdev_mdsisset == 0 && fndds->nfsdev_mdsisset == 0) || (tds->nfsdev_mdsisset != 0 && fndds->nfsdev_mdsisset != 0 && fsidcmp(&tds->nfsdev_mdsfsid, &mp->mnt_stat.f_fsid) == 0))) { *newnmpp = tds->nfsdev_nmp; break; } } if (tds != NULL) { /* * Move this entry to the end of the * list, so it won't be selected as * easily the next time. */ TAILQ_REMOVE(&nfsrv_devidhead, tds, nfsdev_list); TAILQ_INSERT_TAIL(&nfsrv_devidhead, tds, nfsdev_list); } } NFSDDSUNLOCK(); if (fndds != NULL) { dvp = fndds->nfsdev_dsdir[dsdir]; if (lktype != 0 || fhiszero != 0 || (nvpp != NULL && *nvpp == NULL)) { if (fhiszero != 0) error = vn_lock(dvp, LK_EXCLUSIVE); else if (lktype != 0) error = vn_lock(dvp, lktype); else error = vn_lock(dvp, LK_SHARED); /* * If the file handle is all 0's, try to * do a Lookup against the DS to acquire * it. * If dvpp == NULL or the Lookup fails, * unlock dvp after the call. */ if (error == 0 && (fhiszero != 0 || (nvpp != NULL && *nvpp == NULL))) { error = nfsrv_pnfslookupds(vp, dvp, pf, &nvp, p); if (error == 0) { if (fhiszero != 0) nfsrv_pnfssetfh( vp, pf, devid, fnamep, nvp, p); if (nvpp != NULL && *nvpp == NULL) { *nvpp = nvp; *dsdirp = dsdir; } else vput(nvp); } if (error != 0 || lktype == 0) NFSVOPUNLOCK(dvp); } } if (error == 0) { gotone++; NFSD_DEBUG(4, "gotone=%d\n", gotone); if (devid != NULL) { NFSBCOPY(fndds->nfsdev_deviceid, devid, NFSX_V4DEVICEID); devid += NFSX_V4DEVICEID; } if (dvpp != NULL) *tdvpp++ = dvp; if (fhp != NULL) NFSBCOPY(&pf->dsf_fh, fhp++, NFSX_MYFH); if (fnamep != NULL && gotone == 1) strlcpy(fnamep, pf->dsf_filename, sizeof(pf->dsf_filename)); } else NFSD_DEBUG(4, "nfsrv_dsgetsockmnt " "err=%d\n", error); } } } if (error == 0 && gotone == 0) error = ENOENT; NFSD_DEBUG(4, "eo nfsrv_dsgetsockmnt: gotone=%d err=%d\n", gotone, error); if (error == 0) *mirrorcntp = gotone; else { if (gotone > 0 && dvpp != NULL) { /* * If the error didn't occur on the first one and * dvpp != NULL, the one(s) prior to the failure will * have locked dvp's that need to be unlocked. */ for (i = 0; i < gotone; i++) { NFSVOPUNLOCK(*dvpp); *dvpp++ = NULL; } } /* * If it found the vnode to be copied from before a failure, * it needs to be vput()'d. */ if (nvpp != NULL && *nvpp != NULL) { vput(*nvpp); *nvpp = NULL; } } return (error); } /* * Set the extended attribute for the Change attribute. */ static int nfsrv_setextattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p) { struct pnfsdsattr dsattr; int error; ASSERT_VOP_ELOCKED(vp, "nfsrv_setextattr vp"); dsattr.dsa_filerev = nap->na_filerev; dsattr.dsa_size = nap->na_size; dsattr.dsa_atime = nap->na_atime; dsattr.dsa_mtime = nap->na_mtime; dsattr.dsa_bytes = nap->na_bytes; error = vn_extattr_set(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", sizeof(dsattr), (char *)&dsattr, p); if (error != 0) printf("pNFS: setextattr=%d\n", error); return (error); } static int nfsrv_readdsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred, NFSPROC_T *p, struct nfsmount *nmp, struct mbuf **mpp, struct mbuf **mpendp) { uint32_t *tl; struct nfsrv_descript *nd; nfsv4stateid_t st; struct mbuf *m, *m2; int error = 0, retlen, tlen, trimlen; NFSD_DEBUG(4, "in nfsrv_readdsrpc\n"); nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); *mpp = NULL; /* * Use a stateid where other is an alternating 01010 pattern and * seqid is 0xffffffff. This value is not defined as special by * the RFC and is used by the FreeBSD NFS server to indicate an * MDS->DS proxy operation. */ st.other[0] = 0x55555555; st.other[1] = 0x55555555; st.other[2] = 0x55555555; st.seqid = 0xffffffff; nfscl_reqstart(nd, NFSPROC_READDS, nmp, (u_int8_t *)fhp, sizeof(*fhp), NULL, NULL, 0, 0); nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3); txdr_hyper(off, tl); *(tl + 2) = txdr_unsigned(len); error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) { free(nd, M_TEMP); return (error); } if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); NFSM_STRSIZ(retlen, len); if (retlen > 0) { /* Trim off the pre-data XDR from the mbuf chain. */ m = nd->nd_mrep; while (m != NULL && m != nd->nd_md) { if (m->m_next == nd->nd_md) { m->m_next = NULL; m_freem(nd->nd_mrep); nd->nd_mrep = m = nd->nd_md; } else m = m->m_next; } if (m == NULL) { printf("nfsrv_readdsrpc: busted mbuf list\n"); error = ENOENT; goto nfsmout; } /* * Now, adjust first mbuf so that any XDR before the * read data is skipped over. */ trimlen = nd->nd_dpos - mtod(m, char *); if (trimlen > 0) { m->m_len -= trimlen; NFSM_DATAP(m, trimlen); } /* * Truncate the mbuf chain at retlen bytes of data, * plus XDR padding that brings the length up to a * multiple of 4. */ tlen = NFSM_RNDUP(retlen); do { if (m->m_len >= tlen) { m->m_len = tlen; tlen = 0; m2 = m->m_next; m->m_next = NULL; m_freem(m2); break; } tlen -= m->m_len; m = m->m_next; } while (m != NULL); if (tlen > 0) { printf("nfsrv_readdsrpc: busted mbuf list\n"); error = ENOENT; goto nfsmout; } *mpp = nd->nd_mrep; *mpendp = m; nd->nd_mrep = NULL; } } else error = nd->nd_repstat; nfsmout: /* If nd->nd_mrep is already NULL, this is a no-op. */ m_freem(nd->nd_mrep); free(nd, M_TEMP); NFSD_DEBUG(4, "nfsrv_readdsrpc error=%d\n", error); return (error); } /* * Do a write RPC on a DS data file, using this structure for the arguments, * so that this function can be executed by a separate kernel process. */ struct nfsrvwritedsdorpc { int done; int inprog; struct task tsk; fhandle_t fh; off_t off; int len; struct nfsmount *nmp; struct ucred *cred; NFSPROC_T *p; struct mbuf *m; int err; }; static int nfsrv_writedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, int len, struct nfsvattr *nap, struct mbuf *m, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsrv_descript *nd; nfsattrbit_t attrbits; nfsv4stateid_t st; int commit, error, retlen; nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); nfscl_reqstart(nd, NFSPROC_WRITE, nmp, (u_int8_t *)fhp, sizeof(fhandle_t), NULL, NULL, 0, 0); /* * Use a stateid where other is an alternating 01010 pattern and * seqid is 0xffffffff. This value is not defined as special by * the RFC and is used by the FreeBSD NFS server to indicate an * MDS->DS proxy operation. */ st.other[0] = 0x55555555; st.other[1] = 0x55555555; st.other[2] = 0x55555555; st.seqid = 0xffffffff; nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED); txdr_hyper(off, tl); tl += 2; /* * Do all writes FileSync, since the server doesn't hold onto dirty * buffers. Since clients should be accessing the DS servers directly * using the pNFS layouts, this just needs to work correctly as a * fallback. */ *tl++ = txdr_unsigned(NFSWRITE_FILESYNC); *tl = txdr_unsigned(len); NFSD_DEBUG(4, "nfsrv_writedsdorpc: len=%d\n", len); /* Put data in mbuf chain. */ nd->nd_mb->m_next = m; /* Set nd_mb and nd_bpos to end of data. */ while (m->m_next != NULL) m = m->m_next; nd->nd_mb = m; nfsm_set(nd, m->m_len); NFSD_DEBUG(4, "nfsrv_writedsdorpc: lastmb len=%d\n", m->m_len); /* Do a Getattr for the attributes that change upon writing. */ NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) { free(nd, M_TEMP); return (error); } NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft writerpc=%d\n", nd->nd_repstat); /* Get rid of weak cache consistency data for now. */ if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == (ND_NFSV4 | ND_V4WCCATTR)) { error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); NFSD_DEBUG(4, "nfsrv_writedsdorpc: wcc attr=%d\n", error); if (error != 0) goto nfsmout; /* * Get rid of Op# and status for next op. */ NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); if (*++tl != 0) nd->nd_flag |= ND_NOMOREDATA; } if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF); retlen = fxdr_unsigned(int, *tl++); commit = fxdr_unsigned(int, *tl); if (commit != NFSWRITE_FILESYNC) error = NFSERR_IO; NFSD_DEBUG(4, "nfsrv_writedsdorpc:retlen=%d commit=%d err=%d\n", retlen, commit, error); } else error = nd->nd_repstat; /* We have no use for the Write Verifier since we use FileSync. */ /* * Get the Change, Size, Access Time and Modify Time attributes and set * on the Metadata file, so its attributes will be what the file's * would be if it had been written. */ if (error == 0) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); } NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft loadattr=%d\n", error); nfsmout: m_freem(nd->nd_mrep); free(nd, M_TEMP); NFSD_DEBUG(4, "nfsrv_writedsdorpc error=%d\n", error); return (error); } /* * Start up the thread that will execute nfsrv_writedsdorpc(). */ static void start_writedsdorpc(void *arg, int pending) { struct nfsrvwritedsdorpc *drpc; drpc = (struct nfsrvwritedsdorpc *)arg; drpc->err = nfsrv_writedsdorpc(drpc->nmp, &drpc->fh, drpc->off, drpc->len, NULL, drpc->m, drpc->cred, drpc->p); drpc->done = 1; NFSD_DEBUG(4, "start_writedsdorpc: err=%d\n", drpc->err); } static int nfsrv_writedsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred, NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, struct mbuf **mpp, char *cp, int *failposp) { struct nfsrvwritedsdorpc *drpc, *tdrpc = NULL; struct nfsvattr na; struct mbuf *m; int error, i, offs, ret, timo; NFSD_DEBUG(4, "in nfsrv_writedsrpc\n"); KASSERT(*mpp != NULL, ("nfsrv_writedsrpc: NULL mbuf chain")); drpc = NULL; if (mirrorcnt > 1) tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, M_WAITOK); /* Calculate offset in mbuf chain that data starts. */ offs = cp - mtod(*mpp, char *); NFSD_DEBUG(4, "nfsrv_writedsrpc: mcopy offs=%d len=%d\n", offs, len); /* * Do the write RPC for every DS, using a separate kernel process * for every DS except the last one. */ error = 0; for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { tdrpc->done = 0; NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); tdrpc->off = off; tdrpc->len = len; tdrpc->nmp = *nmpp; tdrpc->cred = cred; tdrpc->p = p; tdrpc->inprog = 0; tdrpc->err = 0; tdrpc->m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK); ret = EIO; if (nfs_pnfsiothreads != 0) { ret = nfs_pnfsio(start_writedsdorpc, tdrpc); NFSD_DEBUG(4, "nfsrv_writedsrpc: nfs_pnfsio=%d\n", ret); } if (ret != 0) { ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, NULL, tdrpc->m, cred, p); if (nfsds_failerr(ret) && *failposp == -1) *failposp = i; else if (error == 0 && ret != 0) error = ret; } nmpp++; fhp++; } m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK); ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, &na, m, cred, p); if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) *failposp = mirrorcnt - 1; else if (error == 0 && ret != 0) error = ret; if (error == 0) error = nfsrv_setextattr(vp, &na, p); NFSD_DEBUG(4, "nfsrv_writedsrpc: aft setextat=%d\n", error); tdrpc = drpc; timo = hz / 50; /* Wait for 20msec. */ if (timo < 1) timo = 1; for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { /* Wait for RPCs on separate threads to complete. */ while (tdrpc->inprog != 0 && tdrpc->done == 0) tsleep(&tdrpc->tsk, PVFS, "srvwrds", timo); if (nfsds_failerr(tdrpc->err) && *failposp == -1) *failposp = i; else if (error == 0 && tdrpc->err != 0) error = tdrpc->err; } free(drpc, M_TEMP); return (error); } /* * Do a allocate RPC on a DS data file, using this structure for the arguments, * so that this function can be executed by a separate kernel process. */ struct nfsrvallocatedsdorpc { int done; int inprog; struct task tsk; fhandle_t fh; off_t off; off_t len; struct nfsmount *nmp; struct ucred *cred; NFSPROC_T *p; int err; }; static int nfsrv_allocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, off_t len, struct nfsvattr *nap, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsrv_descript *nd; nfsattrbit_t attrbits; nfsv4stateid_t st; int error; nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); nfscl_reqstart(nd, NFSPROC_ALLOCATE, nmp, (u_int8_t *)fhp, sizeof(fhandle_t), NULL, NULL, 0, 0); /* * Use a stateid where other is an alternating 01010 pattern and * seqid is 0xffffffff. This value is not defined as special by * the RFC and is used by the FreeBSD NFS server to indicate an * MDS->DS proxy operation. */ st.other[0] = 0x55555555; st.other[1] = 0x55555555; st.other[2] = 0x55555555; st.seqid = 0xffffffff; nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED); txdr_hyper(off, tl); tl += 2; txdr_hyper(len, tl); tl += 2; NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: len=%jd\n", (intmax_t)len); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSGETATTR_ATTRBIT(&attrbits); nfsrv_putattrbit(nd, &attrbits); error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) { free(nd, M_TEMP); return (error); } NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft allocaterpc=%d\n", nd->nd_repstat); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); } else error = nd->nd_repstat; NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft loadattr=%d\n", error); nfsmout: m_freem(nd->nd_mrep); free(nd, M_TEMP); NFSD_DEBUG(4, "nfsrv_allocatedsdorpc error=%d\n", error); return (error); } /* * Start up the thread that will execute nfsrv_allocatedsdorpc(). */ static void start_allocatedsdorpc(void *arg, int pending) { struct nfsrvallocatedsdorpc *drpc; drpc = (struct nfsrvallocatedsdorpc *)arg; drpc->err = nfsrv_allocatedsdorpc(drpc->nmp, &drpc->fh, drpc->off, drpc->len, NULL, drpc->cred, drpc->p); drpc->done = 1; NFSD_DEBUG(4, "start_allocatedsdorpc: err=%d\n", drpc->err); } static int nfsrv_allocatedsrpc(fhandle_t *fhp, off_t off, off_t len, struct ucred *cred, NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, int *failposp) { struct nfsrvallocatedsdorpc *drpc, *tdrpc = NULL; struct nfsvattr na; int error, i, ret, timo; NFSD_DEBUG(4, "in nfsrv_allocatedsrpc\n"); drpc = NULL; if (mirrorcnt > 1) tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, M_WAITOK); /* * Do the allocate RPC for every DS, using a separate kernel process * for every DS except the last one. */ error = 0; for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { tdrpc->done = 0; NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); tdrpc->off = off; tdrpc->len = len; tdrpc->nmp = *nmpp; tdrpc->cred = cred; tdrpc->p = p; tdrpc->inprog = 0; tdrpc->err = 0; ret = EIO; if (nfs_pnfsiothreads != 0) { ret = nfs_pnfsio(start_allocatedsdorpc, tdrpc); NFSD_DEBUG(4, "nfsrv_allocatedsrpc: nfs_pnfsio=%d\n", ret); } if (ret != 0) { ret = nfsrv_allocatedsdorpc(*nmpp, fhp, off, len, NULL, cred, p); if (nfsds_failerr(ret) && *failposp == -1) *failposp = i; else if (error == 0 && ret != 0) error = ret; } nmpp++; fhp++; } ret = nfsrv_allocatedsdorpc(*nmpp, fhp, off, len, &na, cred, p); if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) *failposp = mirrorcnt - 1; else if (error == 0 && ret != 0) error = ret; if (error == 0) error = nfsrv_setextattr(vp, &na, p); NFSD_DEBUG(4, "nfsrv_allocatedsrpc: aft setextat=%d\n", error); tdrpc = drpc; timo = hz / 50; /* Wait for 20msec. */ if (timo < 1) timo = 1; for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { /* Wait for RPCs on separate threads to complete. */ while (tdrpc->inprog != 0 && tdrpc->done == 0) tsleep(&tdrpc->tsk, PVFS, "srvalds", timo); if (nfsds_failerr(tdrpc->err) && *failposp == -1) *failposp = i; else if (error == 0 && tdrpc->err != 0) error = tdrpc->err; } free(drpc, M_TEMP); return (error); } static int nfsrv_setattrdsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap, struct nfsvattr *dsnap) { uint32_t *tl; struct nfsrv_descript *nd; nfsv4stateid_t st; nfsattrbit_t attrbits; int error; NFSD_DEBUG(4, "in nfsrv_setattrdsdorpc\n"); nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); /* * Use a stateid where other is an alternating 01010 pattern and * seqid is 0xffffffff. This value is not defined as special by * the RFC and is used by the FreeBSD NFS server to indicate an * MDS->DS proxy operation. */ st.other[0] = 0x55555555; st.other[1] = 0x55555555; st.other[2] = 0x55555555; st.seqid = 0xffffffff; nfscl_reqstart(nd, NFSPROC_SETATTR, nmp, (u_int8_t *)fhp, sizeof(*fhp), NULL, NULL, 0, 0); nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); nfscl_fillsattr(nd, &nap->na_vattr, vp, NFSSATTR_FULL, 0); /* Do a Getattr for the attributes that change due to writing. */ NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) { free(nd, M_TEMP); return (error); } NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattrrpc=%d\n", nd->nd_repstat); /* Get rid of weak cache consistency data for now. */ if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == (ND_NFSV4 | ND_V4WCCATTR)) { error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: wcc attr=%d\n", error); if (error != 0) goto nfsmout; /* * Get rid of Op# and status for next op. */ NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); if (*++tl != 0) nd->nd_flag |= ND_NOMOREDATA; } error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (error != 0) goto nfsmout; if (nd->nd_repstat != 0) error = nd->nd_repstat; /* * Get the Change, Size, Access Time and Modify Time attributes and set * on the Metadata file, so its attributes will be what the file's * would be if it had been written. */ if (error == 0) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); } NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattr loadattr=%d\n", error); nfsmout: m_freem(nd->nd_mrep); free(nd, M_TEMP); NFSD_DEBUG(4, "nfsrv_setattrdsdorpc error=%d\n", error); return (error); } struct nfsrvsetattrdsdorpc { int done; int inprog; struct task tsk; fhandle_t fh; struct nfsmount *nmp; struct vnode *vp; struct ucred *cred; NFSPROC_T *p; struct nfsvattr na; struct nfsvattr dsna; int err; }; /* * Start up the thread that will execute nfsrv_setattrdsdorpc(). */ static void start_setattrdsdorpc(void *arg, int pending) { struct nfsrvsetattrdsdorpc *drpc; drpc = (struct nfsrvsetattrdsdorpc *)arg; drpc->err = nfsrv_setattrdsdorpc(&drpc->fh, drpc->cred, drpc->p, drpc->vp, drpc->nmp, &drpc->na, &drpc->dsna); drpc->done = 1; } static int nfsrv_setattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, struct nfsvattr *nap, int *failposp) { struct nfsrvsetattrdsdorpc *drpc, *tdrpc = NULL; struct nfsvattr na; int error, i, ret, timo; NFSD_DEBUG(4, "in nfsrv_setattrdsrpc\n"); drpc = NULL; if (mirrorcnt > 1) tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, M_WAITOK); /* * Do the setattr RPC for every DS, using a separate kernel process * for every DS except the last one. */ error = 0; for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { tdrpc->done = 0; tdrpc->inprog = 0; NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); tdrpc->nmp = *nmpp; tdrpc->vp = vp; tdrpc->cred = cred; tdrpc->p = p; tdrpc->na = *nap; tdrpc->err = 0; ret = EIO; if (nfs_pnfsiothreads != 0) { ret = nfs_pnfsio(start_setattrdsdorpc, tdrpc); NFSD_DEBUG(4, "nfsrv_setattrdsrpc: nfs_pnfsio=%d\n", ret); } if (ret != 0) { ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap, &na); if (nfsds_failerr(ret) && *failposp == -1) *failposp = i; else if (error == 0 && ret != 0) error = ret; } nmpp++; fhp++; } ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap, &na); if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) *failposp = mirrorcnt - 1; else if (error == 0 && ret != 0) error = ret; if (error == 0) error = nfsrv_setextattr(vp, &na, p); NFSD_DEBUG(4, "nfsrv_setattrdsrpc: aft setextat=%d\n", error); tdrpc = drpc; timo = hz / 50; /* Wait for 20msec. */ if (timo < 1) timo = 1; for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { /* Wait for RPCs on separate threads to complete. */ while (tdrpc->inprog != 0 && tdrpc->done == 0) tsleep(&tdrpc->tsk, PVFS, "srvsads", timo); if (nfsds_failerr(tdrpc->err) && *failposp == -1) *failposp = i; else if (error == 0 && tdrpc->err != 0) error = tdrpc->err; } free(drpc, M_TEMP); return (error); } /* * Do a Setattr of an NFSv4 ACL on the DS file. */ static int nfsrv_setacldsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, struct vnode *vp, struct nfsmount *nmp, struct acl *aclp) { struct nfsrv_descript *nd; nfsv4stateid_t st; nfsattrbit_t attrbits; int error; NFSD_DEBUG(4, "in nfsrv_setacldsdorpc\n"); nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); /* * Use a stateid where other is an alternating 01010 pattern and * seqid is 0xffffffff. This value is not defined as special by * the RFC and is used by the FreeBSD NFS server to indicate an * MDS->DS proxy operation. */ st.other[0] = 0x55555555; st.other[1] = 0x55555555; st.other[2] = 0x55555555; st.seqid = 0xffffffff; nfscl_reqstart(nd, NFSPROC_SETACL, nmp, (u_int8_t *)fhp, sizeof(*fhp), NULL, NULL, 0, 0); nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL); /* * The "vp" argument to nfsv4_fillattr() is only used for vnode_type(), * so passing in the metadata "vp" will be ok, since it is of * the same type (VREG). */ nfsv4_fillattr(nd, NULL, vp, aclp, NULL, NULL, 0, &attrbits, NULL, NULL, 0, 0, 0, 0, 0, NULL); error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) { free(nd, M_TEMP); return (error); } NFSD_DEBUG(4, "nfsrv_setacldsdorpc: aft setaclrpc=%d\n", nd->nd_repstat); error = nd->nd_repstat; m_freem(nd->nd_mrep); free(nd, M_TEMP); return (error); } struct nfsrvsetacldsdorpc { int done; int inprog; struct task tsk; fhandle_t fh; struct nfsmount *nmp; struct vnode *vp; struct ucred *cred; NFSPROC_T *p; struct acl *aclp; int err; }; /* * Start up the thread that will execute nfsrv_setacldsdorpc(). */ static void start_setacldsdorpc(void *arg, int pending) { struct nfsrvsetacldsdorpc *drpc; drpc = (struct nfsrvsetacldsdorpc *)arg; drpc->err = nfsrv_setacldsdorpc(&drpc->fh, drpc->cred, drpc->p, drpc->vp, drpc->nmp, drpc->aclp); drpc->done = 1; } static int nfsrv_setacldsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, struct acl *aclp, int *failposp) { struct nfsrvsetacldsdorpc *drpc, *tdrpc = NULL; int error, i, ret, timo; NFSD_DEBUG(4, "in nfsrv_setacldsrpc\n"); drpc = NULL; if (mirrorcnt > 1) tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, M_WAITOK); /* * Do the setattr RPC for every DS, using a separate kernel process * for every DS except the last one. */ error = 0; for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { tdrpc->done = 0; tdrpc->inprog = 0; NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); tdrpc->nmp = *nmpp; tdrpc->vp = vp; tdrpc->cred = cred; tdrpc->p = p; tdrpc->aclp = aclp; tdrpc->err = 0; ret = EIO; if (nfs_pnfsiothreads != 0) { ret = nfs_pnfsio(start_setacldsdorpc, tdrpc); NFSD_DEBUG(4, "nfsrv_setacldsrpc: nfs_pnfsio=%d\n", ret); } if (ret != 0) { ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, aclp); if (nfsds_failerr(ret) && *failposp == -1) *failposp = i; else if (error == 0 && ret != 0) error = ret; } nmpp++; fhp++; } ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, aclp); if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) *failposp = mirrorcnt - 1; else if (error == 0 && ret != 0) error = ret; NFSD_DEBUG(4, "nfsrv_setacldsrpc: aft setextat=%d\n", error); tdrpc = drpc; timo = hz / 50; /* Wait for 20msec. */ if (timo < 1) timo = 1; for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { /* Wait for RPCs on separate threads to complete. */ while (tdrpc->inprog != 0 && tdrpc->done == 0) tsleep(&tdrpc->tsk, PVFS, "srvacds", timo); if (nfsds_failerr(tdrpc->err) && *failposp == -1) *failposp = i; else if (error == 0 && tdrpc->err != 0) error = tdrpc->err; } free(drpc, M_TEMP); return (error); } /* * Getattr call to the DS for the attributes that change due to writing. */ static int nfsrv_getattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap) { struct nfsrv_descript *nd; int error; nfsattrbit_t attrbits; NFSD_DEBUG(4, "in nfsrv_getattrdsrpc\n"); nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, (u_int8_t *)fhp, sizeof(fhandle_t), NULL, NULL, 0, 0); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED); (void) nfsrv_putattrbit(nd, &attrbits); error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) { free(nd, M_TEMP); return (error); } NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft getattrrpc=%d\n", nd->nd_repstat); if (nd->nd_repstat == 0) { error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); /* * We can only save the updated values in the extended * attribute if the vp is exclusively locked. * This should happen when any of the following operations * occur on the vnode: * Close, Delegreturn, LayoutCommit, LayoutReturn * As such, the updated extended attribute should get saved * before nfsrv_checkdsattr() returns 0 and allows the cached * attributes to be returned without calling this function. */ if (error == 0 && VOP_ISLOCKED(vp) == LK_EXCLUSIVE) { error = nfsrv_setextattr(vp, nap, p); NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft setextat=%d\n", error); } } else error = nd->nd_repstat; m_freem(nd->nd_mrep); free(nd, M_TEMP); NFSD_DEBUG(4, "nfsrv_getattrdsrpc error=%d\n", error); return (error); } /* * Seek call to a DS. */ static int nfsrv_seekdsrpc(fhandle_t *fhp, off_t *offp, int content, bool *eofp, struct ucred *cred, NFSPROC_T *p, struct nfsmount *nmp) { uint32_t *tl; struct nfsrv_descript *nd; nfsv4stateid_t st; int error; NFSD_DEBUG(4, "in nfsrv_seekdsrpc\n"); /* * Use a stateid where other is an alternating 01010 pattern and * seqid is 0xffffffff. This value is not defined as special by * the RFC and is used by the FreeBSD NFS server to indicate an * MDS->DS proxy operation. */ st.other[0] = 0x55555555; st.other[1] = 0x55555555; st.other[2] = 0x55555555; st.seqid = 0xffffffff; nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); nfscl_reqstart(nd, NFSPROC_SEEKDS, nmp, (u_int8_t *)fhp, sizeof(fhandle_t), NULL, NULL, 0, 0); nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED); txdr_hyper(*offp, tl); tl += 2; *tl = txdr_unsigned(content); error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) { free(nd, M_TEMP); return (error); } NFSD_DEBUG(4, "nfsrv_seekdsrpc: aft seekrpc=%d\n", nd->nd_repstat); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED + NFSX_HYPER); if (*tl++ == newnfs_true) *eofp = true; else *eofp = false; *offp = fxdr_hyper(tl); } else error = nd->nd_repstat; nfsmout: m_freem(nd->nd_mrep); free(nd, M_TEMP); NFSD_DEBUG(4, "nfsrv_seekdsrpc error=%d\n", error); return (error); } /* * Get the device id and file handle for a DS file. */ int nfsrv_dsgetdevandfh(struct vnode *vp, NFSPROC_T *p, int *mirrorcntp, fhandle_t *fhp, char *devid) { int buflen, error; char *buf; buflen = 1024; buf = malloc(buflen, M_TEMP, M_WAITOK); error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, NULL, fhp, devid, NULL, NULL, NULL, NULL, NULL, NULL); free(buf, M_TEMP); return (error); } /* * Do a Lookup against the DS for the filename. */ static int nfsrv_pnfslookupds(struct vnode *vp, struct vnode *dvp, struct pnfsdsfile *pf, struct vnode **nvpp, NFSPROC_T *p) { struct nameidata named; struct ucred *tcred; char *bufp; u_long *hashp; struct vnode *nvp; int error; tcred = newnfs_getcred(); named.ni_cnd.cn_nameiop = LOOKUP; named.ni_cnd.cn_lkflags = LK_SHARED | LK_RETRY; named.ni_cnd.cn_cred = tcred; named.ni_cnd.cn_thread = p; named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF | SAVENAME; nfsvno_setpathbuf(&named, &bufp, &hashp); named.ni_cnd.cn_nameptr = bufp; named.ni_cnd.cn_namelen = strlen(pf->dsf_filename); strlcpy(bufp, pf->dsf_filename, NAME_MAX); NFSD_DEBUG(4, "nfsrv_pnfslookupds: filename=%s\n", bufp); error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd); NFSD_DEBUG(4, "nfsrv_pnfslookupds: aft LOOKUP=%d\n", error); NFSFREECRED(tcred); nfsvno_relpathbuf(&named); if (error == 0) *nvpp = nvp; NFSD_DEBUG(4, "eo nfsrv_pnfslookupds=%d\n", error); return (error); } /* * Set the file handle to the correct one. */ static void nfsrv_pnfssetfh(struct vnode *vp, struct pnfsdsfile *pf, char *devid, char *fnamep, struct vnode *nvp, NFSPROC_T *p) { struct nfsnode *np; int ret = 0; np = VTONFS(nvp); NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh, NFSX_MYFH); /* * We can only do a vn_set_extattr() if the vnode is exclusively * locked and vn_start_write() has been done. If devid != NULL or * fnamep != NULL or the vnode is shared locked, vn_start_write() * may not have been done. * If not done now, it will be done on a future call. */ if (devid == NULL && fnamep == NULL && NFSVOPISLOCKED(vp) == LK_EXCLUSIVE) ret = vn_extattr_set(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", sizeof(*pf), (char *)pf, p); NFSD_DEBUG(4, "eo nfsrv_pnfssetfh=%d\n", ret); } /* * Cause RPCs waiting on "nmp" to fail. This is called for a DS mount point * when the DS has failed. */ void nfsrv_killrpcs(struct nfsmount *nmp) { /* * Call newnfs_nmcancelreqs() to cause * any RPCs in progress on the mount point to * fail. * This will cause any process waiting for an * RPC to complete while holding a vnode lock * on the mounted-on vnode (such as "df" or * a non-forced "umount") to fail. * This will unlock the mounted-on vnode so * a forced dismount can succeed. * The NFSMNTP_CANCELRPCS flag should be set when this function is * called. */ newnfs_nmcancelreqs(nmp); } /* * Sum up the statfs info for each of the DSs, so that the client will * receive the total for all DSs. */ static int nfsrv_pnfsstatfs(struct statfs *sf, struct mount *mp) { struct statfs *tsf; struct nfsdevice *ds; struct vnode **dvpp, **tdvpp, *dvp; uint64_t tot; int cnt, error = 0, i; if (nfsrv_devidcnt <= 0) return (ENXIO); dvpp = mallocarray(nfsrv_devidcnt, sizeof(*dvpp), M_TEMP, M_WAITOK); tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK); /* Get an array of the dvps for the DSs. */ tdvpp = dvpp; i = 0; NFSDDSLOCK(); /* First, search for matches for same file system. */ TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { if (ds->nfsdev_nmp != NULL && ds->nfsdev_mdsisset != 0 && fsidcmp(&ds->nfsdev_mdsfsid, &mp->mnt_stat.f_fsid) == 0) { if (++i > nfsrv_devidcnt) break; *tdvpp++ = ds->nfsdev_dvp; } } /* * If no matches for same file system, total all servers not assigned * to a file system. */ if (i == 0) { TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { if (ds->nfsdev_nmp != NULL && ds->nfsdev_mdsisset == 0) { if (++i > nfsrv_devidcnt) break; *tdvpp++ = ds->nfsdev_dvp; } } } NFSDDSUNLOCK(); cnt = i; /* Do a VFS_STATFS() for each of the DSs and sum them up. */ tdvpp = dvpp; for (i = 0; i < cnt && error == 0; i++) { dvp = *tdvpp++; error = VFS_STATFS(dvp->v_mount, tsf); if (error == 0) { if (sf->f_bsize == 0) { if (tsf->f_bsize > 0) sf->f_bsize = tsf->f_bsize; else sf->f_bsize = 8192; } if (tsf->f_blocks > 0) { if (sf->f_bsize != tsf->f_bsize) { tot = tsf->f_blocks * tsf->f_bsize; sf->f_blocks += (tot / sf->f_bsize); } else sf->f_blocks += tsf->f_blocks; } if (tsf->f_bfree > 0) { if (sf->f_bsize != tsf->f_bsize) { tot = tsf->f_bfree * tsf->f_bsize; sf->f_bfree += (tot / sf->f_bsize); } else sf->f_bfree += tsf->f_bfree; } if (tsf->f_bavail > 0) { if (sf->f_bsize != tsf->f_bsize) { tot = tsf->f_bavail * tsf->f_bsize; sf->f_bavail += (tot / sf->f_bsize); } else sf->f_bavail += tsf->f_bavail; } } } free(tsf, M_TEMP); free(dvpp, M_TEMP); return (error); } /* * Set an NFSv4 acl. */ int nfsrv_setacl(struct vnode *vp, NFSACL_T *aclp, struct ucred *cred, NFSPROC_T *p) { int error; if (nfsrv_useacl == 0 || nfs_supportsnfsv4acls(vp) == 0) { error = NFSERR_ATTRNOTSUPP; goto out; } /* * With NFSv4 ACLs, chmod(2) may need to add additional entries. * Make sure it has enough room for that - splitting every entry * into two and appending "canonical six" entries at the end. * Cribbed out of kern/vfs_acl.c - Rick M. */ if (aclp->acl_cnt > (ACL_MAX_ENTRIES - 6) / 2) { error = NFSERR_ATTRNOTSUPP; goto out; } error = VOP_SETACL(vp, ACL_TYPE_NFS4, aclp, cred, p); if (error == 0) { error = nfsrv_dssetacl(vp, aclp, cred, p); if (error == ENOENT) error = 0; } out: NFSEXITCODE(error); return (error); } /* * Seek vnode op call (actually it is a VOP_IOCTL()). * This function is called with the vnode locked, but unlocks and vrele()s * the vp before returning. */ int nfsvno_seek(struct nfsrv_descript *nd, struct vnode *vp, u_long cmd, off_t *offp, int content, bool *eofp, struct ucred *cred, NFSPROC_T *p) { struct nfsvattr at; int error, ret; ASSERT_VOP_LOCKED(vp, "nfsvno_seek vp"); /* * Attempt to seek on a DS file. A return of ENOENT implies * there is no DS file to seek on. */ error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SEEKDS, NULL, NULL, NULL, NULL, NULL, offp, content, eofp); if (error != ENOENT) { vput(vp); return (error); } /* * Do the VOP_IOCTL() call. For the case where *offp == file_size, * VOP_IOCTL() will return ENXIO. However, the correct reply for * NFSv4.2 is *eofp == true and error == 0 for this case. */ NFSVOPUNLOCK(vp); error = VOP_IOCTL(vp, cmd, offp, 0, cred, p); *eofp = false; if (error == ENXIO || (error == 0 && cmd == FIOSEEKHOLE)) { /* Handle the cases where we might be at EOF. */ ret = nfsvno_getattr(vp, &at, nd, p, 0, NULL); if (ret == 0 && *offp == at.na_size) { *eofp = true; error = 0; } if (ret != 0 && error == 0) error = ret; } vrele(vp); NFSEXITCODE(error); return (error); } /* * Allocate vnode op call. */ int nfsvno_allocate(struct vnode *vp, off_t off, off_t len, struct ucred *cred, NFSPROC_T *p) { int error, trycnt; ASSERT_VOP_ELOCKED(vp, "nfsvno_allocate vp"); /* * Attempt to allocate on a DS file. A return of ENOENT implies * there is no DS file to allocate on. */ error = nfsrv_proxyds(vp, off, 0, cred, p, NFSPROC_ALLOCATE, NULL, NULL, NULL, NULL, NULL, &len, 0, NULL); if (error != ENOENT) return (error); error = 0; /* * Do the actual VOP_ALLOCATE(), looping a reasonable number of * times to achieve completion. */ trycnt = 0; while (error == 0 && len > 0 && trycnt++ < 20) error = VOP_ALLOCATE(vp, &off, &len); if (error == 0 && len > 0) error = NFSERR_IO; NFSEXITCODE(error); return (error); } /* * Get Extended Atribute vnode op into an mbuf list. */ int nfsvno_getxattr(struct vnode *vp, char *name, uint32_t maxresp, struct ucred *cred, uint64_t flag, int maxextsiz, struct thread *p, struct mbuf **mpp, struct mbuf **mpendp, int *lenp) { struct iovec *iv; struct uio io, *uiop = &io; struct mbuf *m, *m2; int alen, error, len, tlen; size_t siz; /* First, find out the size of the extended attribute. */ error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL, &siz, cred, p); if (error != 0) return (NFSERR_NOXATTR); if (siz > maxresp - NFS_MAXXDR) return (NFSERR_XATTR2BIG); len = siz; tlen = NFSM_RNDUP(len); if (tlen > 0) { /* * If cnt > MCLBYTES and the reply will not be saved, use * ext_pgs mbufs for TLS. * For NFSv4.0, we do not know for sure if the reply will * be saved, so do not use ext_pgs mbufs for NFSv4.0. * Always use ext_pgs mbufs if ND_EXTPG is set. */ if ((flag & ND_EXTPG) != 0 || (tlen > MCLBYTES && (flag & (ND_TLS | ND_SAVEREPLY)) == ND_TLS && (flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4)) uiop->uio_iovcnt = nfsrv_createiovec_extpgs(tlen, maxextsiz, &m, &m2, &iv); else uiop->uio_iovcnt = nfsrv_createiovec(tlen, &m, &m2, &iv); uiop->uio_iov = iv; } else { uiop->uio_iovcnt = 0; uiop->uio_iov = iv = NULL; m = m2 = NULL; } uiop->uio_offset = 0; uiop->uio_resid = tlen; uiop->uio_rw = UIO_READ; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = p; #ifdef MAC error = mac_vnode_check_getextattr(cred, vp, EXTATTR_NAMESPACE_USER, name); if (error != 0) goto out; #endif if (tlen > 0) error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop, NULL, cred, p); if (error != 0) goto out; if (uiop->uio_resid > 0) { alen = tlen; len = tlen - uiop->uio_resid; tlen = NFSM_RNDUP(len); if (alen != tlen) printf("nfsvno_getxattr: weird size read\n"); if (tlen == 0) { m_freem(m); m = m2 = NULL; } else if (alen != tlen || tlen != len) m2 = nfsrv_adj(m, alen - tlen, tlen - len); } *lenp = len; *mpp = m; *mpendp = m2; out: if (error != 0) { if (m != NULL) m_freem(m); *lenp = 0; } free(iv, M_TEMP); NFSEXITCODE(error); return (error); } /* * Set Extended attribute vnode op from an mbuf list. */ int nfsvno_setxattr(struct vnode *vp, char *name, int len, struct mbuf *m, char *cp, struct ucred *cred, struct thread *p) { struct iovec *iv; struct uio uio, *uiop = &uio; int cnt, error; error = 0; #ifdef MAC error = mac_vnode_check_setextattr(cred, vp, EXTATTR_NAMESPACE_USER, name); #endif if (error != 0) goto out; uiop->uio_rw = UIO_WRITE; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = p; uiop->uio_offset = 0; uiop->uio_resid = len; if (len > 0) { error = nfsrv_createiovecw(len, m, cp, &iv, &cnt); uiop->uio_iov = iv; uiop->uio_iovcnt = cnt; } else { uiop->uio_iov = iv = NULL; uiop->uio_iovcnt = 0; } if (error == 0) { error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop, cred, p); free(iv, M_TEMP); } out: NFSEXITCODE(error); return (error); } /* * Remove Extended attribute vnode op. */ int nfsvno_rmxattr(struct nfsrv_descript *nd, struct vnode *vp, char *name, struct ucred *cred, struct thread *p) { int error; /* * Get rid of any delegations. I am not sure why this is required, * but RFC-8276 says so. */ error = nfsrv_checkremove(vp, 0, nd, nd->nd_clientid, p); if (error != 0) goto out; #ifdef MAC error = mac_vnode_check_deleteextattr(cred, vp, EXTATTR_NAMESPACE_USER, name); if (error != 0) goto out; #endif error = VOP_DELETEEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, cred, p); if (error == EOPNOTSUPP) error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL, cred, p); out: NFSEXITCODE(error); return (error); } /* * List Extended Atribute vnode op into an mbuf list. */ int nfsvno_listxattr(struct vnode *vp, uint64_t cookie, struct ucred *cred, struct thread *p, u_char **bufp, uint32_t *lenp, bool *eofp) { struct iovec iv; struct uio io; int error; size_t siz; *bufp = NULL; /* First, find out the size of the extended attribute. */ error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, NULL, &siz, cred, p); if (error != 0) return (NFSERR_NOXATTR); if (siz <= cookie) { *lenp = 0; *eofp = true; goto out; } if (siz > cookie + *lenp) { siz = cookie + *lenp; *eofp = false; } else *eofp = true; /* Just choose a sanity limit of 10Mbytes for malloc(M_TEMP). */ if (siz > 10 * 1024 * 1024) { error = NFSERR_XATTR2BIG; goto out; } *bufp = malloc(siz, M_TEMP, M_WAITOK); iv.iov_base = *bufp; iv.iov_len = siz; io.uio_iovcnt = 1; io.uio_iov = &iv; io.uio_offset = 0; io.uio_resid = siz; io.uio_rw = UIO_READ; io.uio_segflg = UIO_SYSSPACE; io.uio_td = p; #ifdef MAC error = mac_vnode_check_listextattr(cred, vp, EXTATTR_NAMESPACE_USER); if (error != 0) goto out; #endif error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, &io, NULL, cred, p); if (error != 0) goto out; if (io.uio_resid > 0) siz -= io.uio_resid; *lenp = siz; out: if (error != 0) { free(*bufp, M_TEMP); *bufp = NULL; } NFSEXITCODE(error); return (error); } /* * Trim trailing data off the mbuf list being built. */ void nfsm_trimtrailing(struct nfsrv_descript *nd, struct mbuf *mb, char *bpos, int bextpg, int bextpgsiz) { vm_page_t pg; int fullpgsiz, i; if (mb->m_next != NULL) { m_freem(mb->m_next); mb->m_next = NULL; } if ((mb->m_flags & M_EXTPG) != 0) { KASSERT(bextpg >= 0 && bextpg < mb->m_epg_npgs, ("nfsm_trimtrailing: bextpg out of range")); KASSERT(bpos == (char *)(void *) PHYS_TO_DMAP(mb->m_epg_pa[bextpg]) + PAGE_SIZE - bextpgsiz, ("nfsm_trimtrailing: bextpgsiz bad!")); /* First, get rid of any pages after this position. */ for (i = mb->m_epg_npgs - 1; i > bextpg; i--) { pg = PHYS_TO_VM_PAGE(mb->m_epg_pa[i]); vm_page_unwire_noq(pg); vm_page_free(pg); } mb->m_epg_npgs = bextpg + 1; if (bextpg == 0) fullpgsiz = PAGE_SIZE - mb->m_epg_1st_off; else fullpgsiz = PAGE_SIZE; mb->m_epg_last_len = fullpgsiz - bextpgsiz; mb->m_len = m_epg_pagelen(mb, 0, mb->m_epg_1st_off); for (i = 1; i < mb->m_epg_npgs; i++) mb->m_len += m_epg_pagelen(mb, i, 0); nd->nd_bextpgsiz = bextpgsiz; nd->nd_bextpg = bextpg; } else mb->m_len = bpos - mtod(mb, char *); nd->nd_mb = mb; nd->nd_bpos = bpos; } /* * Check to see if a put file handle operation should test for * NFSERR_WRONGSEC, although NFSv3 actually returns NFSERR_AUTHERR. * When Open is the next operation, NFSERR_WRONGSEC cannot be * replied for the Open cases that use a component. Thia can * be identified by the fact that the file handle's type is VDIR. */ bool nfsrv_checkwrongsec(struct nfsrv_descript *nd, int nextop, enum vtype vtyp) { if ((nd->nd_flag & ND_NFSV4) == 0) { if (nd->nd_procnum == NFSPROC_FSINFO) return (false); return (true); } if ((nd->nd_flag & ND_LASTOP) != 0) return (false); if (nextop == NFSV4OP_PUTROOTFH || nextop == NFSV4OP_PUTFH || nextop == NFSV4OP_PUTPUBFH || nextop == NFSV4OP_RESTOREFH || nextop == NFSV4OP_LOOKUP || nextop == NFSV4OP_LOOKUPP || nextop == NFSV4OP_SECINFO || nextop == NFSV4OP_SECINFONONAME) return (false); if (nextop == NFSV4OP_OPEN && vtyp == VDIR) return (false); return (true); } extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *); /* * Called once to initialize data structures... */ static int nfsd_modevent(module_t mod, int type, void *data) { int error = 0, i; static int loaded = 0; switch (type) { case MOD_LOAD: if (loaded) goto out; newnfs_portinit(); for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { mtx_init(&nfsrchash_table[i].mtx, "nfsrtc", NULL, MTX_DEF); mtx_init(&nfsrcahash_table[i].mtx, "nfsrtca", NULL, MTX_DEF); } mtx_init(&nfsrc_udpmtx, "nfsuc", NULL, MTX_DEF); mtx_init(&nfs_v4root_mutex, "nfs4rt", NULL, MTX_DEF); mtx_init(&nfsv4root_mnt.mnt_mtx, "nfs4mnt", NULL, MTX_DEF); mtx_init(&nfsrv_dontlistlock_mtx, "nfs4dnl", NULL, MTX_DEF); mtx_init(&nfsrv_recalllock_mtx, "nfs4rec", NULL, MTX_DEF); lockinit(&nfsv4root_mnt.mnt_explock, PVFS, "explock", 0, 0); nfsrvd_initcache(); nfsd_init(); NFSD_LOCK(); nfsrvd_init(0); NFSD_UNLOCK(); nfsd_mntinit(); #ifdef VV_DISABLEDELEG vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation; vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation; #endif nfsd_call_servertimer = nfsrv_servertimer; nfsd_call_nfsd = nfssvc_nfsd; loaded = 1; break; case MOD_UNLOAD: if (newnfs_numnfsd != 0) { error = EBUSY; break; } #ifdef VV_DISABLEDELEG vn_deleg_ops.vndeleg_recall = NULL; vn_deleg_ops.vndeleg_disable = NULL; #endif nfsd_call_servertimer = NULL; nfsd_call_nfsd = NULL; /* Clean out all NFSv4 state. */ nfsrv_throwawayallstate(curthread); /* Clean the NFS server reply cache */ nfsrvd_cleancache(); /* Free up the krpc server pool. */ if (nfsrvd_pool != NULL) svcpool_destroy(nfsrvd_pool); /* and get rid of the locks */ for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { mtx_destroy(&nfsrchash_table[i].mtx); mtx_destroy(&nfsrcahash_table[i].mtx); } mtx_destroy(&nfsrc_udpmtx); mtx_destroy(&nfs_v4root_mutex); mtx_destroy(&nfsv4root_mnt.mnt_mtx); mtx_destroy(&nfsrv_dontlistlock_mtx); mtx_destroy(&nfsrv_recalllock_mtx); for (i = 0; i < nfsrv_sessionhashsize; i++) mtx_destroy(&nfssessionhash[i].mtx); if (nfslayouthash != NULL) { for (i = 0; i < nfsrv_layouthashsize; i++) mtx_destroy(&nfslayouthash[i].mtx); free(nfslayouthash, M_NFSDSESSION); } lockdestroy(&nfsv4root_mnt.mnt_explock); free(nfsclienthash, M_NFSDCLIENT); free(nfslockhash, M_NFSDLOCKFILE); free(nfssessionhash, M_NFSDSESSION); loaded = 0; break; default: error = EOPNOTSUPP; break; } out: NFSEXITCODE(error); return (error); } static moduledata_t nfsd_mod = { "nfsd", nfsd_modevent, NULL, }; DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY); /* So that loader and kldload(2) can find us, wherever we are.. */ MODULE_VERSION(nfsd, 1); MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1); MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1); MODULE_DEPEND(nfsd, krpc, 1, 1, 1); MODULE_DEPEND(nfsd, nfssvc, 1, 1, 1); diff --git a/sys/fs/nfsserver/nfs_nfsdserv.c b/sys/fs/nfsserver/nfs_nfsdserv.c index e564a6a48b79..0ba3472b4ff9 100644 --- a/sys/fs/nfsserver/nfs_nfsdserv.c +++ b/sys/fs/nfsserver/nfs_nfsdserv.c @@ -1,6022 +1,6028 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" /* * nfs version 2, 3 and 4 server calls to vnode ops * - these routines generally have 3 phases * 1 - break down and validate rpc request in mbuf list * 2 - do the vnode ops for the request, usually by calling a nfsvno_XXX() * function in nfsd_port.c * 3 - build the rpc reply in an mbuf list * For nfsv4, these functions are called for each Op within the Compound RPC. */ #include #include #include /* Global vars */ extern u_int32_t newnfs_false, newnfs_true; extern enum vtype nv34tov_type[8]; extern struct timeval nfsboottime; extern int nfsrv_enable_crossmntpt; extern int nfsrv_statehashsize; extern int nfsrv_layouthashsize; extern time_t nfsdev_time; extern volatile int nfsrv_devidcnt; extern int nfsd_debuglevel; extern u_long sb_max_adj; extern int nfsrv_pnfsatime; extern int nfsrv_maxpnfsmirror; extern int nfs_maxcopyrange; +extern uint32_t nfs_srvmaxio; static int nfs_async = 0; SYSCTL_DECL(_vfs_nfsd); SYSCTL_INT(_vfs_nfsd, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0, "Tell client that writes were synced even though they were not"); extern int nfsrv_doflexfile; SYSCTL_INT(_vfs_nfsd, OID_AUTO, default_flexfile, CTLFLAG_RW, &nfsrv_doflexfile, 0, "Make Flex File Layout the default for pNFS"); static int nfsrv_linux42server = 1; SYSCTL_INT(_vfs_nfsd, OID_AUTO, linux42server, CTLFLAG_RW, &nfsrv_linux42server, 0, "Enable Linux style NFSv4.2 server (non-RFC compliant)"); static bool nfsrv_openaccess = true; SYSCTL_BOOL(_vfs_nfsd, OID_AUTO, v4openaccess, CTLFLAG_RW, &nfsrv_openaccess, 0, "Enable Linux style NFSv4 Open access check"); /* * This list defines the GSS mechanisms supported. * (Don't ask me how you get these strings from the RFC stuff like * iso(1), org(3)... but someone did it, so I don't need to know.) */ static struct nfsgss_mechlist nfsgss_mechlist[] = { { 9, "\052\206\110\206\367\022\001\002\002", 11 }, { 0, "", 0 }, }; /* local functions */ static void nfsrvd_symlinksub(struct nfsrv_descript *nd, struct nameidata *ndp, struct nfsvattr *nvap, fhandle_t *fhp, vnode_t *vpp, vnode_t dirp, struct nfsvattr *dirforp, struct nfsvattr *diraftp, int *diraft_retp, nfsattrbit_t *attrbitp, NFSACL_T *aclp, NFSPROC_T *p, struct nfsexstuff *exp, char *pathcp, int pathlen); static void nfsrvd_mkdirsub(struct nfsrv_descript *nd, struct nameidata *ndp, struct nfsvattr *nvap, fhandle_t *fhp, vnode_t *vpp, vnode_t dirp, struct nfsvattr *dirforp, struct nfsvattr *diraftp, int *diraft_retp, nfsattrbit_t *attrbitp, NFSACL_T *aclp, NFSPROC_T *p, struct nfsexstuff *exp); /* * nfs access service (not a part of NFS V2) */ int nfsrvd_access(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { u_int32_t *tl; int getret, error = 0; struct nfsvattr nva; u_int32_t testmode, nfsmode, supported = 0; accmode_t deletebit; struct thread *p = curthread; if (nd->nd_repstat) { nfsrv_postopattr(nd, 1, &nva); goto out; } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nfsmode = fxdr_unsigned(u_int32_t, *tl); if ((nd->nd_flag & ND_NFSV4) && (nfsmode & ~(NFSACCESS_READ | NFSACCESS_LOOKUP | NFSACCESS_MODIFY | NFSACCESS_EXTEND | NFSACCESS_DELETE | NFSACCESS_EXECUTE | NFSACCESS_XAREAD | NFSACCESS_XAWRITE | NFSACCESS_XALIST))) { nd->nd_repstat = NFSERR_INVAL; vput(vp); goto out; } if (nfsmode & NFSACCESS_READ) { supported |= NFSACCESS_READ; if (nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~NFSACCESS_READ; } if (nfsmode & NFSACCESS_MODIFY) { supported |= NFSACCESS_MODIFY; if (nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~NFSACCESS_MODIFY; } if (nfsmode & NFSACCESS_EXTEND) { supported |= NFSACCESS_EXTEND; if (nfsvno_accchk(vp, VWRITE | VAPPEND, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~NFSACCESS_EXTEND; } if (nfsmode & NFSACCESS_XAREAD) { supported |= NFSACCESS_XAREAD; if (nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~NFSACCESS_XAREAD; } if (nfsmode & NFSACCESS_XAWRITE) { supported |= NFSACCESS_XAWRITE; if (nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~NFSACCESS_XAWRITE; } if (nfsmode & NFSACCESS_XALIST) { supported |= NFSACCESS_XALIST; if (nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~NFSACCESS_XALIST; } if (nfsmode & NFSACCESS_DELETE) { supported |= NFSACCESS_DELETE; if (vp->v_type == VDIR) deletebit = VDELETE_CHILD; else deletebit = VDELETE; if (nfsvno_accchk(vp, deletebit, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~NFSACCESS_DELETE; } if (vnode_vtype(vp) == VDIR) testmode = NFSACCESS_LOOKUP; else testmode = NFSACCESS_EXECUTE; if (nfsmode & testmode) { supported |= (nfsmode & testmode); if (nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~testmode; } nfsmode &= supported; if (nd->nd_flag & ND_NFSV3) { getret = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); nfsrv_postopattr(nd, getret, &nva); } vput(vp); if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(supported); } else NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(nfsmode); out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfs getattr service */ int nfsrvd_getattr(struct nfsrv_descript *nd, int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { struct nfsvattr nva; fhandle_t fh; int at_root = 0, error = 0, supports_nfsv4acls; struct nfsreferral *refp; nfsattrbit_t attrbits, tmpbits; struct mount *mp; struct vnode *tvp = NULL; struct vattr va; uint64_t mounted_on_fileno = 0; accmode_t accmode; struct thread *p = curthread; if (nd->nd_repstat) goto out; if (nd->nd_flag & ND_NFSV4) { error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (error) { vput(vp); goto out; } /* * Check for a referral. */ refp = nfsv4root_getreferral(vp, NULL, 0); if (refp != NULL) { (void) nfsrv_putreferralattr(nd, &attrbits, refp, 1, &nd->nd_repstat); vput(vp); goto out; } if (nd->nd_repstat == 0) { accmode = 0; NFSSET_ATTRBIT(&tmpbits, &attrbits); /* * GETATTR with write-only attr time_access_set and time_modify_set * should return NFS4ERR_INVAL. */ if (NFSISSET_ATTRBIT(&tmpbits, NFSATTRBIT_TIMEACCESSSET) || NFSISSET_ATTRBIT(&tmpbits, NFSATTRBIT_TIMEMODIFYSET)){ error = NFSERR_INVAL; vput(vp); goto out; } if (NFSISSET_ATTRBIT(&tmpbits, NFSATTRBIT_ACL)) { NFSCLRBIT_ATTRBIT(&tmpbits, NFSATTRBIT_ACL); accmode |= VREAD_ACL; } if (NFSNONZERO_ATTRBIT(&tmpbits)) accmode |= VREAD_ATTRIBUTES; if (accmode != 0) nd->nd_repstat = nfsvno_accchk(vp, accmode, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); } } if (!nd->nd_repstat) nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, &attrbits); if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV4) { if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_FILEHANDLE)) nd->nd_repstat = nfsvno_getfh(vp, &fh, p); if (!nd->nd_repstat) nd->nd_repstat = nfsrv_checkgetattr(nd, vp, &nva, &attrbits, p); if (nd->nd_repstat == 0) { supports_nfsv4acls = nfs_supportsnfsv4acls(vp); mp = vp->v_mount; if (nfsrv_enable_crossmntpt != 0 && vp->v_type == VDIR && (vp->v_vflag & VV_ROOT) != 0 && vp != rootvnode) { tvp = mp->mnt_vnodecovered; VREF(tvp); at_root = 1; } else at_root = 0; vfs_ref(mp); NFSVOPUNLOCK(vp); if (at_root != 0) { if ((nd->nd_repstat = NFSVOPLOCK(tvp, LK_SHARED)) == 0) { nd->nd_repstat = VOP_GETATTR( tvp, &va, nd->nd_cred); vput(tvp); } else vrele(tvp); if (nd->nd_repstat == 0) mounted_on_fileno = (uint64_t) va.va_fileid; else at_root = 0; } if (nd->nd_repstat == 0) nd->nd_repstat = vfs_busy(mp, 0); vfs_rel(mp); if (nd->nd_repstat == 0) { (void)nfsvno_fillattr(nd, mp, vp, &nva, &fh, 0, &attrbits, nd->nd_cred, p, isdgram, 1, supports_nfsv4acls, at_root, mounted_on_fileno); vfs_unbusy(mp); } vrele(vp); } else vput(vp); } else { nfsrv_fillattr(nd, &nva); vput(vp); } } else { vput(vp); } out: NFSEXITCODE2(error, nd); return (error); } /* * nfs setattr service */ int nfsrvd_setattr(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { struct nfsvattr nva, nva2; u_int32_t *tl; int preat_ret = 1, postat_ret = 1, gcheck = 0, error = 0; int gotproxystateid; struct timespec guard = { 0, 0 }; nfsattrbit_t attrbits, retbits; nfsv4stateid_t stateid; NFSACL_T *aclp = NULL; struct thread *p = curthread; if (nd->nd_repstat) { nfsrv_wcc(nd, preat_ret, &nva2, postat_ret, &nva); goto out; } #ifdef NFS4_ACL_EXTATTR_NAME aclp = acl_alloc(M_WAITOK); aclp->acl_cnt = 0; #endif gotproxystateid = 0; NFSVNO_ATTRINIT(&nva); if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); stateid.other[0] = *tl++; stateid.other[1] = *tl++; stateid.other[2] = *tl; if (stateid.other[0] == 0x55555555 && stateid.other[1] == 0x55555555 && stateid.other[2] == 0x55555555 && stateid.seqid == 0xffffffff) gotproxystateid = 1; } error = nfsrv_sattr(nd, vp, &nva, &attrbits, aclp, p); if (error) goto nfsmout; /* For NFSv4, only va_uid is used from nva2. */ NFSZERO_ATTRBIT(&retbits); NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_OWNER); preat_ret = nfsvno_getattr(vp, &nva2, nd, p, 1, &retbits); if (!nd->nd_repstat) nd->nd_repstat = preat_ret; NFSZERO_ATTRBIT(&retbits); if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); gcheck = fxdr_unsigned(int, *tl); if (gcheck) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); fxdr_nfsv3time(tl, &guard); } if (!nd->nd_repstat && gcheck && (nva2.na_ctime.tv_sec != guard.tv_sec || nva2.na_ctime.tv_nsec != guard.tv_nsec)) nd->nd_repstat = NFSERR_NOT_SYNC; if (nd->nd_repstat) { vput(vp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif nfsrv_wcc(nd, preat_ret, &nva2, postat_ret, &nva); goto out; } } else if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV4)) nd->nd_repstat = nfsrv_checkuidgid(nd, &nva); /* * Now that we have all the fields, lets do it. * If the size is being changed write access is required, otherwise * just check for a read only file system. */ if (!nd->nd_repstat) { if (NFSVNO_NOTSETSIZE(&nva)) { if (NFSVNO_EXRDONLY(exp) || (vfs_flags(vp->v_mount) & MNT_RDONLY)) nd->nd_repstat = EROFS; } else { if (vnode_vtype(vp) != VREG) nd->nd_repstat = EINVAL; else if (nva2.na_uid != nd->nd_cred->cr_uid || NFSVNO_EXSTRICTACCESS(exp)) nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); } } /* * Proxy operations from the MDS are allowed via the all 0s special * stateid. */ if (nd->nd_repstat == 0 && (nd->nd_flag & ND_NFSV4) != 0 && gotproxystateid == 0) nd->nd_repstat = nfsrv_checksetattr(vp, nd, &stateid, &nva, &attrbits, exp, p); if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV4)) { /* * For V4, try setting the attrbutes in sets, so that the * reply bitmap will be correct for an error case. */ if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_OWNER) || NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_OWNERGROUP)) { NFSVNO_ATTRINIT(&nva2); NFSVNO_SETATTRVAL(&nva2, uid, nva.na_uid); NFSVNO_SETATTRVAL(&nva2, gid, nva.na_gid); nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p, exp); if (!nd->nd_repstat) { if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_OWNER)) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_OWNER); if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_OWNERGROUP)) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_OWNERGROUP); } } if (!nd->nd_repstat && NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_SIZE)) { NFSVNO_ATTRINIT(&nva2); NFSVNO_SETATTRVAL(&nva2, size, nva.na_size); nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p, exp); if (!nd->nd_repstat) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_SIZE); } if (!nd->nd_repstat && (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET) || NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFYSET))) { NFSVNO_ATTRINIT(&nva2); NFSVNO_SETATTRVAL(&nva2, atime, nva.na_atime); NFSVNO_SETATTRVAL(&nva2, mtime, nva.na_mtime); if (nva.na_vaflags & VA_UTIMES_NULL) { nva2.na_vaflags |= VA_UTIMES_NULL; NFSVNO_SETACTIVE(&nva2, vaflags); } nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p, exp); if (!nd->nd_repstat) { if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET)) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_TIMEACCESSSET); if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFYSET)) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_TIMEMODIFYSET); } } if (!nd->nd_repstat && NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMECREATE)) { NFSVNO_ATTRINIT(&nva2); NFSVNO_SETATTRVAL(&nva2, btime, nva.na_btime); nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p, exp); if (!nd->nd_repstat) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_TIMECREATE); } if (!nd->nd_repstat && (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_MODE) || NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_MODESETMASKED))) { NFSVNO_ATTRINIT(&nva2); NFSVNO_SETATTRVAL(&nva2, mode, nva.na_mode); nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p, exp); if (!nd->nd_repstat) { if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_MODE)) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_MODE); if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_MODESETMASKED)) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_MODESETMASKED); } } #ifdef NFS4_ACL_EXTATTR_NAME if (!nd->nd_repstat && aclp->acl_cnt > 0 && NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_ACL)) { nd->nd_repstat = nfsrv_setacl(vp, aclp, nd->nd_cred, p); if (!nd->nd_repstat) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_ACL); } #endif } else if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_setattr(vp, &nva, nd->nd_cred, p, exp); } if (nd->nd_flag & (ND_NFSV2 | ND_NFSV3)) { postat_ret = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); if (!nd->nd_repstat) nd->nd_repstat = postat_ret; } vput(vp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, preat_ret, &nva2, postat_ret, &nva); else if (nd->nd_flag & ND_NFSV4) (void) nfsrv_putattrbit(nd, &retbits); else if (!nd->nd_repstat) nfsrv_fillattr(nd, &nva); out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif if (nd->nd_flag & ND_NFSV4) { /* * For all nd_repstat, the V4 reply includes a bitmap, * even NFSERR_BADXDR, which is what this will end up * returning. */ (void) nfsrv_putattrbit(nd, &retbits); } NFSEXITCODE2(error, nd); return (error); } /* * nfs lookup rpc * (Also performs lookup parent for v4) */ int nfsrvd_lookup(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, vnode_t *vpp, fhandle_t *fhp, struct nfsexstuff *exp) { struct nameidata named; vnode_t vp, dirp = NULL; int error = 0, dattr_ret = 1; struct nfsvattr nva, dattr; char *bufp; u_long *hashp; struct thread *p = curthread; if (nd->nd_repstat) { nfsrv_postopattr(nd, dattr_ret, &dattr); goto out; } /* * For some reason, if dp is a symlink, the error * returned is supposed to be NFSERR_SYMLINK and not NFSERR_NOTDIR. */ if (dp->v_type == VLNK && (nd->nd_flag & ND_NFSV4)) { nd->nd_repstat = NFSERR_SYMLINK; vrele(dp); goto out; } NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, LOOKUP, LOCKLEAF | SAVESTART); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) { vrele(dp); nfsvno_relpathbuf(&named); goto out; } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, p, &dirp); } else { vrele(dp); nfsvno_relpathbuf(&named); } if (nd->nd_repstat) { if (dirp) { if (nd->nd_flag & ND_NFSV3) dattr_ret = nfsvno_getattr(dirp, &dattr, nd, p, 0, NULL); vrele(dirp); } if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, dattr_ret, &dattr); goto out; } if (named.ni_startdir) vrele(named.ni_startdir); nfsvno_relpathbuf(&named); vp = named.ni_vp; if ((nd->nd_flag & ND_NFSV4) != 0 && !NFSVNO_EXPORTED(exp) && vp->v_type != VDIR && vp->v_type != VLNK) /* * Only allow lookup of VDIR and VLNK for traversal of * non-exported volumes during NFSv4 mounting. */ nd->nd_repstat = ENOENT; if (nd->nd_repstat == 0) nd->nd_repstat = nfsvno_getfh(vp, fhp, p); if (!(nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); if (vpp != NULL && nd->nd_repstat == 0) *vpp = vp; else vput(vp); if (dirp) { if (nd->nd_flag & ND_NFSV3) dattr_ret = nfsvno_getattr(dirp, &dattr, nd, p, 0, NULL); vrele(dirp); } if (nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, dattr_ret, &dattr); goto out; } if (nd->nd_flag & ND_NFSV2) { (void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 0); nfsrv_fillattr(nd, &nva); } else if (nd->nd_flag & ND_NFSV3) { (void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 0); nfsrv_postopattr(nd, 0, &nva); nfsrv_postopattr(nd, dattr_ret, &dattr); } out: NFSEXITCODE2(error, nd); return (error); } /* * nfs readlink service */ int nfsrvd_readlink(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { u_int32_t *tl; struct mbuf *mp = NULL, *mpend = NULL; int getret = 1, len; struct nfsvattr nva; struct thread *p = curthread; uint16_t off; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &nva); goto out; } if (vnode_vtype(vp) != VLNK) { if (nd->nd_flag & ND_NFSV2) nd->nd_repstat = ENXIO; else nd->nd_repstat = EINVAL; } if (nd->nd_repstat == 0) { if ((nd->nd_flag & ND_EXTPG) != 0) nd->nd_repstat = nfsvno_readlink(vp, nd->nd_cred, nd->nd_maxextsiz, p, &mp, &mpend, &len); else nd->nd_repstat = nfsvno_readlink(vp, nd->nd_cred, 0, p, &mp, &mpend, &len); } if (nd->nd_flag & ND_NFSV3) getret = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &nva); if (nd->nd_repstat) goto out; NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(len); if (mp != NULL) { nd->nd_mb->m_next = mp; nd->nd_mb = mpend; if ((mpend->m_flags & M_EXTPG) != 0) { nd->nd_bextpg = mpend->m_epg_npgs - 1; nd->nd_bpos = (char *)(void *) PHYS_TO_DMAP(mpend->m_epg_pa[nd->nd_bextpg]); off = (nd->nd_bextpg == 0) ? mpend->m_epg_1st_off : 0; nd->nd_bpos += off + mpend->m_epg_last_len; nd->nd_bextpgsiz = PAGE_SIZE - mpend->m_epg_last_len - off; } else nd->nd_bpos = mtod(mpend, char *) + mpend->m_len; } out: NFSEXITCODE2(0, nd); return (0); } /* * nfs read service */ int nfsrvd_read(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { u_int32_t *tl; int error = 0, cnt, getret = 1, gotproxystateid, reqlen, eof = 0; struct mbuf *m2, *m3; struct nfsvattr nva; off_t off = 0x0; struct nfsstate st, *stp = &st; struct nfslock lo, *lop = &lo; nfsv4stateid_t stateid; nfsquad_t clientid; struct thread *p = curthread; uint16_t poff; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &nva); goto out; } if (nd->nd_flag & ND_NFSV2) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); off = (off_t)fxdr_unsigned(u_int32_t, *tl++); reqlen = fxdr_unsigned(int, *tl); } else if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); off = fxdr_hyper(tl); tl += 2; reqlen = fxdr_unsigned(int, *tl); } else { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 3*NFSX_UNSIGNED); reqlen = fxdr_unsigned(int, *(tl + 6)); } if (reqlen > NFS_SRVMAXDATA(nd)) { reqlen = NFS_SRVMAXDATA(nd); } else if (reqlen < 0) { error = EBADRPC; goto nfsmout; } gotproxystateid = 0; if (nd->nd_flag & ND_NFSV4) { stp->ls_flags = (NFSLCK_CHECK | NFSLCK_READACCESS); lop->lo_flags = NFSLCK_READ; stp->ls_ownerlen = 0; stp->ls_op = NULL; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); clientid.lval[0] = stp->ls_stateid.other[0] = *tl++; clientid.lval[1] = stp->ls_stateid.other[1] = *tl++; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK1 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } stp->ls_stateid.other[2] = *tl++; /* * Don't allow the client to use a special stateid for a DS op. */ if ((nd->nd_flag & ND_DSSERVER) != 0 && ((stp->ls_stateid.other[0] == 0x0 && stp->ls_stateid.other[1] == 0x0 && stp->ls_stateid.other[2] == 0x0) || (stp->ls_stateid.other[0] == 0xffffffff && stp->ls_stateid.other[1] == 0xffffffff && stp->ls_stateid.other[2] == 0xffffffff) || stp->ls_stateid.seqid != 0)) nd->nd_repstat = NFSERR_BADSTATEID; /* However, allow the proxy stateid. */ if (stp->ls_stateid.seqid == 0xffffffff && stp->ls_stateid.other[0] == 0x55555555 && stp->ls_stateid.other[1] == 0x55555555 && stp->ls_stateid.other[2] == 0x55555555) gotproxystateid = 1; off = fxdr_hyper(tl); lop->lo_first = off; tl += 2; lop->lo_end = off + reqlen; /* * Paranoia, just in case it wraps around. */ if (lop->lo_end < off) lop->lo_end = NFS64BITSSET; } if (vnode_vtype(vp) != VREG) { if (nd->nd_flag & ND_NFSV3) nd->nd_repstat = EINVAL; else nd->nd_repstat = (vnode_vtype(vp) == VDIR) ? EISDIR : EINVAL; } getret = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); if (!nd->nd_repstat) nd->nd_repstat = getret; if (!nd->nd_repstat && (nva.na_uid != nd->nd_cred->cr_uid || NFSVNO_EXSTRICTACCESS(exp))) { nd->nd_repstat = nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat) nd->nd_repstat = nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); } /* * DS reads are marked by ND_DSSERVER or use the proxy special * stateid. */ if (nd->nd_repstat == 0 && (nd->nd_flag & (ND_NFSV4 | ND_DSSERVER)) == ND_NFSV4 && gotproxystateid == 0) nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid, &stateid, exp, nd, p); if (nd->nd_repstat) { vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &nva); goto out; } if (off >= nva.na_size) { cnt = 0; eof = 1; } else if (reqlen == 0) cnt = 0; else if ((off + reqlen) >= nva.na_size) { cnt = nva.na_size - off; eof = 1; } else cnt = reqlen; m3 = NULL; if (cnt > 0) { /* * If cnt > MCLBYTES and the reply will not be saved, use * ext_pgs mbufs for TLS. * For NFSv4.0, we do not know for sure if the reply will * be saved, so do not use ext_pgs mbufs for NFSv4.0. * Always use ext_pgs mbufs if ND_EXTPG is set. */ if ((nd->nd_flag & ND_EXTPG) != 0 || (cnt > MCLBYTES && (nd->nd_flag & (ND_TLS | ND_SAVEREPLY)) == ND_TLS && (nd->nd_flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4)) nd->nd_repstat = nfsvno_read(vp, off, cnt, nd->nd_cred, nd->nd_maxextsiz, p, &m3, &m2); else nd->nd_repstat = nfsvno_read(vp, off, cnt, nd->nd_cred, 0, p, &m3, &m2); if (!(nd->nd_flag & ND_NFSV4)) { getret = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); if (!nd->nd_repstat) nd->nd_repstat = getret; } if (nd->nd_repstat) { vput(vp); if (m3) m_freem(m3); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &nva); goto out; } } vput(vp); if (nd->nd_flag & ND_NFSV2) { nfsrv_fillattr(nd, &nva); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); } else { if (nd->nd_flag & ND_NFSV3) { nfsrv_postopattr(nd, getret, &nva); NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(cnt); } else NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (eof) *tl++ = newnfs_true; else *tl++ = newnfs_false; } *tl = txdr_unsigned(cnt); if (m3) { nd->nd_mb->m_next = m3; nd->nd_mb = m2; if ((m2->m_flags & M_EXTPG) != 0) { nd->nd_flag |= ND_EXTPG; nd->nd_bextpg = m2->m_epg_npgs - 1; nd->nd_bpos = (char *)(void *) PHYS_TO_DMAP(m2->m_epg_pa[nd->nd_bextpg]); poff = (nd->nd_bextpg == 0) ? m2->m_epg_1st_off : 0; nd->nd_bpos += poff + m2->m_epg_last_len; nd->nd_bextpgsiz = PAGE_SIZE - m2->m_epg_last_len - poff; } else nd->nd_bpos = mtod(m2, char *) + m2->m_len; } out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfs write service */ int nfsrvd_write(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { u_int32_t *tl; struct nfsvattr nva, forat; int aftat_ret = 1, retlen, len, error = 0, forat_ret = 1; int gotproxystateid, stable = NFSWRITE_FILESYNC; off_t off; struct nfsstate st, *stp = &st; struct nfslock lo, *lop = &lo; nfsv4stateid_t stateid; nfsquad_t clientid; nfsattrbit_t attrbits; struct thread *p = curthread; if (nd->nd_repstat) { nfsrv_wcc(nd, forat_ret, &forat, aftat_ret, &nva); goto out; } gotproxystateid = 0; if (nd->nd_flag & ND_NFSV2) { NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); off = (off_t)fxdr_unsigned(u_int32_t, *++tl); tl += 2; retlen = len = fxdr_unsigned(int32_t, *tl); } else if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); off = fxdr_hyper(tl); tl += 3; stable = fxdr_unsigned(int, *tl++); retlen = len = fxdr_unsigned(int32_t, *tl); } else { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 4 * NFSX_UNSIGNED); stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS); lop->lo_flags = NFSLCK_WRITE; stp->ls_ownerlen = 0; stp->ls_op = NULL; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); clientid.lval[0] = stp->ls_stateid.other[0] = *tl++; clientid.lval[1] = stp->ls_stateid.other[1] = *tl++; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK2 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } stp->ls_stateid.other[2] = *tl++; /* * Don't allow the client to use a special stateid for a DS op. */ if ((nd->nd_flag & ND_DSSERVER) != 0 && ((stp->ls_stateid.other[0] == 0x0 && stp->ls_stateid.other[1] == 0x0 && stp->ls_stateid.other[2] == 0x0) || (stp->ls_stateid.other[0] == 0xffffffff && stp->ls_stateid.other[1] == 0xffffffff && stp->ls_stateid.other[2] == 0xffffffff) || stp->ls_stateid.seqid != 0)) nd->nd_repstat = NFSERR_BADSTATEID; /* However, allow the proxy stateid. */ if (stp->ls_stateid.seqid == 0xffffffff && stp->ls_stateid.other[0] == 0x55555555 && stp->ls_stateid.other[1] == 0x55555555 && stp->ls_stateid.other[2] == 0x55555555) gotproxystateid = 1; off = fxdr_hyper(tl); lop->lo_first = off; tl += 2; stable = fxdr_unsigned(int, *tl++); retlen = len = fxdr_unsigned(int32_t, *tl); lop->lo_end = off + len; /* * Paranoia, just in case it wraps around, which shouldn't * ever happen anyhow. */ if (lop->lo_end < lop->lo_first) lop->lo_end = NFS64BITSSET; } - if (retlen > NFS_SRVMAXIO || retlen < 0) + if (retlen > nfs_srvmaxio || retlen < 0) nd->nd_repstat = EIO; if (vnode_vtype(vp) != VREG && !nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) nd->nd_repstat = EINVAL; else nd->nd_repstat = (vnode_vtype(vp) == VDIR) ? EISDIR : EINVAL; } NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER); forat_ret = nfsvno_getattr(vp, &forat, nd, p, 1, &attrbits); if (!nd->nd_repstat) nd->nd_repstat = forat_ret; if (!nd->nd_repstat && (forat.na_uid != nd->nd_cred->cr_uid || NFSVNO_EXSTRICTACCESS(exp))) nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); /* * DS reads are marked by ND_DSSERVER or use the proxy special * stateid. */ if (nd->nd_repstat == 0 && (nd->nd_flag & (ND_NFSV4 | ND_DSSERVER)) == ND_NFSV4 && gotproxystateid == 0) nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid, &stateid, exp, nd, p); if (nd->nd_repstat) { vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, forat_ret, &forat, aftat_ret, &nva); goto out; } /* * For NFS Version 2, it is not obvious what a write of zero length * should do, but I might as well be consistent with Version 3, * which is to return ok so long as there are no permission problems. */ if (retlen > 0) { nd->nd_repstat = nfsvno_write(vp, off, retlen, &stable, nd->nd_md, nd->nd_dpos, nd->nd_cred, p); error = nfsm_advance(nd, NFSM_RNDUP(retlen), -1); if (error) goto nfsmout; } if (nd->nd_flag & ND_NFSV4) aftat_ret = 0; else aftat_ret = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); vput(vp); if (!nd->nd_repstat) nd->nd_repstat = aftat_ret; if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, forat_ret, &forat, aftat_ret, &nva); if (nd->nd_repstat) goto out; NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(retlen); /* * If nfs_async is set, then pretend the write was FILESYNC. * Warning: Doing this violates RFC1813 and runs a risk * of data written by a client being lost when the server * crashes/reboots. */ if (stable == NFSWRITE_UNSTABLE && nfs_async == 0) *tl++ = txdr_unsigned(stable); else *tl++ = txdr_unsigned(NFSWRITE_FILESYNC); /* * Actually, there is no need to txdr these fields, * but it may make the values more human readable, * for debugging purposes. */ *tl++ = txdr_unsigned(nfsboottime.tv_sec); *tl = txdr_unsigned(nfsboottime.tv_usec); } else if (!nd->nd_repstat) nfsrv_fillattr(nd, &nva); out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfs create service (creates regular files for V2 and V3. Spec. files for V2.) * now does a truncate to 0 length via. setattr if it already exists * The core creation routine has been extracted out into nfsrv_creatsub(), * so it can also be used by nfsrv_open() for V4. */ int nfsrvd_create(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, struct nfsexstuff *exp) { struct nfsvattr nva, dirfor, diraft; struct nfsv2_sattr *sp; struct nameidata named; u_int32_t *tl; int error = 0, tsize, dirfor_ret = 1, diraft_ret = 1; int how = NFSCREATE_UNCHECKED, exclusive_flag = 0; NFSDEV_T rdev = 0; vnode_t vp = NULL, dirp = NULL; fhandle_t fh; char *bufp; u_long *hashp; enum vtype vtyp; int32_t cverf[2], tverf[2] = { 0, 0 }; struct thread *p = curthread; if (nd->nd_repstat) { nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) goto nfsmout; if (!nd->nd_repstat) { NFSVNO_ATTRINIT(&nva); if (nd->nd_flag & ND_NFSV2) { NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); vtyp = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode)); if (vtyp == VNON) vtyp = VREG; NFSVNO_SETATTRVAL(&nva, type, vtyp); NFSVNO_SETATTRVAL(&nva, mode, nfstov_mode(sp->sa_mode)); switch (nva.na_type) { case VREG: tsize = fxdr_unsigned(int32_t, sp->sa_size); if (tsize != -1) NFSVNO_SETATTRVAL(&nva, size, (u_quad_t)tsize); break; case VCHR: case VBLK: case VFIFO: rdev = fxdr_unsigned(NFSDEV_T, sp->sa_size); break; default: break; } } else { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); how = fxdr_unsigned(int, *tl); switch (how) { case NFSCREATE_GUARDED: case NFSCREATE_UNCHECKED: error = nfsrv_sattr(nd, NULL, &nva, NULL, NULL, p); if (error) goto nfsmout; break; case NFSCREATE_EXCLUSIVE: NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF); cverf[0] = *tl++; cverf[1] = *tl; exclusive_flag = 1; break; } NFSVNO_SETATTRVAL(&nva, type, VREG); } } if (nd->nd_repstat) { nfsvno_relpathbuf(&named); if (nd->nd_flag & ND_NFSV3) { dirfor_ret = nfsvno_getattr(dp, &dirfor, nd, p, 1, NULL); nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); } vput(dp); goto out; } nd->nd_repstat = nfsvno_namei(nd, &named, dp, 1, exp, p, &dirp); if (dirp) { if (nd->nd_flag & ND_NFSV2) { vrele(dirp); dirp = NULL; } else { dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, NULL); } } if (nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); if (dirp) vrele(dirp); goto out; } if (!(nd->nd_flag & ND_NFSV2)) { switch (how) { case NFSCREATE_GUARDED: if (named.ni_vp) nd->nd_repstat = EEXIST; break; case NFSCREATE_UNCHECKED: break; case NFSCREATE_EXCLUSIVE: if (named.ni_vp == NULL) NFSVNO_SETATTRVAL(&nva, mode, 0); break; } } /* * Iff doesn't exist, create it * otherwise just truncate to 0 length * should I set the mode too ? */ nd->nd_repstat = nfsvno_createsub(nd, &named, &vp, &nva, &exclusive_flag, cverf, rdev, exp); if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_getfh(vp, &fh, p); if (!nd->nd_repstat) nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); vput(vp); if (!nd->nd_repstat) { tverf[0] = nva.na_atime.tv_sec; tverf[1] = nva.na_atime.tv_nsec; } } if (nd->nd_flag & ND_NFSV2) { if (!nd->nd_repstat) { (void) nfsm_fhtom(nd, (u_int8_t *)&fh, 0, 0); nfsrv_fillattr(nd, &nva); } } else { if (exclusive_flag && !nd->nd_repstat && (cverf[0] != tverf[0] || cverf[1] != tverf[1])) nd->nd_repstat = EEXIST; diraft_ret = nfsvno_getattr(dirp, &diraft, nd, p, 0, NULL); vrele(dirp); if (!nd->nd_repstat) { (void) nfsm_fhtom(nd, (u_int8_t *)&fh, 0, 1); nfsrv_postopattr(nd, 0, &nva); } nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); } out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(dp); nfsvno_relpathbuf(&named); NFSEXITCODE2(error, nd); return (error); } /* * nfs v3 mknod service (and v4 create) */ int nfsrvd_mknod(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, vnode_t *vpp, fhandle_t *fhp, struct nfsexstuff *exp) { struct nfsvattr nva, dirfor, diraft; u_int32_t *tl; struct nameidata named; int error = 0, dirfor_ret = 1, diraft_ret = 1, pathlen; u_int32_t major, minor; enum vtype vtyp = VNON; nfstype nfs4type = NFNON; vnode_t vp, dirp = NULL; nfsattrbit_t attrbits; char *bufp = NULL, *pathcp = NULL; u_long *hashp, cnflags; NFSACL_T *aclp = NULL; struct thread *p = curthread; NFSVNO_ATTRINIT(&nva); cnflags = (LOCKPARENT | SAVESTART); if (nd->nd_repstat) { nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } #ifdef NFS4_ACL_EXTATTR_NAME aclp = acl_alloc(M_WAITOK); aclp->acl_cnt = 0; #endif /* * For V4, the creation stuff is here, Yuck! */ if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); vtyp = nfsv34tov_type(*tl); nfs4type = fxdr_unsigned(nfstype, *tl); switch (nfs4type) { case NFLNK: error = nfsvno_getsymlink(nd, &nva, p, &pathcp, &pathlen); if (error) goto nfsmout; break; case NFCHR: case NFBLK: NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); major = fxdr_unsigned(u_int32_t, *tl++); minor = fxdr_unsigned(u_int32_t, *tl); nva.na_rdev = NFSMAKEDEV(major, minor); break; case NFSOCK: case NFFIFO: break; case NFDIR: cnflags = (LOCKPARENT | SAVENAME); break; default: nd->nd_repstat = NFSERR_BADTYPE; vrele(dp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif goto out; } } NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, cnflags | NOCACHE); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) goto nfsmout; if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); vtyp = nfsv34tov_type(*tl); } error = nfsrv_sattr(nd, NULL, &nva, &attrbits, aclp, p); if (error) goto nfsmout; nva.na_type = vtyp; if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV3) && (vtyp == VCHR || vtyp == VBLK)) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); major = fxdr_unsigned(u_int32_t, *tl++); minor = fxdr_unsigned(u_int32_t, *tl); nva.na_rdev = NFSMAKEDEV(major, minor); } } dirfor_ret = nfsvno_getattr(dp, &dirfor, nd, p, 0, NULL); if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV4)) { if (!dirfor_ret && NFSVNO_ISSETGID(&nva) && dirfor.na_gid == nva.na_gid) NFSVNO_UNSET(&nva, gid); nd->nd_repstat = nfsrv_checkuidgid(nd, &nva); } if (nd->nd_repstat) { vrele(dp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif nfsvno_relpathbuf(&named); if (pathcp) free(pathcp, M_TEMP); if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } /* * Yuck! For V4, mkdir and link are here and some V4 clients don't fill * in va_mode, so we'll have to set a default here. */ if (NFSVNO_NOTSETMODE(&nva)) { if (vtyp == VLNK) nva.na_mode = 0755; else nva.na_mode = 0400; } if (vtyp == VDIR) named.ni_cnd.cn_flags |= WILLBEDIR; nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, p, &dirp); if (nd->nd_repstat) { if (dirp) { if (nd->nd_flag & ND_NFSV3) dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, NULL); vrele(dirp); } #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } if (dirp) dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, NULL); if ((nd->nd_flag & ND_NFSV4) && (vtyp == VDIR || vtyp == VLNK)) { if (vtyp == VDIR) { nfsrvd_mkdirsub(nd, &named, &nva, fhp, vpp, dirp, &dirfor, &diraft, &diraft_ret, &attrbits, aclp, p, exp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif goto out; } else if (vtyp == VLNK) { nfsrvd_symlinksub(nd, &named, &nva, fhp, vpp, dirp, &dirfor, &diraft, &diraft_ret, &attrbits, aclp, p, exp, pathcp, pathlen); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif free(pathcp, M_TEMP); goto out; } } nd->nd_repstat = nfsvno_mknod(&named, &nva, nd->nd_cred, p); if (!nd->nd_repstat) { vp = named.ni_vp; nfsrv_fixattr(nd, vp, &nva, aclp, p, &attrbits, exp); nd->nd_repstat = nfsvno_getfh(vp, fhp, p); if ((nd->nd_flag & ND_NFSV3) && !nd->nd_repstat) nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); if (vpp != NULL && nd->nd_repstat == 0) { NFSVOPUNLOCK(vp); *vpp = vp; } else vput(vp); } diraft_ret = nfsvno_getattr(dirp, &diraft, nd, p, 0, NULL); vrele(dirp); if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) { (void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 1); nfsrv_postopattr(nd, 0, &nva); } else { NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = newnfs_false; txdr_hyper(dirfor.na_filerev, tl); tl += 2; txdr_hyper(diraft.na_filerev, tl); (void) nfsrv_putattrbit(nd, &attrbits); } } if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif out: NFSEXITCODE2(0, nd); return (0); nfsmout: vrele(dp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif if (bufp) nfsvno_relpathbuf(&named); if (pathcp) free(pathcp, M_TEMP); NFSEXITCODE2(error, nd); return (error); } /* * nfs remove service */ int nfsrvd_remove(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, struct nfsexstuff *exp) { struct nameidata named; u_int32_t *tl; int error = 0, dirfor_ret = 1, diraft_ret = 1; vnode_t dirp = NULL; struct nfsvattr dirfor, diraft; char *bufp; u_long *hashp; struct thread *p = curthread; if (nd->nd_repstat) { nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, DELETE, LOCKPARENT | LOCKLEAF); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) { vput(dp); nfsvno_relpathbuf(&named); goto out; } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_namei(nd, &named, dp, 1, exp, p, &dirp); } else { vput(dp); nfsvno_relpathbuf(&named); } if (dirp) { if (!(nd->nd_flag & ND_NFSV2)) { dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, NULL); } else { vrele(dirp); dirp = NULL; } } if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV4) { if (vnode_vtype(named.ni_vp) == VDIR) nd->nd_repstat = nfsvno_rmdirsub(&named, 1, nd->nd_cred, p, exp); else nd->nd_repstat = nfsvno_removesub(&named, 1, nd->nd_cred, p, exp); } else if (nd->nd_procnum == NFSPROC_RMDIR) { nd->nd_repstat = nfsvno_rmdirsub(&named, 0, nd->nd_cred, p, exp); } else { nd->nd_repstat = nfsvno_removesub(&named, 0, nd->nd_cred, p, exp); } } if (!(nd->nd_flag & ND_NFSV2)) { if (dirp) { diraft_ret = nfsvno_getattr(dirp, &diraft, nd, p, 0, NULL); vrele(dirp); } if (nd->nd_flag & ND_NFSV3) { nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); } else if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = newnfs_false; txdr_hyper(dirfor.na_filerev, tl); tl += 2; txdr_hyper(diraft.na_filerev, tl); } } out: NFSEXITCODE2(error, nd); return (error); } /* * nfs rename service */ int nfsrvd_rename(struct nfsrv_descript *nd, int isdgram, vnode_t dp, vnode_t todp, struct nfsexstuff *exp, struct nfsexstuff *toexp) { u_int32_t *tl; int error = 0, fdirfor_ret = 1, fdiraft_ret = 1; int tdirfor_ret = 1, tdiraft_ret = 1; struct nameidata fromnd, tond; vnode_t fdirp = NULL, tdirp = NULL, tdp = NULL; struct nfsvattr fdirfor, fdiraft, tdirfor, tdiraft; struct nfsexstuff tnes; struct nfsrvfh tfh; char *bufp, *tbufp = NULL; u_long *hashp; fhandle_t fh; struct thread *p = curthread; if (nd->nd_repstat) { nfsrv_wcc(nd, fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); nfsrv_wcc(nd, tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); goto out; } if (!(nd->nd_flag & ND_NFSV2)) fdirfor_ret = nfsvno_getattr(dp, &fdirfor, nd, p, 1, NULL); tond.ni_cnd.cn_nameiop = 0; tond.ni_startdir = NULL; NFSNAMEICNDSET(&fromnd.ni_cnd, nd->nd_cred, DELETE, WANTPARENT | SAVESTART); nfsvno_setpathbuf(&fromnd, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &fromnd.ni_pathlen); if (error) { vput(dp); if (todp) vrele(todp); nfsvno_relpathbuf(&fromnd); goto out; } /* * Unlock dp in this code section, so it is unlocked before * tdp gets locked. This avoids a potential LOR if tdp is the * parent directory of dp. */ if (nd->nd_flag & ND_NFSV4) { tdp = todp; tnes = *toexp; if (dp != tdp) { NFSVOPUNLOCK(dp); /* Might lock tdp. */ tdirfor_ret = nfsvno_getattr(tdp, &tdirfor, nd, p, 0, NULL); } else { tdirfor_ret = nfsvno_getattr(tdp, &tdirfor, nd, p, 1, NULL); NFSVOPUNLOCK(dp); } } else { tfh.nfsrvfh_len = 0; error = nfsrv_mtofh(nd, &tfh); if (error == 0) error = nfsvno_getfh(dp, &fh, p); if (error) { vput(dp); /* todp is always NULL except NFSv4 */ nfsvno_relpathbuf(&fromnd); goto out; } /* If this is the same file handle, just VREF() the vnode. */ if (tfh.nfsrvfh_len == NFSX_MYFH && !NFSBCMP(tfh.nfsrvfh_data, &fh, NFSX_MYFH)) { VREF(dp); tdp = dp; tnes = *exp; tdirfor_ret = nfsvno_getattr(tdp, &tdirfor, nd, p, 1, NULL); NFSVOPUNLOCK(dp); } else { NFSVOPUNLOCK(dp); nd->nd_cred->cr_uid = nd->nd_saveduid; nfsd_fhtovp(nd, &tfh, LK_EXCLUSIVE, &tdp, &tnes, NULL, 0, -1); /* Locks tdp. */ if (tdp) { tdirfor_ret = nfsvno_getattr(tdp, &tdirfor, nd, p, 1, NULL); NFSVOPUNLOCK(tdp); } } } NFSNAMEICNDSET(&tond.ni_cnd, nd->nd_cred, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART); nfsvno_setpathbuf(&tond, &tbufp, &hashp); if (!nd->nd_repstat) { error = nfsrv_parsename(nd, tbufp, hashp, &tond.ni_pathlen); if (error) { if (tdp) vrele(tdp); vrele(dp); nfsvno_relpathbuf(&fromnd); nfsvno_relpathbuf(&tond); goto out; } } if (nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) { nfsrv_wcc(nd, fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); nfsrv_wcc(nd, tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); } if (tdp) vrele(tdp); vrele(dp); nfsvno_relpathbuf(&fromnd); nfsvno_relpathbuf(&tond); goto out; } /* * Done parsing, now down to business. */ nd->nd_repstat = nfsvno_namei(nd, &fromnd, dp, 0, exp, p, &fdirp); if (nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) { nfsrv_wcc(nd, fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); nfsrv_wcc(nd, tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); } if (fdirp) vrele(fdirp); if (tdp) vrele(tdp); nfsvno_relpathbuf(&tond); goto out; } if (vnode_vtype(fromnd.ni_vp) == VDIR) tond.ni_cnd.cn_flags |= WILLBEDIR; nd->nd_repstat = nfsvno_namei(nd, &tond, tdp, 0, &tnes, p, &tdirp); nd->nd_repstat = nfsvno_rename(&fromnd, &tond, nd->nd_repstat, nd->nd_flag, nd->nd_cred, p); if (fdirp) fdiraft_ret = nfsvno_getattr(fdirp, &fdiraft, nd, p, 0, NULL); if (tdirp) tdiraft_ret = nfsvno_getattr(tdirp, &tdiraft, nd, p, 0, NULL); if (fdirp) vrele(fdirp); if (tdirp) vrele(tdirp); if (nd->nd_flag & ND_NFSV3) { nfsrv_wcc(nd, fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); nfsrv_wcc(nd, tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); } else if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 10 * NFSX_UNSIGNED); *tl++ = newnfs_false; txdr_hyper(fdirfor.na_filerev, tl); tl += 2; txdr_hyper(fdiraft.na_filerev, tl); tl += 2; *tl++ = newnfs_false; txdr_hyper(tdirfor.na_filerev, tl); tl += 2; txdr_hyper(tdiraft.na_filerev, tl); } out: NFSEXITCODE2(error, nd); return (error); } /* * nfs link service */ int nfsrvd_link(struct nfsrv_descript *nd, int isdgram, vnode_t vp, vnode_t tovp, struct nfsexstuff *exp, struct nfsexstuff *toexp) { struct nameidata named; u_int32_t *tl; int error = 0, dirfor_ret = 1, diraft_ret = 1, getret = 1; vnode_t dirp = NULL, dp = NULL; struct nfsvattr dirfor, diraft, at; struct nfsexstuff tnes; struct nfsrvfh dfh; char *bufp; u_long *hashp; struct thread *p = curthread; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } NFSVOPUNLOCK(vp); if (vnode_vtype(vp) == VDIR) { if (nd->nd_flag & ND_NFSV4) nd->nd_repstat = NFSERR_ISDIR; else nd->nd_repstat = NFSERR_INVAL; if (tovp) vrele(tovp); } if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV4) { dp = tovp; tnes = *toexp; } else { error = nfsrv_mtofh(nd, &dfh); if (error) { vrele(vp); /* tovp is always NULL unless NFSv4 */ goto out; } nfsd_fhtovp(nd, &dfh, LK_EXCLUSIVE, &dp, &tnes, NULL, 0, -1); if (dp) NFSVOPUNLOCK(dp); } } NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, LOCKPARENT | SAVENAME | NOCACHE); if (!nd->nd_repstat) { nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) { vrele(vp); if (dp) vrele(dp); nfsvno_relpathbuf(&named); goto out; } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, &tnes, p, &dirp); } else { if (dp) vrele(dp); nfsvno_relpathbuf(&named); } } if (dirp) { if (nd->nd_flag & ND_NFSV2) { vrele(dirp); dirp = NULL; } else { dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, NULL); } } if (!nd->nd_repstat) nd->nd_repstat = nfsvno_link(&named, vp, nd->nd_cred, p, exp); if (nd->nd_flag & ND_NFSV3) getret = nfsvno_getattr(vp, &at, nd, p, 0, NULL); if (dirp) { diraft_ret = nfsvno_getattr(dirp, &diraft, nd, p, 0, NULL); vrele(dirp); } vrele(vp); if (nd->nd_flag & ND_NFSV3) { nfsrv_postopattr(nd, getret, &at); nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); } else if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = newnfs_false; txdr_hyper(dirfor.na_filerev, tl); tl += 2; txdr_hyper(diraft.na_filerev, tl); } out: NFSEXITCODE2(error, nd); return (error); } /* * nfs symbolic link service */ int nfsrvd_symlink(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, vnode_t *vpp, fhandle_t *fhp, struct nfsexstuff *exp) { struct nfsvattr nva, dirfor, diraft; struct nameidata named; int error = 0, dirfor_ret = 1, diraft_ret = 1, pathlen; vnode_t dirp = NULL; char *bufp, *pathcp = NULL; u_long *hashp; struct thread *p = curthread; if (nd->nd_repstat) { nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } if (vpp) *vpp = NULL; NFSVNO_ATTRINIT(&nva); NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, LOCKPARENT | SAVESTART | NOCACHE); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (!error && !nd->nd_repstat) error = nfsvno_getsymlink(nd, &nva, p, &pathcp, &pathlen); if (error) { vrele(dp); nfsvno_relpathbuf(&named); goto out; } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, p, &dirp); } else { vrele(dp); nfsvno_relpathbuf(&named); } if (dirp != NULL && !(nd->nd_flag & ND_NFSV3)) { vrele(dirp); dirp = NULL; } /* * And call nfsrvd_symlinksub() to do the common code. It will * return EBADRPC upon a parsing error, 0 otherwise. */ if (!nd->nd_repstat) { if (dirp != NULL) dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, NULL); nfsrvd_symlinksub(nd, &named, &nva, fhp, vpp, dirp, &dirfor, &diraft, &diraft_ret, NULL, NULL, p, exp, pathcp, pathlen); } else if (dirp != NULL) { dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, NULL); vrele(dirp); } if (pathcp) free(pathcp, M_TEMP); if (nd->nd_flag & ND_NFSV3) { if (!nd->nd_repstat) { (void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 1); nfsrv_postopattr(nd, 0, &nva); } nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); } out: NFSEXITCODE2(error, nd); return (error); } /* * Common code for creating a symbolic link. */ static void nfsrvd_symlinksub(struct nfsrv_descript *nd, struct nameidata *ndp, struct nfsvattr *nvap, fhandle_t *fhp, vnode_t *vpp, vnode_t dirp, struct nfsvattr *dirforp, struct nfsvattr *diraftp, int *diraft_retp, nfsattrbit_t *attrbitp, NFSACL_T *aclp, NFSPROC_T *p, struct nfsexstuff *exp, char *pathcp, int pathlen) { u_int32_t *tl; nd->nd_repstat = nfsvno_symlink(ndp, nvap, pathcp, pathlen, !(nd->nd_flag & ND_NFSV2), nd->nd_saveduid, nd->nd_cred, p, exp); if (!nd->nd_repstat && !(nd->nd_flag & ND_NFSV2)) { nfsrv_fixattr(nd, ndp->ni_vp, nvap, aclp, p, attrbitp, exp); if (nd->nd_flag & ND_NFSV3) { nd->nd_repstat = nfsvno_getfh(ndp->ni_vp, fhp, p); if (!nd->nd_repstat) nd->nd_repstat = nfsvno_getattr(ndp->ni_vp, nvap, nd, p, 1, NULL); } if (vpp != NULL && nd->nd_repstat == 0) { NFSVOPUNLOCK(ndp->ni_vp); *vpp = ndp->ni_vp; } else vput(ndp->ni_vp); } if (dirp) { *diraft_retp = nfsvno_getattr(dirp, diraftp, nd, p, 0, NULL); vrele(dirp); } if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = newnfs_false; txdr_hyper(dirforp->na_filerev, tl); tl += 2; txdr_hyper(diraftp->na_filerev, tl); (void) nfsrv_putattrbit(nd, attrbitp); } NFSEXITCODE2(0, nd); } /* * nfs mkdir service */ int nfsrvd_mkdir(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, vnode_t *vpp, fhandle_t *fhp, struct nfsexstuff *exp) { struct nfsvattr nva, dirfor, diraft; struct nameidata named; u_int32_t *tl; int error = 0, dirfor_ret = 1, diraft_ret = 1; vnode_t dirp = NULL; char *bufp; u_long *hashp; struct thread *p = curthread; if (nd->nd_repstat) { nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, LOCKPARENT | SAVENAME | NOCACHE); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) goto nfsmout; if (!nd->nd_repstat) { NFSVNO_ATTRINIT(&nva); if (nd->nd_flag & ND_NFSV3) { error = nfsrv_sattr(nd, NULL, &nva, NULL, NULL, p); if (error) goto nfsmout; } else { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nva.na_mode = nfstov_mode(*tl++); } } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, p, &dirp); } else { vrele(dp); nfsvno_relpathbuf(&named); } if (dirp != NULL && !(nd->nd_flag & ND_NFSV3)) { vrele(dirp); dirp = NULL; } if (nd->nd_repstat) { if (dirp != NULL) { dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, NULL); vrele(dirp); } if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } if (dirp != NULL) dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, NULL); /* * Call nfsrvd_mkdirsub() for the code common to V4 as well. */ nfsrvd_mkdirsub(nd, &named, &nva, fhp, vpp, dirp, &dirfor, &diraft, &diraft_ret, NULL, NULL, p, exp); if (nd->nd_flag & ND_NFSV3) { if (!nd->nd_repstat) { (void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 1); nfsrv_postopattr(nd, 0, &nva); } nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); } else if (!nd->nd_repstat) { (void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 0); nfsrv_fillattr(nd, &nva); } out: NFSEXITCODE2(0, nd); return (0); nfsmout: vrele(dp); nfsvno_relpathbuf(&named); NFSEXITCODE2(error, nd); return (error); } /* * Code common to mkdir for V2,3 and 4. */ static void nfsrvd_mkdirsub(struct nfsrv_descript *nd, struct nameidata *ndp, struct nfsvattr *nvap, fhandle_t *fhp, vnode_t *vpp, vnode_t dirp, struct nfsvattr *dirforp, struct nfsvattr *diraftp, int *diraft_retp, nfsattrbit_t *attrbitp, NFSACL_T *aclp, NFSPROC_T *p, struct nfsexstuff *exp) { vnode_t vp; u_int32_t *tl; NFSVNO_SETATTRVAL(nvap, type, VDIR); nd->nd_repstat = nfsvno_mkdir(ndp, nvap, nd->nd_saveduid, nd->nd_cred, p, exp); if (!nd->nd_repstat) { vp = ndp->ni_vp; nfsrv_fixattr(nd, vp, nvap, aclp, p, attrbitp, exp); nd->nd_repstat = nfsvno_getfh(vp, fhp, p); if (!(nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) nd->nd_repstat = nfsvno_getattr(vp, nvap, nd, p, 1, NULL); if (vpp && !nd->nd_repstat) { NFSVOPUNLOCK(vp); *vpp = vp; } else { vput(vp); } } if (dirp) { *diraft_retp = nfsvno_getattr(dirp, diraftp, nd, p, 0, NULL); vrele(dirp); } if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = newnfs_false; txdr_hyper(dirforp->na_filerev, tl); tl += 2; txdr_hyper(diraftp->na_filerev, tl); (void) nfsrv_putattrbit(nd, attrbitp); } NFSEXITCODE2(0, nd); } /* * nfs commit service */ int nfsrvd_commit(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { struct nfsvattr bfor, aft; u_int32_t *tl; int error = 0, for_ret = 1, aft_ret = 1, cnt; u_int64_t off; struct thread *p = curthread; if (nd->nd_repstat) { nfsrv_wcc(nd, for_ret, &bfor, aft_ret, &aft); goto out; } /* Return NFSERR_ISDIR in NFSv4 when commit on a directory. */ if (vp->v_type != VREG) { if (nd->nd_flag & ND_NFSV3) error = NFSERR_NOTSUPP; else error = (vp->v_type == VDIR) ? NFSERR_ISDIR : NFSERR_INVAL; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); /* * XXX At this time VOP_FSYNC() does not accept offset and byte * count parameters, so these arguments are useless (someday maybe). */ off = fxdr_hyper(tl); tl += 2; cnt = fxdr_unsigned(int, *tl); if (nd->nd_flag & ND_NFSV3) for_ret = nfsvno_getattr(vp, &bfor, nd, p, 1, NULL); nd->nd_repstat = nfsvno_fsync(vp, off, cnt, nd->nd_cred, p); if (nd->nd_flag & ND_NFSV3) { aft_ret = nfsvno_getattr(vp, &aft, nd, p, 1, NULL); nfsrv_wcc(nd, for_ret, &bfor, aft_ret, &aft); } vput(vp); if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); *tl++ = txdr_unsigned(nfsboottime.tv_sec); *tl = txdr_unsigned(nfsboottime.tv_usec); } out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfs statfs service */ int nfsrvd_statfs(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { struct statfs *sf; u_int32_t *tl; int getret = 1; struct nfsvattr at; u_quad_t tval; struct thread *p = curthread; sf = NULL; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); goto out; } sf = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); nd->nd_repstat = nfsvno_statfs(vp, sf); getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); if (nd->nd_repstat) goto out; if (nd->nd_flag & ND_NFSV2) { NFSM_BUILD(tl, u_int32_t *, NFSX_V2STATFS); *tl++ = txdr_unsigned(NFS_V2MAXDATA); *tl++ = txdr_unsigned(sf->f_bsize); *tl++ = txdr_unsigned(sf->f_blocks); *tl++ = txdr_unsigned(sf->f_bfree); *tl = txdr_unsigned(sf->f_bavail); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_V3STATFS); tval = (u_quad_t)sf->f_blocks; tval *= (u_quad_t)sf->f_bsize; txdr_hyper(tval, tl); tl += 2; tval = (u_quad_t)sf->f_bfree; tval *= (u_quad_t)sf->f_bsize; txdr_hyper(tval, tl); tl += 2; tval = (u_quad_t)sf->f_bavail; tval *= (u_quad_t)sf->f_bsize; txdr_hyper(tval, tl); tl += 2; tval = (u_quad_t)sf->f_files; txdr_hyper(tval, tl); tl += 2; tval = (u_quad_t)sf->f_ffree; txdr_hyper(tval, tl); tl += 2; tval = (u_quad_t)sf->f_ffree; txdr_hyper(tval, tl); tl += 2; *tl = 0; } out: free(sf, M_STATFS); NFSEXITCODE2(0, nd); return (0); } /* * nfs fsinfo service */ int nfsrvd_fsinfo(struct nfsrv_descript *nd, int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { u_int32_t *tl; struct nfsfsinfo fs; int getret = 1; struct nfsvattr at; struct thread *p = curthread; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); goto out; } getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); nfsvno_getfs(&fs, isdgram); vput(vp); nfsrv_postopattr(nd, getret, &at); NFSM_BUILD(tl, u_int32_t *, NFSX_V3FSINFO); *tl++ = txdr_unsigned(fs.fs_rtmax); *tl++ = txdr_unsigned(fs.fs_rtpref); *tl++ = txdr_unsigned(fs.fs_rtmult); *tl++ = txdr_unsigned(fs.fs_wtmax); *tl++ = txdr_unsigned(fs.fs_wtpref); *tl++ = txdr_unsigned(fs.fs_wtmult); *tl++ = txdr_unsigned(fs.fs_dtpref); txdr_hyper(fs.fs_maxfilesize, tl); tl += 2; txdr_nfsv3time(&fs.fs_timedelta, tl); tl += 2; *tl = txdr_unsigned(fs.fs_properties); out: NFSEXITCODE2(0, nd); return (0); } /* * nfs pathconf service */ int nfsrvd_pathconf(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { struct nfsv3_pathconf *pc; int getret = 1; long linkmax, namemax, chownres, notrunc; struct nfsvattr at; struct thread *p = curthread; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); goto out; } nd->nd_repstat = nfsvno_pathconf(vp, _PC_LINK_MAX, &linkmax, nd->nd_cred, p); if (!nd->nd_repstat) nd->nd_repstat = nfsvno_pathconf(vp, _PC_NAME_MAX, &namemax, nd->nd_cred, p); if (!nd->nd_repstat) nd->nd_repstat=nfsvno_pathconf(vp, _PC_CHOWN_RESTRICTED, &chownres, nd->nd_cred, p); if (!nd->nd_repstat) nd->nd_repstat = nfsvno_pathconf(vp, _PC_NO_TRUNC, ¬runc, nd->nd_cred, p); getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); vput(vp); nfsrv_postopattr(nd, getret, &at); if (!nd->nd_repstat) { NFSM_BUILD(pc, struct nfsv3_pathconf *, NFSX_V3PATHCONF); pc->pc_linkmax = txdr_unsigned(linkmax); pc->pc_namemax = txdr_unsigned(namemax); pc->pc_notrunc = txdr_unsigned(notrunc); pc->pc_chownrestricted = txdr_unsigned(chownres); /* * These should probably be supported by VOP_PATHCONF(), but * until msdosfs is exportable (why would you want to?), the * Unix defaults should be ok. */ pc->pc_caseinsensitive = newnfs_false; pc->pc_casepreserving = newnfs_true; } out: NFSEXITCODE2(0, nd); return (0); } /* * nfsv4 lock service */ int nfsrvd_lock(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { u_int32_t *tl; int i; struct nfsstate *stp = NULL; struct nfslock *lop; struct nfslockconflict cf; int error = 0; u_short flags = NFSLCK_LOCK, lflags; u_int64_t offset, len; nfsv4stateid_t stateid; nfsquad_t clientid; struct thread *p = curthread; NFSM_DISSECT(tl, u_int32_t *, 7 * NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl++); switch (i) { case NFSV4LOCKT_READW: flags |= NFSLCK_BLOCKING; case NFSV4LOCKT_READ: lflags = NFSLCK_READ; break; case NFSV4LOCKT_WRITEW: flags |= NFSLCK_BLOCKING; case NFSV4LOCKT_WRITE: lflags = NFSLCK_WRITE; break; default: nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } if (*tl++ == newnfs_true) flags |= NFSLCK_RECLAIM; offset = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); tl += 2; if (*tl == newnfs_true) flags |= NFSLCK_OPENTOLOCK; if (flags & NFSLCK_OPENTOLOCK) { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED + NFSX_STATEID); i = fxdr_unsigned(int, *(tl+4+(NFSX_STATEID / NFSX_UNSIGNED))); if (i <= 0 || i > NFSV4_OPAQUELIMIT) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } stp = malloc(sizeof (struct nfsstate) + i, M_NFSDSTATE, M_WAITOK); stp->ls_ownerlen = i; stp->ls_op = nd->nd_rp; stp->ls_seq = fxdr_unsigned(int, *tl++); stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); /* * For the special stateid of other all 0s and seqid == 1, set * the stateid to the current stateid, if it is set. */ if ((nd->nd_flag & ND_NFSV41) != 0 && stp->ls_stateid.seqid == 1 && stp->ls_stateid.other[0] == 0 && stp->ls_stateid.other[1] == 0 && stp->ls_stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) { stp->ls_stateid = nd->nd_curstateid; stp->ls_stateid.seqid = 0; } else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } stp->ls_opentolockseq = fxdr_unsigned(int, *tl++); clientid.lval[0] = *tl++; clientid.lval[1] = *tl++; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK3 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } error = nfsrv_mtostr(nd, stp->ls_owner, stp->ls_ownerlen); if (error) goto nfsmout; } else { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); stp = malloc(sizeof (struct nfsstate), M_NFSDSTATE, M_WAITOK); stp->ls_ownerlen = 0; stp->ls_op = nd->nd_rp; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); /* * For the special stateid of other all 0s and seqid == 1, set * the stateid to the current stateid, if it is set. */ if ((nd->nd_flag & ND_NFSV41) != 0 && stp->ls_stateid.seqid == 1 && stp->ls_stateid.other[0] == 0 && stp->ls_stateid.other[1] == 0 && stp->ls_stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) { stp->ls_stateid = nd->nd_curstateid; stp->ls_stateid.seqid = 0; } else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } stp->ls_seq = fxdr_unsigned(int, *tl); clientid.lval[0] = stp->ls_stateid.other[0]; clientid.lval[1] = stp->ls_stateid.other[1]; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK4 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } } lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK); lop->lo_first = offset; if (len == NFS64BITSSET) { lop->lo_end = NFS64BITSSET; } else { lop->lo_end = offset + len; if (lop->lo_end <= lop->lo_first) nd->nd_repstat = NFSERR_INVAL; } lop->lo_flags = lflags; stp->ls_flags = flags; stp->ls_uid = nd->nd_cred->cr_uid; /* * Do basic access checking. */ if (!nd->nd_repstat && vnode_vtype(vp) != VREG) { if (vnode_vtype(vp) == VDIR) nd->nd_repstat = NFSERR_ISDIR; else nd->nd_repstat = NFSERR_INVAL; } if (!nd->nd_repstat) { if (lflags & NFSLCK_WRITE) { nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); } else { nd->nd_repstat = nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat) nd->nd_repstat = nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); } } /* * We call nfsrv_lockctrl() even if nd_repstat set, so that the * seqid# gets updated. nfsrv_lockctrl() will return the value * of nd_repstat, if it gets that far. */ nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, &cf, clientid, &stateid, exp, nd, p); if (lop) free(lop, M_NFSDLOCK); if (stp) free(stp, M_NFSDSTATE); if (!nd->nd_repstat) { /* For NFSv4.1, set the Current StateID. */ if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_curstateid = stateid; nd->nd_flag |= ND_CURSTATEID; } NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER); } else if (nd->nd_repstat == NFSERR_DENIED) { NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED); txdr_hyper(cf.cl_first, tl); tl += 2; if (cf.cl_end == NFS64BITSSET) len = NFS64BITSSET; else len = cf.cl_end - cf.cl_first; txdr_hyper(len, tl); tl += 2; if (cf.cl_flags == NFSLCK_WRITE) *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE); else *tl++ = txdr_unsigned(NFSV4LOCKT_READ); *tl++ = stateid.other[0]; *tl = stateid.other[1]; (void) nfsm_strtom(nd, cf.cl_owner, cf.cl_ownerlen); } vput(vp); NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); if (stp) free(stp, M_NFSDSTATE); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 lock test service */ int nfsrvd_lockt(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { u_int32_t *tl; int i; struct nfsstate *stp = NULL; struct nfslock lo, *lop = &lo; struct nfslockconflict cf; int error = 0; nfsv4stateid_t stateid; nfsquad_t clientid; u_int64_t len; struct thread *p = curthread; NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED); i = fxdr_unsigned(int, *(tl + 7)); if (i <= 0 || i > NFSV4_OPAQUELIMIT) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } stp = malloc(sizeof (struct nfsstate) + i, M_NFSDSTATE, M_WAITOK); stp->ls_ownerlen = i; stp->ls_op = NULL; stp->ls_flags = NFSLCK_TEST; stp->ls_uid = nd->nd_cred->cr_uid; i = fxdr_unsigned(int, *tl++); switch (i) { case NFSV4LOCKT_READW: stp->ls_flags |= NFSLCK_BLOCKING; case NFSV4LOCKT_READ: lo.lo_flags = NFSLCK_READ; break; case NFSV4LOCKT_WRITEW: stp->ls_flags |= NFSLCK_BLOCKING; case NFSV4LOCKT_WRITE: lo.lo_flags = NFSLCK_WRITE; break; default: nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } lo.lo_first = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); if (len == NFS64BITSSET) { lo.lo_end = NFS64BITSSET; } else { lo.lo_end = lo.lo_first + len; if (lo.lo_end <= lo.lo_first) nd->nd_repstat = NFSERR_INVAL; } tl += 2; clientid.lval[0] = *tl++; clientid.lval[1] = *tl; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK5 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } error = nfsrv_mtostr(nd, stp->ls_owner, stp->ls_ownerlen); if (error) goto nfsmout; if (!nd->nd_repstat && vnode_vtype(vp) != VREG) { if (vnode_vtype(vp) == VDIR) nd->nd_repstat = NFSERR_ISDIR; else nd->nd_repstat = NFSERR_INVAL; } if (!nd->nd_repstat) nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, &cf, clientid, &stateid, exp, nd, p); if (nd->nd_repstat) { if (nd->nd_repstat == NFSERR_DENIED) { NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED); txdr_hyper(cf.cl_first, tl); tl += 2; if (cf.cl_end == NFS64BITSSET) len = NFS64BITSSET; else len = cf.cl_end - cf.cl_first; txdr_hyper(len, tl); tl += 2; if (cf.cl_flags == NFSLCK_WRITE) *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE); else *tl++ = txdr_unsigned(NFSV4LOCKT_READ); *tl++ = stp->ls_stateid.other[0]; *tl = stp->ls_stateid.other[1]; (void) nfsm_strtom(nd, cf.cl_owner, cf.cl_ownerlen); } } vput(vp); if (stp) free(stp, M_NFSDSTATE); NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); if (stp) free(stp, M_NFSDSTATE); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 unlock service */ int nfsrvd_locku(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { u_int32_t *tl; int i; struct nfsstate *stp; struct nfslock *lop; int error = 0; nfsv4stateid_t stateid; nfsquad_t clientid; u_int64_t len; struct thread *p = curthread; NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED + NFSX_STATEID); stp = malloc(sizeof (struct nfsstate), M_NFSDSTATE, M_WAITOK); lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK); stp->ls_flags = NFSLCK_UNLOCK; lop->lo_flags = NFSLCK_UNLOCK; stp->ls_op = nd->nd_rp; i = fxdr_unsigned(int, *tl++); switch (i) { case NFSV4LOCKT_READW: stp->ls_flags |= NFSLCK_BLOCKING; case NFSV4LOCKT_READ: break; case NFSV4LOCKT_WRITEW: stp->ls_flags |= NFSLCK_BLOCKING; case NFSV4LOCKT_WRITE: break; default: nd->nd_repstat = NFSERR_BADXDR; free(stp, M_NFSDSTATE); free(lop, M_NFSDLOCK); goto nfsmout; } stp->ls_ownerlen = 0; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_seq = fxdr_unsigned(int, *tl++); stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); /* * For the special stateid of other all 0s and seqid == 1, set the * stateid to the current stateid, if it is set. */ if ((nd->nd_flag & ND_NFSV41) != 0 && stp->ls_stateid.seqid == 1 && stp->ls_stateid.other[0] == 0 && stp->ls_stateid.other[1] == 0 && stp->ls_stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) { stp->ls_stateid = nd->nd_curstateid; stp->ls_stateid.seqid = 0; } else { nd->nd_repstat = NFSERR_BADSTATEID; free(stp, M_NFSDSTATE); free(lop, M_NFSDLOCK); goto nfsmout; } } lop->lo_first = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); if (len == NFS64BITSSET) { lop->lo_end = NFS64BITSSET; } else { lop->lo_end = lop->lo_first + len; if (lop->lo_end <= lop->lo_first) nd->nd_repstat = NFSERR_INVAL; } clientid.lval[0] = stp->ls_stateid.other[0]; clientid.lval[1] = stp->ls_stateid.other[1]; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK6 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } if (!nd->nd_repstat && vnode_vtype(vp) != VREG) { if (vnode_vtype(vp) == VDIR) nd->nd_repstat = NFSERR_ISDIR; else nd->nd_repstat = NFSERR_INVAL; } /* * Call nfsrv_lockctrl() even if nd_repstat is set, so that the * seqid# gets incremented. nfsrv_lockctrl() will return the * value of nd_repstat, if it gets that far. */ nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid, &stateid, exp, nd, p); if (stp) free(stp, M_NFSDSTATE); if (lop) free(lop, M_NFSDLOCK); if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER); } nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 open service */ int nfsrvd_open(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, vnode_t *vpp, __unused fhandle_t *fhp, struct nfsexstuff *exp) { u_int32_t *tl; int i, retext; struct nfsstate *stp = NULL; int error = 0, create, claim, exclusive_flag = 0, override; u_int32_t rflags = NFSV4OPEN_LOCKTYPEPOSIX, acemask; int how = NFSCREATE_UNCHECKED; int32_t cverf[2], tverf[2] = { 0, 0 }; vnode_t vp = NULL, dirp = NULL; struct nfsvattr nva, dirfor, diraft; struct nameidata named; nfsv4stateid_t stateid, delegstateid; nfsattrbit_t attrbits; nfsquad_t clientid; char *bufp = NULL; u_long *hashp; NFSACL_T *aclp = NULL; struct thread *p = curthread; #ifdef NFS4_ACL_EXTATTR_NAME aclp = acl_alloc(M_WAITOK); aclp->acl_cnt = 0; #endif NFSZERO_ATTRBIT(&attrbits); named.ni_startdir = NULL; named.ni_cnd.cn_nameiop = 0; NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED); i = fxdr_unsigned(int, *(tl + 5)); if (i <= 0 || i > NFSV4_OPAQUELIMIT) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } stp = malloc(sizeof (struct nfsstate) + i, M_NFSDSTATE, M_WAITOK); stp->ls_ownerlen = i; stp->ls_op = nd->nd_rp; stp->ls_flags = NFSLCK_OPEN; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_seq = fxdr_unsigned(u_int32_t, *tl++); i = fxdr_unsigned(int, *tl++); retext = 0; if ((i & (NFSV4OPEN_WANTDELEGMASK | NFSV4OPEN_WANTSIGNALDELEG | NFSV4OPEN_WANTPUSHDELEG)) != 0 && (nd->nd_flag & ND_NFSV41) != 0) { retext = 1; /* For now, ignore these. */ i &= ~(NFSV4OPEN_WANTPUSHDELEG | NFSV4OPEN_WANTSIGNALDELEG); switch (i & NFSV4OPEN_WANTDELEGMASK) { case NFSV4OPEN_WANTANYDELEG: stp->ls_flags |= (NFSLCK_WANTRDELEG | NFSLCK_WANTWDELEG); i &= ~NFSV4OPEN_WANTDELEGMASK; break; case NFSV4OPEN_WANTREADDELEG: stp->ls_flags |= NFSLCK_WANTRDELEG; i &= ~NFSV4OPEN_WANTDELEGMASK; break; case NFSV4OPEN_WANTWRITEDELEG: stp->ls_flags |= NFSLCK_WANTWDELEG; i &= ~NFSV4OPEN_WANTDELEGMASK; break; case NFSV4OPEN_WANTNODELEG: stp->ls_flags |= NFSLCK_WANTNODELEG; i &= ~NFSV4OPEN_WANTDELEGMASK; break; case NFSV4OPEN_WANTCANCEL: printf("NFSv4: ignore Open WantCancel\n"); i &= ~NFSV4OPEN_WANTDELEGMASK; break; default: /* nd_repstat will be set to NFSERR_INVAL below. */ break; } } switch (i) { case NFSV4OPEN_ACCESSREAD: stp->ls_flags |= NFSLCK_READACCESS; break; case NFSV4OPEN_ACCESSWRITE: stp->ls_flags |= NFSLCK_WRITEACCESS; break; case NFSV4OPEN_ACCESSBOTH: stp->ls_flags |= (NFSLCK_READACCESS | NFSLCK_WRITEACCESS); break; default: nd->nd_repstat = NFSERR_INVAL; } i = fxdr_unsigned(int, *tl++); switch (i) { case NFSV4OPEN_DENYNONE: break; case NFSV4OPEN_DENYREAD: stp->ls_flags |= NFSLCK_READDENY; break; case NFSV4OPEN_DENYWRITE: stp->ls_flags |= NFSLCK_WRITEDENY; break; case NFSV4OPEN_DENYBOTH: stp->ls_flags |= (NFSLCK_READDENY | NFSLCK_WRITEDENY); break; default: nd->nd_repstat = NFSERR_INVAL; } clientid.lval[0] = *tl++; clientid.lval[1] = *tl; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK7 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } error = nfsrv_mtostr(nd, stp->ls_owner, stp->ls_ownerlen); if (error) goto nfsmout; NFSVNO_ATTRINIT(&nva); NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); create = fxdr_unsigned(int, *tl); if (!nd->nd_repstat) nd->nd_repstat = nfsvno_getattr(dp, &dirfor, nd, p, 0, NULL); if (create == NFSV4OPEN_CREATE) { nva.na_type = VREG; nva.na_mode = 0; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); how = fxdr_unsigned(int, *tl); switch (how) { case NFSCREATE_UNCHECKED: case NFSCREATE_GUARDED: error = nfsv4_sattr(nd, NULL, &nva, &attrbits, aclp, p); if (error) goto nfsmout; /* * If the na_gid being set is the same as that of * the directory it is going in, clear it, since * that is what will be set by default. This allows * a user that isn't in that group to do the create. */ if (!nd->nd_repstat && NFSVNO_ISSETGID(&nva) && nva.na_gid == dirfor.na_gid) NFSVNO_UNSET(&nva, gid); if (!nd->nd_repstat) nd->nd_repstat = nfsrv_checkuidgid(nd, &nva); break; case NFSCREATE_EXCLUSIVE: NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF); cverf[0] = *tl++; cverf[1] = *tl; break; case NFSCREATE_EXCLUSIVE41: NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF); cverf[0] = *tl++; cverf[1] = *tl; error = nfsv4_sattr(nd, NULL, &nva, &attrbits, aclp, p); if (error != 0) goto nfsmout; if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET)) nd->nd_repstat = NFSERR_INVAL; /* * If the na_gid being set is the same as that of * the directory it is going in, clear it, since * that is what will be set by default. This allows * a user that isn't in that group to do the create. */ if (nd->nd_repstat == 0 && NFSVNO_ISSETGID(&nva) && nva.na_gid == dirfor.na_gid) NFSVNO_UNSET(&nva, gid); if (nd->nd_repstat == 0) nd->nd_repstat = nfsrv_checkuidgid(nd, &nva); break; default: nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } } else if (create != NFSV4OPEN_NOCREATE) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } /* * Now, handle the claim, which usually includes looking up a * name in the directory referenced by dp. The exception is * NFSV4OPEN_CLAIMPREVIOUS. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); claim = fxdr_unsigned(int, *tl); if (claim == NFSV4OPEN_CLAIMDELEGATECUR || claim == NFSV4OPEN_CLAIMDELEGATECURFH) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl,(caddr_t)stateid.other,NFSX_STATEIDOTHER); stp->ls_flags |= NFSLCK_DELEGCUR; } else if (claim == NFSV4OPEN_CLAIMDELEGATEPREV || claim == NFSV4OPEN_CLAIMDELEGATEPREVFH) { stp->ls_flags |= NFSLCK_DELEGPREV; } if (claim == NFSV4OPEN_CLAIMNULL || claim == NFSV4OPEN_CLAIMDELEGATECUR || claim == NFSV4OPEN_CLAIMDELEGATEPREV) { if (!nd->nd_repstat && create == NFSV4OPEN_CREATE && claim != NFSV4OPEN_CLAIMNULL) nd->nd_repstat = NFSERR_INVAL; if (nd->nd_repstat) { nd->nd_repstat = nfsrv_opencheck(clientid, &stateid, stp, NULL, nd, p, nd->nd_repstat); goto nfsmout; } if (create == NFSV4OPEN_CREATE) NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE); else NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, LOOKUP, LOCKLEAF | SAVESTART); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) { vrele(dp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif free(stp, M_NFSDSTATE); nfsvno_relpathbuf(&named); NFSEXITCODE2(error, nd); return (error); } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, p, &dirp); } else { vrele(dp); nfsvno_relpathbuf(&named); } if (create == NFSV4OPEN_CREATE) { switch (how) { case NFSCREATE_UNCHECKED: if (named.ni_vp) { /* * Clear the setable attribute bits, except * for Size, if it is being truncated. */ NFSZERO_ATTRBIT(&attrbits); if (NFSVNO_ISSETSIZE(&nva)) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); } break; case NFSCREATE_GUARDED: if (named.ni_vp && !nd->nd_repstat) nd->nd_repstat = EEXIST; break; case NFSCREATE_EXCLUSIVE: exclusive_flag = 1; if (!named.ni_vp) nva.na_mode = 0; break; case NFSCREATE_EXCLUSIVE41: exclusive_flag = 1; break; } } nfsvno_open(nd, &named, clientid, &stateid, stp, &exclusive_flag, &nva, cverf, create, aclp, &attrbits, nd->nd_cred, exp, &vp); } else if (claim == NFSV4OPEN_CLAIMPREVIOUS || claim == NFSV4OPEN_CLAIMFH || claim == NFSV4OPEN_CLAIMDELEGATECURFH || claim == NFSV4OPEN_CLAIMDELEGATEPREVFH) { if (claim == NFSV4OPEN_CLAIMPREVIOUS) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); switch (i) { case NFSV4OPEN_DELEGATEREAD: stp->ls_flags |= NFSLCK_DELEGREAD; break; case NFSV4OPEN_DELEGATEWRITE: stp->ls_flags |= NFSLCK_DELEGWRITE; case NFSV4OPEN_DELEGATENONE: break; default: nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } stp->ls_flags |= NFSLCK_RECLAIM; } else { if (nd->nd_repstat == 0 && create == NFSV4OPEN_CREATE) nd->nd_repstat = NFSERR_INVAL; } vp = dp; NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); if (!VN_IS_DOOMED(vp)) nd->nd_repstat = nfsrv_opencheck(clientid, &stateid, stp, vp, nd, p, nd->nd_repstat); else nd->nd_repstat = NFSERR_PERM; } else { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } /* * Do basic access checking. */ if (!nd->nd_repstat && vnode_vtype(vp) != VREG) { /* * The IETF working group decided that this is the correct * error return for all non-regular files. */ nd->nd_repstat = (vp->v_type == VDIR) ? NFSERR_ISDIR : NFSERR_SYMLINK; } /* * If the Open is being done for a file that already exists, apply * normal permission checking including for the file owner, if * vfs.nfsd.v4openaccess is set. * Previously, the owner was always allowed to open the file to * be consistent with the NFS tradition of always allowing the * owner of the file to write to the file regardless of permissions. * It now appears that the Linux client expects the owner * permissions to be checked for opens that are not creating the * file. I believe the correct approach is to use the Access * operation's results to be consistent with NFSv3, but that is * not what the current Linux client appears to be doing. * Since both the Linux and OpenSolaris NFSv4 servers do this check, * I have enabled it by default. * If this semantic change causes a problem, it can be disabled by * setting the sysctl vfs.nfsd.v4openaccess to 0 to re-enable the * previous semantics. */ if (nfsrv_openaccess && create == NFSV4OPEN_NOCREATE) override = NFSACCCHK_NOOVERRIDE; else override = NFSACCCHK_ALLOWOWNER; if (!nd->nd_repstat && (stp->ls_flags & NFSLCK_WRITEACCESS)) nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p, override, NFSACCCHK_VPISLOCKED, NULL); if (!nd->nd_repstat && (stp->ls_flags & NFSLCK_READACCESS)) { nd->nd_repstat = nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p, override, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat) nd->nd_repstat = nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, override, NFSACCCHK_VPISLOCKED, NULL); } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); if (!nd->nd_repstat) { tverf[0] = nva.na_atime.tv_sec; tverf[1] = nva.na_atime.tv_nsec; } } if (!nd->nd_repstat && exclusive_flag && (cverf[0] != tverf[0] || cverf[1] != tverf[1])) nd->nd_repstat = EEXIST; /* * Do the open locking/delegation stuff. */ if (!nd->nd_repstat) nd->nd_repstat = nfsrv_openctrl(nd, vp, &stp, clientid, &stateid, &delegstateid, &rflags, exp, p, nva.na_filerev); /* * vp must be unlocked before the call to nfsvno_getattr(dirp,...) * below, to avoid a deadlock with the lookup in nfsvno_namei() above. * (ie: Leave the NFSVOPUNLOCK() about here.) */ if (vp) NFSVOPUNLOCK(vp); if (stp) free(stp, M_NFSDSTATE); if (!nd->nd_repstat && dirp) nd->nd_repstat = nfsvno_getattr(dirp, &diraft, nd, p, 0, NULL); if (!nd->nd_repstat) { /* For NFSv4.1, set the Current StateID. */ if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_curstateid = stateid; nd->nd_flag |= ND_CURSTATEID; } NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); if (claim == NFSV4OPEN_CLAIMPREVIOUS) { *tl++ = newnfs_true; *tl++ = 0; *tl++ = 0; *tl++ = 0; *tl++ = 0; } else { *tl++ = newnfs_false; /* Since dirp is not locked */ txdr_hyper(dirfor.na_filerev, tl); tl += 2; txdr_hyper(diraft.na_filerev, tl); tl += 2; } *tl = txdr_unsigned(rflags & NFSV4OPEN_RFLAGS); (void) nfsrv_putattrbit(nd, &attrbits); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (rflags & NFSV4OPEN_READDELEGATE) *tl = txdr_unsigned(NFSV4OPEN_DELEGATEREAD); else if (rflags & NFSV4OPEN_WRITEDELEGATE) *tl = txdr_unsigned(NFSV4OPEN_DELEGATEWRITE); else if (retext != 0) { *tl = txdr_unsigned(NFSV4OPEN_DELEGATENONEEXT); if ((rflags & NFSV4OPEN_WDNOTWANTED) != 0) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OPEN_NOTWANTED); } else if ((rflags & NFSV4OPEN_WDSUPPFTYPE) != 0) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OPEN_NOTSUPPFTYPE); } else if ((rflags & NFSV4OPEN_WDCONTENTION) != 0) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_CONTENTION); *tl = newnfs_false; } else if ((rflags & NFSV4OPEN_WDRESOURCE) != 0) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_RESOURCE); *tl = newnfs_false; } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OPEN_NOTWANTED); } } else *tl = txdr_unsigned(NFSV4OPEN_DELEGATENONE); if (rflags & (NFSV4OPEN_READDELEGATE|NFSV4OPEN_WRITEDELEGATE)) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID+NFSX_UNSIGNED); *tl++ = txdr_unsigned(delegstateid.seqid); NFSBCOPY((caddr_t)delegstateid.other, (caddr_t)tl, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); if (rflags & NFSV4OPEN_RECALL) *tl = newnfs_true; else *tl = newnfs_false; if (rflags & NFSV4OPEN_WRITEDELEGATE) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_LIMITSIZE); txdr_hyper(nva.na_size, tl); } NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4ACE_ALLOWEDTYPE); *tl++ = txdr_unsigned(0x0); acemask = NFSV4ACE_ALLFILESMASK; if (nva.na_mode & S_IRUSR) acemask |= NFSV4ACE_READMASK; if (nva.na_mode & S_IWUSR) acemask |= NFSV4ACE_WRITEMASK; if (nva.na_mode & S_IXUSR) acemask |= NFSV4ACE_EXECUTEMASK; *tl = txdr_unsigned(acemask); (void) nfsm_strtom(nd, "OWNER@", 6); } *vpp = vp; } else if (vp) { vrele(vp); } if (dirp) vrele(dirp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif NFSEXITCODE2(0, nd); return (0); nfsmout: vrele(dp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif if (stp) free(stp, M_NFSDSTATE); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 close service */ int nfsrvd_close(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { u_int32_t *tl; struct nfsstate st, *stp = &st; int error = 0, writeacc; nfsv4stateid_t stateid; nfsquad_t clientid; struct nfsvattr na; struct thread *p = curthread; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID); stp->ls_seq = fxdr_unsigned(u_int32_t, *tl++); stp->ls_ownerlen = 0; stp->ls_op = nd->nd_rp; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other, NFSX_STATEIDOTHER); /* * For the special stateid of other all 0s and seqid == 1, set the * stateid to the current stateid, if it is set. */ if ((nd->nd_flag & ND_NFSV41) != 0 && stp->ls_stateid.seqid == 1 && stp->ls_stateid.other[0] == 0 && stp->ls_stateid.other[1] == 0 && stp->ls_stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) stp->ls_stateid = nd->nd_curstateid; else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } stp->ls_flags = NFSLCK_CLOSE; clientid.lval[0] = stp->ls_stateid.other[0]; clientid.lval[1] = stp->ls_stateid.other[1]; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK8 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } nd->nd_repstat = nfsrv_openupdate(vp, stp, clientid, &stateid, nd, p, &writeacc); /* For pNFS, update the attributes. */ if (writeacc != 0 || nfsrv_pnfsatime != 0) nfsrv_updatemdsattr(vp, &na, p); vput(vp); if (!nd->nd_repstat) { /* * If the stateid that has been closed is the current stateid, * unset it. */ if ((nd->nd_flag & ND_CURSTATEID) != 0 && stateid.other[0] == nd->nd_curstateid.other[0] && stateid.other[1] == nd->nd_curstateid.other[1] && stateid.other[2] == nd->nd_curstateid.other[2]) nd->nd_flag &= ~ND_CURSTATEID; NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER); } NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 delegpurge service */ int nfsrvd_delegpurge(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { u_int32_t *tl; int error = 0; nfsquad_t clientid; struct thread *p = curthread; if ((nd->nd_repstat = nfsd_checkrootexp(nd)) != 0) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); clientid.lval[0] = *tl++; clientid.lval[1] = *tl; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK9 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } nd->nd_repstat = nfsrv_delegupdate(nd, clientid, NULL, NULL, NFSV4OP_DELEGPURGE, nd->nd_cred, p, NULL); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 delegreturn service */ int nfsrvd_delegreturn(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { u_int32_t *tl; int error = 0, writeacc; nfsv4stateid_t stateid; nfsquad_t clientid; struct nfsvattr na; struct thread *p = curthread; NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stateid.other, NFSX_STATEIDOTHER); clientid.lval[0] = stateid.other[0]; clientid.lval[1] = stateid.other[1]; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK10 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } nd->nd_repstat = nfsrv_delegupdate(nd, clientid, &stateid, vp, NFSV4OP_DELEGRETURN, nd->nd_cred, p, &writeacc); /* For pNFS, update the attributes. */ if (writeacc != 0 || nfsrv_pnfsatime != 0) nfsrv_updatemdsattr(vp, &na, p); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 get file handle service */ int nfsrvd_getfh(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { fhandle_t fh; struct thread *p = curthread; nd->nd_repstat = nfsvno_getfh(vp, &fh, p); vput(vp); if (!nd->nd_repstat) (void) nfsm_fhtom(nd, (u_int8_t *)&fh, 0, 0); NFSEXITCODE2(0, nd); return (0); } /* * nfsv4 open confirm service */ int nfsrvd_openconfirm(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { u_int32_t *tl; struct nfsstate st, *stp = &st; int error = 0; nfsv4stateid_t stateid; nfsquad_t clientid; struct thread *p = curthread; if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); stp->ls_ownerlen = 0; stp->ls_op = nd->nd_rp; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); stp->ls_seq = fxdr_unsigned(u_int32_t, *tl); stp->ls_flags = NFSLCK_CONFIRM; clientid.lval[0] = stp->ls_stateid.other[0]; clientid.lval[1] = stp->ls_stateid.other[1]; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK11 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } nd->nd_repstat = nfsrv_openupdate(vp, stp, clientid, &stateid, nd, p, NULL); if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER); } nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 open downgrade service */ int nfsrvd_opendowngrade(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { u_int32_t *tl; int i; struct nfsstate st, *stp = &st; int error = 0; nfsv4stateid_t stateid; nfsquad_t clientid; struct thread *p = curthread; /* opendowngrade can only work on a file object.*/ if (vp->v_type != VREG) { error = NFSERR_INVAL; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED); stp->ls_ownerlen = 0; stp->ls_op = nd->nd_rp; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); /* * For the special stateid of other all 0s and seqid == 1, set the * stateid to the current stateid, if it is set. */ if ((nd->nd_flag & ND_NFSV41) != 0 && stp->ls_stateid.seqid == 1 && stp->ls_stateid.other[0] == 0 && stp->ls_stateid.other[1] == 0 && stp->ls_stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) stp->ls_stateid = nd->nd_curstateid; else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } stp->ls_seq = fxdr_unsigned(u_int32_t, *tl++); i = fxdr_unsigned(int, *tl++); if ((nd->nd_flag & ND_NFSV41) != 0) i &= ~NFSV4OPEN_WANTDELEGMASK; switch (i) { case NFSV4OPEN_ACCESSREAD: stp->ls_flags = (NFSLCK_READACCESS | NFSLCK_DOWNGRADE); break; case NFSV4OPEN_ACCESSWRITE: stp->ls_flags = (NFSLCK_WRITEACCESS | NFSLCK_DOWNGRADE); break; case NFSV4OPEN_ACCESSBOTH: stp->ls_flags = (NFSLCK_READACCESS | NFSLCK_WRITEACCESS | NFSLCK_DOWNGRADE); break; default: nd->nd_repstat = NFSERR_INVAL; } i = fxdr_unsigned(int, *tl); switch (i) { case NFSV4OPEN_DENYNONE: break; case NFSV4OPEN_DENYREAD: stp->ls_flags |= NFSLCK_READDENY; break; case NFSV4OPEN_DENYWRITE: stp->ls_flags |= NFSLCK_WRITEDENY; break; case NFSV4OPEN_DENYBOTH: stp->ls_flags |= (NFSLCK_READDENY | NFSLCK_WRITEDENY); break; default: nd->nd_repstat = NFSERR_INVAL; } clientid.lval[0] = stp->ls_stateid.other[0]; clientid.lval[1] = stp->ls_stateid.other[1]; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK12 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } if (!nd->nd_repstat) nd->nd_repstat = nfsrv_openupdate(vp, stp, clientid, &stateid, nd, p, NULL); if (!nd->nd_repstat) { /* For NFSv4.1, set the Current StateID. */ if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_curstateid = stateid; nd->nd_flag |= ND_CURSTATEID; } NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER); } nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 renew lease service */ int nfsrvd_renew(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { u_int32_t *tl; int error = 0; nfsquad_t clientid; struct thread *p = curthread; if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } if ((nd->nd_repstat = nfsd_checkrootexp(nd)) != 0) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); clientid.lval[0] = *tl++; clientid.lval[1] = *tl; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK13 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } nd->nd_repstat = nfsrv_getclient(clientid, (CLOPS_RENEWOP|CLOPS_RENEW), NULL, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 security info service */ int nfsrvd_secinfo(struct nfsrv_descript *nd, int isdgram, vnode_t dp, struct nfsexstuff *exp) { u_int32_t *tl; int len; struct nameidata named; vnode_t dirp = NULL, vp; struct nfsrvfh fh; struct nfsexstuff retnes; u_int32_t *sizp; int error = 0, i; uint64_t savflag; char *bufp; u_long *hashp; struct thread *p = curthread; /* * All this just to get the export flags for the name. */ NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, LOOKUP, LOCKLEAF | SAVESTART); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) { vput(dp); nfsvno_relpathbuf(&named); goto out; } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_namei(nd, &named, dp, 1, exp, p, &dirp); } else { vput(dp); nfsvno_relpathbuf(&named); } if (dirp) vrele(dirp); if (nd->nd_repstat) goto out; vrele(named.ni_startdir); nfsvno_relpathbuf(&named); fh.nfsrvfh_len = NFSX_MYFH; vp = named.ni_vp; nd->nd_repstat = nfsvno_getfh(vp, (fhandle_t *)fh.nfsrvfh_data, p); vput(vp); savflag = nd->nd_flag; if (!nd->nd_repstat) { /* * Pretend the next op is Secinfo, so that no wrongsec * test will be done. */ nfsd_fhtovp(nd, &fh, LK_SHARED, &vp, &retnes, NULL, 0, NFSV4OP_SECINFO); if (vp) vput(vp); } nd->nd_flag = savflag; if (nd->nd_repstat) goto out; /* * Finally have the export flags for name, so we can create * the security info. */ len = 0; NFSM_BUILD(sizp, u_int32_t *, NFSX_UNSIGNED); /* If nes_numsecflavor == 0, all are allowed. */ if (retnes.nes_numsecflavor == 0) { NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(RPCAUTH_UNIX); *tl = txdr_unsigned(RPCAUTH_GSS); nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str, nfsgss_mechlist[KERBV_MECH].len); NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(GSS_KERBV_QOP); *tl++ = txdr_unsigned(RPCAUTHGSS_SVCNONE); *tl = txdr_unsigned(RPCAUTH_GSS); nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str, nfsgss_mechlist[KERBV_MECH].len); NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(GSS_KERBV_QOP); *tl++ = txdr_unsigned(RPCAUTHGSS_SVCINTEGRITY); *tl = txdr_unsigned(RPCAUTH_GSS); nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str, nfsgss_mechlist[KERBV_MECH].len); NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(GSS_KERBV_QOP); *tl = txdr_unsigned(RPCAUTHGSS_SVCPRIVACY); len = 4; } for (i = 0; i < retnes.nes_numsecflavor; i++) { if (retnes.nes_secflavors[i] == AUTH_SYS) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(RPCAUTH_UNIX); len++; } else if (retnes.nes_secflavors[i] == RPCSEC_GSS_KRB5) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl++ = txdr_unsigned(RPCAUTH_GSS); (void) nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str, nfsgss_mechlist[KERBV_MECH].len); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(GSS_KERBV_QOP); *tl = txdr_unsigned(RPCAUTHGSS_SVCNONE); len++; } else if (retnes.nes_secflavors[i] == RPCSEC_GSS_KRB5I) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl++ = txdr_unsigned(RPCAUTH_GSS); (void) nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str, nfsgss_mechlist[KERBV_MECH].len); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(GSS_KERBV_QOP); *tl = txdr_unsigned(RPCAUTHGSS_SVCINTEGRITY); len++; } else if (retnes.nes_secflavors[i] == RPCSEC_GSS_KRB5P) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl++ = txdr_unsigned(RPCAUTH_GSS); (void) nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str, nfsgss_mechlist[KERBV_MECH].len); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(GSS_KERBV_QOP); *tl = txdr_unsigned(RPCAUTHGSS_SVCPRIVACY); len++; } } *sizp = txdr_unsigned(len); out: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 security info no name service */ int nfsrvd_secinfononame(struct nfsrv_descript *nd, int isdgram, vnode_t dp, struct nfsexstuff *exp) { uint32_t *tl, *sizp; struct nameidata named; vnode_t dirp = NULL, vp; struct nfsrvfh fh; struct nfsexstuff retnes; int error = 0, fhstyle, i, len; uint64_t savflag; char *bufp; u_long *hashp; struct thread *p = curthread; NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); fhstyle = fxdr_unsigned(int, *tl); switch (fhstyle) { case NFSSECINFONONAME_PARENT: NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, LOOKUP, LOCKLEAF | SAVESTART); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error != 0) { vput(dp); nfsvno_relpathbuf(&named); goto nfsmout; } if (nd->nd_repstat == 0) nd->nd_repstat = nfsvno_namei(nd, &named, dp, 1, exp, p, &dirp); else vput(dp); if (dirp != NULL) vrele(dirp); vrele(named.ni_startdir); nfsvno_relpathbuf(&named); vp = named.ni_vp; break; case NFSSECINFONONAME_CURFH: vp = dp; break; default: nd->nd_repstat = NFSERR_INVAL; vput(dp); } if (nd->nd_repstat != 0) goto nfsmout; fh.nfsrvfh_len = NFSX_MYFH; nd->nd_repstat = nfsvno_getfh(vp, (fhandle_t *)fh.nfsrvfh_data, p); vput(vp); savflag = nd->nd_flag; if (nd->nd_repstat == 0) { /* * Pretend the next op is Secinfo, so that no wrongsec * test will be done. */ nfsd_fhtovp(nd, &fh, LK_SHARED, &vp, &retnes, NULL, 0, NFSV4OP_SECINFO); if (vp != NULL) vput(vp); } nd->nd_flag = savflag; if (nd->nd_repstat != 0) goto nfsmout; /* * Finally have the export flags for fh/parent, so we can create * the security info. */ len = 0; NFSM_BUILD(sizp, uint32_t *, NFSX_UNSIGNED); /* If nes_numsecflavor == 0, all are allowed. */ if (retnes.nes_numsecflavor == 0) { NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(RPCAUTH_UNIX); *tl = txdr_unsigned(RPCAUTH_GSS); nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str, nfsgss_mechlist[KERBV_MECH].len); NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(GSS_KERBV_QOP); *tl++ = txdr_unsigned(RPCAUTHGSS_SVCNONE); *tl = txdr_unsigned(RPCAUTH_GSS); nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str, nfsgss_mechlist[KERBV_MECH].len); NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(GSS_KERBV_QOP); *tl++ = txdr_unsigned(RPCAUTHGSS_SVCINTEGRITY); *tl = txdr_unsigned(RPCAUTH_GSS); nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str, nfsgss_mechlist[KERBV_MECH].len); NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(GSS_KERBV_QOP); *tl = txdr_unsigned(RPCAUTHGSS_SVCPRIVACY); len = 4; } for (i = 0; i < retnes.nes_numsecflavor; i++) { if (retnes.nes_secflavors[i] == AUTH_SYS) { NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(RPCAUTH_UNIX); len++; } else if (retnes.nes_secflavors[i] == RPCSEC_GSS_KRB5) { NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(RPCAUTH_GSS); nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str, nfsgss_mechlist[KERBV_MECH].len); NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(GSS_KERBV_QOP); *tl = txdr_unsigned(RPCAUTHGSS_SVCNONE); len++; } else if (retnes.nes_secflavors[i] == RPCSEC_GSS_KRB5I) { NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(RPCAUTH_GSS); nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str, nfsgss_mechlist[KERBV_MECH].len); NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(GSS_KERBV_QOP); *tl = txdr_unsigned(RPCAUTHGSS_SVCINTEGRITY); len++; } else if (retnes.nes_secflavors[i] == RPCSEC_GSS_KRB5P) { NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(RPCAUTH_GSS); nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str, nfsgss_mechlist[KERBV_MECH].len); NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(GSS_KERBV_QOP); *tl = txdr_unsigned(RPCAUTHGSS_SVCPRIVACY); len++; } } *sizp = txdr_unsigned(len); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 set client id service */ int nfsrvd_setclientid(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { u_int32_t *tl; int i; int error = 0, idlen; struct nfsclient *clp = NULL; #ifdef INET struct sockaddr_in *rin; #endif #ifdef INET6 struct sockaddr_in6 *rin6; #endif #if defined(INET) || defined(INET6) u_char *ucp, *ucp2; #endif u_char *verf, *addrbuf; nfsquad_t clientid, confirm; struct thread *p = curthread; if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } if ((nd->nd_repstat = nfsd_checkrootexp(nd)) != 0) goto out; NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF + NFSX_UNSIGNED); verf = (u_char *)tl; tl += (NFSX_VERF / NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i > NFSV4_OPAQUELIMIT || i <= 0) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } idlen = i; if (nd->nd_flag & ND_GSS) i += nd->nd_princlen; clp = malloc(sizeof(struct nfsclient) + i, M_NFSDCLIENT, M_WAITOK | M_ZERO); clp->lc_stateid = malloc(sizeof(struct nfsstatehead) * nfsrv_statehashsize, M_NFSDCLIENT, M_WAITOK); NFSINITSOCKMUTEX(&clp->lc_req.nr_mtx); /* Allocated large enough for an AF_INET or AF_INET6 socket. */ clp->lc_req.nr_nam = malloc(sizeof(struct sockaddr_in6), M_SONAME, M_WAITOK | M_ZERO); clp->lc_req.nr_cred = NULL; NFSBCOPY(verf, clp->lc_verf, NFSX_VERF); clp->lc_idlen = idlen; error = nfsrv_mtostr(nd, clp->lc_id, idlen); if (error) goto nfsmout; if (nd->nd_flag & ND_GSS) { clp->lc_flags = LCL_GSS; if (nd->nd_flag & ND_GSSINTEGRITY) clp->lc_flags |= LCL_GSSINTEGRITY; else if (nd->nd_flag & ND_GSSPRIVACY) clp->lc_flags |= LCL_GSSPRIVACY; } else { clp->lc_flags = 0; } if ((nd->nd_flag & ND_GSS) && nd->nd_princlen > 0) { clp->lc_flags |= LCL_NAME; clp->lc_namelen = nd->nd_princlen; clp->lc_name = &clp->lc_id[idlen]; NFSBCOPY(nd->nd_principal, clp->lc_name, clp->lc_namelen); } else { clp->lc_uid = nd->nd_cred->cr_uid; clp->lc_gid = nd->nd_cred->cr_gid; } /* If the client is using TLS, do so for the callback connection. */ if (nd->nd_flag & ND_TLS) clp->lc_flags |= LCL_TLSCB; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); clp->lc_program = fxdr_unsigned(u_int32_t, *tl); error = nfsrv_getclientipaddr(nd, clp); if (error) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); clp->lc_callback = fxdr_unsigned(u_int32_t, *tl); /* * nfsrv_setclient() does the actual work of adding it to the * client list. If there is no error, the structure has been * linked into the client list and clp should no longer be used * here. When an error is returned, it has not been linked in, * so it should be free'd. */ nd->nd_repstat = nfsrv_setclient(nd, &clp, &clientid, &confirm, p); if (nd->nd_repstat == NFSERR_CLIDINUSE) { /* * 8 is the maximum length of the port# string. */ addrbuf = malloc(INET6_ADDRSTRLEN + 8, M_TEMP, M_WAITOK); switch (clp->lc_req.nr_nam->sa_family) { #ifdef INET case AF_INET: if (clp->lc_flags & LCL_TCPCALLBACK) (void) nfsm_strtom(nd, "tcp", 3); else (void) nfsm_strtom(nd, "udp", 3); rin = (struct sockaddr_in *)clp->lc_req.nr_nam; ucp = (u_char *)&rin->sin_addr.s_addr; ucp2 = (u_char *)&rin->sin_port; sprintf(addrbuf, "%d.%d.%d.%d.%d.%d", ucp[0] & 0xff, ucp[1] & 0xff, ucp[2] & 0xff, ucp[3] & 0xff, ucp2[0] & 0xff, ucp2[1] & 0xff); break; #endif #ifdef INET6 case AF_INET6: if (clp->lc_flags & LCL_TCPCALLBACK) (void) nfsm_strtom(nd, "tcp6", 4); else (void) nfsm_strtom(nd, "udp6", 4); rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam; ucp = inet_ntop(AF_INET6, &rin6->sin6_addr, addrbuf, INET6_ADDRSTRLEN); if (ucp != NULL) i = strlen(ucp); else i = 0; ucp2 = (u_char *)&rin6->sin6_port; sprintf(&addrbuf[i], ".%d.%d", ucp2[0] & 0xff, ucp2[1] & 0xff); break; #endif } (void) nfsm_strtom(nd, addrbuf, strlen(addrbuf)); free(addrbuf, M_TEMP); } if (clp) { free(clp->lc_req.nr_nam, M_SONAME); NFSFREEMUTEX(&clp->lc_req.nr_mtx); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); } if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_HYPER); *tl++ = clientid.lval[0]; *tl++ = clientid.lval[1]; *tl++ = confirm.lval[0]; *tl = confirm.lval[1]; } out: NFSEXITCODE2(0, nd); return (0); nfsmout: if (clp) { free(clp->lc_req.nr_nam, M_SONAME); NFSFREEMUTEX(&clp->lc_req.nr_mtx); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); } NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 set client id confirm service */ int nfsrvd_setclientidcfrm(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { u_int32_t *tl; int error = 0; nfsquad_t clientid, confirm; struct thread *p = curthread; if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } if ((nd->nd_repstat = nfsd_checkrootexp(nd)) != 0) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_HYPER); clientid.lval[0] = *tl++; clientid.lval[1] = *tl++; confirm.lval[0] = *tl++; confirm.lval[1] = *tl; /* * nfsrv_getclient() searches the client list for a match and * returns the appropriate NFSERR status. */ nd->nd_repstat = nfsrv_getclient(clientid, (CLOPS_CONFIRM|CLOPS_RENEW), NULL, NULL, confirm, 0, nd, p); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 verify service */ int nfsrvd_verify(struct nfsrv_descript *nd, int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { int error = 0, ret, fhsize = NFSX_MYFH; struct nfsvattr nva; struct statfs *sf; struct nfsfsinfo fs; fhandle_t fh; struct thread *p = curthread; sf = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); if (!nd->nd_repstat) nd->nd_repstat = nfsvno_statfs(vp, sf); if (!nd->nd_repstat) nd->nd_repstat = nfsvno_getfh(vp, &fh, p); if (!nd->nd_repstat) { nfsvno_getfs(&fs, isdgram); error = nfsv4_loadattr(nd, vp, &nva, NULL, &fh, fhsize, NULL, sf, NULL, &fs, NULL, 1, &ret, NULL, NULL, p, nd->nd_cred); if (!error) { if (nd->nd_procnum == NFSV4OP_NVERIFY) { if (ret == 0) nd->nd_repstat = NFSERR_SAME; else if (ret != NFSERR_NOTSAME) nd->nd_repstat = ret; } else if (ret) nd->nd_repstat = ret; } } vput(vp); free(sf, M_STATFS); NFSEXITCODE2(error, nd); return (error); } /* * nfs openattr rpc */ int nfsrvd_openattr(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, __unused vnode_t *vpp, __unused fhandle_t *fhp, __unused struct nfsexstuff *exp) { u_int32_t *tl; int error = 0, createdir __unused; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); createdir = fxdr_unsigned(int, *tl); nd->nd_repstat = NFSERR_NOTSUPP; nfsmout: vrele(dp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 release lock owner service */ int nfsrvd_releaselckown(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { u_int32_t *tl; struct nfsstate *stp = NULL; int error = 0, len; nfsquad_t clientid; struct thread *p = curthread; if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } if ((nd->nd_repstat = nfsd_checkrootexp(nd)) != 0) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); len = fxdr_unsigned(int, *(tl + 2)); if (len <= 0 || len > NFSV4_OPAQUELIMIT) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } stp = malloc(sizeof (struct nfsstate) + len, M_NFSDSTATE, M_WAITOK); stp->ls_ownerlen = len; stp->ls_op = NULL; stp->ls_flags = NFSLCK_RELEASE; stp->ls_uid = nd->nd_cred->cr_uid; clientid.lval[0] = *tl++; clientid.lval[1] = *tl; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK14 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } error = nfsrv_mtostr(nd, stp->ls_owner, len); if (error) goto nfsmout; nd->nd_repstat = nfsrv_releaselckown(stp, clientid, p); free(stp, M_NFSDSTATE); NFSEXITCODE2(0, nd); return (0); nfsmout: if (stp) free(stp, M_NFSDSTATE); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 exchange_id service */ int nfsrvd_exchangeid(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { uint32_t *tl; int error = 0, i, idlen; struct nfsclient *clp = NULL; nfsquad_t clientid, confirm; uint8_t *verf; uint32_t sp4type, v41flags; uint64_t owner_minor; struct timespec verstime; #ifdef INET struct sockaddr_in *sin, *rin; #endif #ifdef INET6 struct sockaddr_in6 *sin6, *rin6; #endif struct thread *p = curthread; if ((nd->nd_repstat = nfsd_checkrootexp(nd)) != 0) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF + NFSX_UNSIGNED); verf = (uint8_t *)tl; tl += (NFSX_VERF / NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i > NFSV4_OPAQUELIMIT || i <= 0) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } idlen = i; if (nd->nd_flag & ND_GSS) i += nd->nd_princlen; clp = malloc(sizeof(struct nfsclient) + i, M_NFSDCLIENT, M_WAITOK | M_ZERO); clp->lc_stateid = malloc(sizeof(struct nfsstatehead) * nfsrv_statehashsize, M_NFSDCLIENT, M_WAITOK); NFSINITSOCKMUTEX(&clp->lc_req.nr_mtx); /* Allocated large enough for an AF_INET or AF_INET6 socket. */ clp->lc_req.nr_nam = malloc(sizeof(struct sockaddr_in6), M_SONAME, M_WAITOK | M_ZERO); switch (nd->nd_nam->sa_family) { #ifdef INET case AF_INET: rin = (struct sockaddr_in *)clp->lc_req.nr_nam; sin = (struct sockaddr_in *)nd->nd_nam; rin->sin_family = AF_INET; rin->sin_len = sizeof(struct sockaddr_in); rin->sin_port = 0; rin->sin_addr.s_addr = sin->sin_addr.s_addr; break; #endif #ifdef INET6 case AF_INET6: rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam; sin6 = (struct sockaddr_in6 *)nd->nd_nam; rin6->sin6_family = AF_INET6; rin6->sin6_len = sizeof(struct sockaddr_in6); rin6->sin6_port = 0; rin6->sin6_addr = sin6->sin6_addr; break; #endif } clp->lc_req.nr_cred = NULL; NFSBCOPY(verf, clp->lc_verf, NFSX_VERF); clp->lc_idlen = idlen; error = nfsrv_mtostr(nd, clp->lc_id, idlen); if (error != 0) goto nfsmout; if ((nd->nd_flag & ND_GSS) != 0) { clp->lc_flags = LCL_GSS | LCL_NFSV41; if ((nd->nd_flag & ND_GSSINTEGRITY) != 0) clp->lc_flags |= LCL_GSSINTEGRITY; else if ((nd->nd_flag & ND_GSSPRIVACY) != 0) clp->lc_flags |= LCL_GSSPRIVACY; } else clp->lc_flags = LCL_NFSV41; if ((nd->nd_flag & ND_NFSV42) != 0) clp->lc_flags |= LCL_NFSV42; if ((nd->nd_flag & ND_GSS) != 0 && nd->nd_princlen > 0) { clp->lc_flags |= LCL_NAME; clp->lc_namelen = nd->nd_princlen; clp->lc_name = &clp->lc_id[idlen]; NFSBCOPY(nd->nd_principal, clp->lc_name, clp->lc_namelen); } else { clp->lc_uid = nd->nd_cred->cr_uid; clp->lc_gid = nd->nd_cred->cr_gid; } NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); v41flags = fxdr_unsigned(uint32_t, *tl++); if ((v41flags & ~(NFSV4EXCH_SUPPMOVEDREFER | NFSV4EXCH_SUPPMOVEDMIGR | NFSV4EXCH_BINDPRINCSTATEID | NFSV4EXCH_MASKPNFS | NFSV4EXCH_UPDCONFIRMEDRECA)) != 0) { nd->nd_repstat = NFSERR_INVAL; goto nfsmout; } if ((v41flags & NFSV4EXCH_UPDCONFIRMEDRECA) != 0) confirm.lval[1] = 1; else confirm.lval[1] = 0; if (nfsrv_devidcnt == 0) v41flags = NFSV4EXCH_USENONPNFS | NFSV4EXCH_USEPNFSDS; else v41flags = NFSV4EXCH_USEPNFSMDS; sp4type = fxdr_unsigned(uint32_t, *tl); if (sp4type != NFSV4EXCH_SP4NONE) { nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } /* * nfsrv_setclient() does the actual work of adding it to the * client list. If there is no error, the structure has been * linked into the client list and clp should no longer be used * here. When an error is returned, it has not been linked in, * so it should be free'd. */ nd->nd_repstat = nfsrv_setclient(nd, &clp, &clientid, &confirm, p); if (clp != NULL) { free(clp->lc_req.nr_nam, M_SONAME); NFSFREEMUTEX(&clp->lc_req.nr_mtx); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); } if (nd->nd_repstat == 0) { if (confirm.lval[1] != 0) v41flags |= NFSV4EXCH_CONFIRMEDR; NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + 3 * NFSX_UNSIGNED); *tl++ = clientid.lval[0]; /* ClientID */ *tl++ = clientid.lval[1]; *tl++ = txdr_unsigned(confirm.lval[0]); /* SequenceID */ *tl++ = txdr_unsigned(v41flags); /* Exch flags */ *tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE); /* No SSV */ owner_minor = 0; /* Owner */ txdr_hyper(owner_minor, tl); /* Minor */ (void)nfsm_strtom(nd, nd->nd_cred->cr_prison->pr_hostuuid, strlen(nd->nd_cred->cr_prison->pr_hostuuid)); /* Major */ (void)nfsm_strtom(nd, nd->nd_cred->cr_prison->pr_hostuuid, strlen(nd->nd_cred->cr_prison->pr_hostuuid)); /* Scope */ NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(1); (void)nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org")); (void)nfsm_strtom(nd, version, strlen(version)); NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME); verstime.tv_sec = 1293840000; /* Jan 1, 2011 */ verstime.tv_nsec = 0; txdr_nfsv4time(&verstime, tl); } NFSEXITCODE2(0, nd); return (0); nfsmout: if (clp != NULL) { free(clp->lc_req.nr_nam, M_SONAME); NFSFREEMUTEX(&clp->lc_req.nr_mtx); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); } NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 create session service */ int nfsrvd_createsession(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { uint32_t *tl; int error = 0; nfsquad_t clientid, confirm; struct nfsdsession *sep = NULL; uint32_t rdmacnt; struct thread *p = curthread; + static bool do_printf = true; if ((nd->nd_repstat = nfsd_checkrootexp(nd)) != 0) goto nfsmout; sep = (struct nfsdsession *)malloc(sizeof(struct nfsdsession), M_NFSDSESSION, M_WAITOK | M_ZERO); sep->sess_refcnt = 1; mtx_init(&sep->sess_cbsess.nfsess_mtx, "nfscbsession", NULL, MTX_DEF); NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED); clientid.lval[0] = *tl++; clientid.lval[1] = *tl++; confirm.lval[0] = fxdr_unsigned(uint32_t, *tl++); sep->sess_crflags = fxdr_unsigned(uint32_t, *tl); /* Persistent sessions and RDMA are not supported. */ sep->sess_crflags &= NFSV4CRSESS_CONNBACKCHAN; /* Fore channel attributes. */ NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED); tl++; /* Header pad always 0. */ sep->sess_maxreq = fxdr_unsigned(uint32_t, *tl++); if (sep->sess_maxreq > sb_max_adj - NFS_MAXXDR) { sep->sess_maxreq = sb_max_adj - NFS_MAXXDR; - printf("Consider increasing kern.ipc.maxsockbuf\n"); + if (do_printf) + printf("Consider increasing kern.ipc.maxsockbuf\n"); + do_printf = false; } sep->sess_maxresp = fxdr_unsigned(uint32_t, *tl++); if (sep->sess_maxresp > sb_max_adj - NFS_MAXXDR) { sep->sess_maxresp = sb_max_adj - NFS_MAXXDR; - printf("Consider increasing kern.ipc.maxsockbuf\n"); + if (do_printf) + printf("Consider increasing kern.ipc.maxsockbuf\n"); + do_printf = false; } sep->sess_maxrespcached = fxdr_unsigned(uint32_t, *tl++); sep->sess_maxops = fxdr_unsigned(uint32_t, *tl++); sep->sess_maxslots = fxdr_unsigned(uint32_t, *tl++); if (sep->sess_maxslots > NFSV4_SLOTS) sep->sess_maxslots = NFSV4_SLOTS; rdmacnt = fxdr_unsigned(uint32_t, *tl); if (rdmacnt > 1) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } else if (rdmacnt == 1) NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); /* Back channel attributes. */ NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED); tl++; /* Header pad always 0. */ sep->sess_cbmaxreq = fxdr_unsigned(uint32_t, *tl++); sep->sess_cbmaxresp = fxdr_unsigned(uint32_t, *tl++); sep->sess_cbmaxrespcached = fxdr_unsigned(uint32_t, *tl++); sep->sess_cbmaxops = fxdr_unsigned(uint32_t, *tl++); sep->sess_cbsess.nfsess_foreslots = fxdr_unsigned(uint32_t, *tl++); rdmacnt = fxdr_unsigned(uint32_t, *tl); if (rdmacnt > 1) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } else if (rdmacnt == 1) NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); sep->sess_cbprogram = fxdr_unsigned(uint32_t, *tl); /* * nfsrv_getclient() searches the client list for a match and * returns the appropriate NFSERR status. */ nd->nd_repstat = nfsrv_getclient(clientid, CLOPS_CONFIRM | CLOPS_RENEW, NULL, sep, confirm, sep->sess_cbprogram, nd, p); if (nd->nd_repstat == 0) { NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID); NFSBCOPY(sep->sess_sessionid, tl, NFSX_V4SESSIONID); NFSM_BUILD(tl, uint32_t *, 18 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(confirm.lval[0]); /* sequenceid */ *tl++ = txdr_unsigned(sep->sess_crflags); /* Fore channel attributes. */ *tl++ = 0; *tl++ = txdr_unsigned(sep->sess_maxreq); *tl++ = txdr_unsigned(sep->sess_maxresp); *tl++ = txdr_unsigned(sep->sess_maxrespcached); *tl++ = txdr_unsigned(sep->sess_maxops); *tl++ = txdr_unsigned(sep->sess_maxslots); *tl++ = txdr_unsigned(1); *tl++ = txdr_unsigned(0); /* No RDMA. */ /* Back channel attributes. */ *tl++ = 0; *tl++ = txdr_unsigned(sep->sess_cbmaxreq); *tl++ = txdr_unsigned(sep->sess_cbmaxresp); *tl++ = txdr_unsigned(sep->sess_cbmaxrespcached); *tl++ = txdr_unsigned(sep->sess_cbmaxops); *tl++ = txdr_unsigned(sep->sess_cbsess.nfsess_foreslots); *tl++ = txdr_unsigned(1); *tl = txdr_unsigned(0); /* No RDMA. */ } nfsmout: if (nd->nd_repstat != 0 && sep != NULL) free(sep, M_NFSDSESSION); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 sequence service */ int nfsrvd_sequence(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { uint32_t *tl; uint32_t highest_slotid, sequenceid, sflags, target_highest_slotid; int cache_this, error = 0; struct thread *p = curthread; if ((nd->nd_repstat = nfsd_checkrootexp(nd)) != 0) goto nfsmout; NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID); NFSBCOPY(tl, nd->nd_sessionid, NFSX_V4SESSIONID); NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED); sequenceid = fxdr_unsigned(uint32_t, *tl++); nd->nd_slotid = fxdr_unsigned(uint32_t, *tl++); highest_slotid = fxdr_unsigned(uint32_t, *tl++); if (*tl == newnfs_true) cache_this = 1; else cache_this = 0; nd->nd_flag |= ND_HASSEQUENCE; nd->nd_repstat = nfsrv_checksequence(nd, sequenceid, &highest_slotid, &target_highest_slotid, cache_this, &sflags, p); if (nd->nd_repstat == 0) { NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID); NFSBCOPY(nd->nd_sessionid, tl, NFSX_V4SESSIONID); NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(sequenceid); *tl++ = txdr_unsigned(nd->nd_slotid); *tl++ = txdr_unsigned(highest_slotid); *tl++ = txdr_unsigned(target_highest_slotid); *tl = txdr_unsigned(sflags); } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 reclaim complete service */ int nfsrvd_reclaimcomplete(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { uint32_t *tl; int error = 0, onefs; NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); /* * I believe that a ReclaimComplete with rca_one_fs == TRUE is only * to be used after a file system has been transferred to a different * file server. However, RFC5661 is somewhat vague w.r.t. this and * the ESXi 6.7 client does both a ReclaimComplete with rca_one_fs * == TRUE and one with ReclaimComplete with rca_one_fs == FALSE. * Therefore, just ignore the rca_one_fs == TRUE operation and return * NFS_OK without doing anything. */ onefs = 0; if (*tl == newnfs_true) onefs = 1; nd->nd_repstat = nfsrv_checkreclaimcomplete(nd, onefs); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 destroy clientid service */ int nfsrvd_destroyclientid(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { uint32_t *tl; nfsquad_t clientid; int error = 0; struct thread *p = curthread; if ((nd->nd_repstat = nfsd_checkrootexp(nd)) != 0) goto nfsmout; NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); clientid.lval[0] = *tl++; clientid.lval[1] = *tl; nd->nd_repstat = nfsrv_destroyclient(clientid, p); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 bind connection to session service */ int nfsrvd_bindconnsess(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { uint32_t *tl; uint8_t sessid[NFSX_V4SESSIONID]; int error = 0, foreaft; if ((nd->nd_repstat = nfsd_checkrootexp(nd)) != 0) goto nfsmout; NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID + 2 * NFSX_UNSIGNED); NFSBCOPY(tl, sessid, NFSX_V4SESSIONID); tl += (NFSX_V4SESSIONID / NFSX_UNSIGNED); foreaft = fxdr_unsigned(int, *tl++); if (*tl == newnfs_true) { /* RDMA is not supported. */ nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } nd->nd_repstat = nfsrv_bindconnsess(nd, sessid, &foreaft); if (nd->nd_repstat == 0) { NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 2 * NFSX_UNSIGNED); NFSBCOPY(sessid, tl, NFSX_V4SESSIONID); tl += (NFSX_V4SESSIONID / NFSX_UNSIGNED); *tl++ = txdr_unsigned(foreaft); *tl = newnfs_false; } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 destroy session service */ int nfsrvd_destroysession(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { uint8_t *cp, sessid[NFSX_V4SESSIONID]; int error = 0; if ((nd->nd_repstat = nfsd_checkrootexp(nd)) != 0) goto nfsmout; NFSM_DISSECT(cp, uint8_t *, NFSX_V4SESSIONID); NFSBCOPY(cp, sessid, NFSX_V4SESSIONID); nd->nd_repstat = nfsrv_destroysession(nd, sessid); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 free stateid service */ int nfsrvd_freestateid(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { uint32_t *tl; nfsv4stateid_t stateid; int error = 0; struct thread *p = curthread; NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID); stateid.seqid = fxdr_unsigned(uint32_t, *tl++); NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); /* * For the special stateid of other all 0s and seqid == 1, set the * stateid to the current stateid, if it is set. */ if (stateid.seqid == 1 && stateid.other[0] == 0 && stateid.other[1] == 0 && stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) { stateid = nd->nd_curstateid; stateid.seqid = 0; } else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } nd->nd_repstat = nfsrv_freestateid(nd, &stateid, p); /* If the current stateid has been free'd, unset it. */ if (nd->nd_repstat == 0 && (nd->nd_flag & ND_CURSTATEID) != 0 && stateid.other[0] == nd->nd_curstateid.other[0] && stateid.other[1] == nd->nd_curstateid.other[1] && stateid.other[2] == nd->nd_curstateid.other[2]) nd->nd_flag &= ~ND_CURSTATEID; nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 layoutget service */ int nfsrvd_layoutget(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { uint32_t *tl; nfsv4stateid_t stateid; int error = 0, layoutlen, layouttype, iomode, maxcnt, retonclose; uint64_t offset, len, minlen; char *layp; struct thread *p = curthread; NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED + 3 * NFSX_HYPER + NFSX_STATEID); tl++; /* Signal layout available. Ignore for now. */ layouttype = fxdr_unsigned(int, *tl++); iomode = fxdr_unsigned(int, *tl++); offset = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); tl += 2; minlen = fxdr_hyper(tl); tl += 2; stateid.seqid = fxdr_unsigned(uint32_t, *tl++); NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); maxcnt = fxdr_unsigned(int, *tl); NFSD_DEBUG(4, "layoutget ltyp=%d iom=%d off=%ju len=%ju mlen=%ju\n", layouttype, iomode, (uintmax_t)offset, (uintmax_t)len, (uintmax_t)minlen); if (len < minlen || (minlen != UINT64_MAX && offset + minlen < offset) || (len != UINT64_MAX && offset + len < offset)) { nd->nd_repstat = NFSERR_INVAL; goto nfsmout; } /* * For the special stateid of other all 0s and seqid == 1, set the * stateid to the current stateid, if it is set. */ if (stateid.seqid == 1 && stateid.other[0] == 0 && stateid.other[1] == 0 && stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) { stateid = nd->nd_curstateid; stateid.seqid = 0; } else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } layp = NULL; if (layouttype == NFSLAYOUT_NFSV4_1_FILES && nfsrv_maxpnfsmirror == 1) layp = malloc(NFSX_V4FILELAYOUT, M_TEMP, M_WAITOK); else if (layouttype == NFSLAYOUT_FLEXFILE) layp = malloc(NFSX_V4FLEXLAYOUT(nfsrv_maxpnfsmirror), M_TEMP, M_WAITOK); else nd->nd_repstat = NFSERR_UNKNLAYOUTTYPE; if (layp != NULL) nd->nd_repstat = nfsrv_layoutget(nd, vp, exp, layouttype, &iomode, &offset, &len, minlen, &stateid, maxcnt, &retonclose, &layoutlen, layp, nd->nd_cred, p); NFSD_DEBUG(4, "nfsrv_layoutget stat=%u layoutlen=%d\n", nd->nd_repstat, layoutlen); if (nd->nd_repstat == 0) { /* For NFSv4.1, set the Current StateID. */ if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_curstateid = stateid; nd->nd_flag |= ND_CURSTATEID; } NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + NFSX_STATEID + 2 * NFSX_HYPER); *tl++ = txdr_unsigned(retonclose); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY(stateid.other, tl, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); *tl++ = txdr_unsigned(1); /* Only returns one layout. */ txdr_hyper(offset, tl); tl += 2; txdr_hyper(len, tl); tl += 2; *tl++ = txdr_unsigned(iomode); *tl = txdr_unsigned(layouttype); nfsm_strtom(nd, layp, layoutlen); } else if (nd->nd_repstat == NFSERR_LAYOUTTRYLATER) { NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } free(layp, M_TEMP); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 layoutcommit service */ int nfsrvd_layoutcommit(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { uint32_t *tl; nfsv4stateid_t stateid; int error = 0, hasnewoff, hasnewmtime, layouttype, maxcnt, reclaim; int hasnewsize; uint64_t offset, len, newoff = 0, newsize; struct timespec newmtime; char *layp; struct thread *p = curthread; layp = NULL; NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + 2 * NFSX_HYPER + NFSX_STATEID); offset = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); tl += 2; reclaim = fxdr_unsigned(int, *tl++); stateid.seqid = fxdr_unsigned(uint32_t, *tl++); NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); /* * For the special stateid of other all 0s and seqid == 1, set the * stateid to the current stateid, if it is set. */ if (stateid.seqid == 1 && stateid.other[0] == 0 && stateid.other[1] == 0 && stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) { stateid = nd->nd_curstateid; stateid.seqid = 0; } else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } hasnewoff = fxdr_unsigned(int, *tl); if (hasnewoff != 0) { NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED); newoff = fxdr_hyper(tl); tl += 2; } else NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); hasnewmtime = fxdr_unsigned(int, *tl); if (hasnewmtime != 0) { NFSM_DISSECT(tl, uint32_t *, NFSX_V4TIME + 2 * NFSX_UNSIGNED); fxdr_nfsv4time(tl, &newmtime); tl += (NFSX_V4TIME / NFSX_UNSIGNED); } else NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); layouttype = fxdr_unsigned(int, *tl++); maxcnt = fxdr_unsigned(int, *tl); if (maxcnt > 0) { layp = malloc(maxcnt + 1, M_TEMP, M_WAITOK); error = nfsrv_mtostr(nd, layp, maxcnt); if (error != 0) goto nfsmout; } nd->nd_repstat = nfsrv_layoutcommit(nd, vp, layouttype, hasnewoff, newoff, offset, len, hasnewmtime, &newmtime, reclaim, &stateid, maxcnt, layp, &hasnewsize, &newsize, nd->nd_cred, p); NFSD_DEBUG(4, "nfsrv_layoutcommit stat=%u\n", nd->nd_repstat); if (nd->nd_repstat == 0) { if (hasnewsize != 0) { NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED + NFSX_HYPER); *tl++ = newnfs_true; txdr_hyper(newsize, tl); } else { NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } } nfsmout: free(layp, M_TEMP); vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 layoutreturn service */ int nfsrvd_layoutreturn(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { uint32_t *tl, *layp; nfsv4stateid_t stateid; int error = 0, fnd, kind, layouttype, iomode, maxcnt, reclaim; uint64_t offset, len; struct thread *p = curthread; layp = NULL; NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED); reclaim = *tl++; layouttype = fxdr_unsigned(int, *tl++); iomode = fxdr_unsigned(int, *tl++); kind = fxdr_unsigned(int, *tl); NFSD_DEBUG(4, "layoutreturn recl=%d ltyp=%d iom=%d kind=%d\n", reclaim, layouttype, iomode, kind); if (kind == NFSV4LAYOUTRET_FILE) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID + NFSX_UNSIGNED); offset = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); tl += 2; stateid.seqid = fxdr_unsigned(uint32_t, *tl++); NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); /* * For the special stateid of other all 0s and seqid == 1, set * the stateid to the current stateid, if it is set. */ if (stateid.seqid == 1 && stateid.other[0] == 0 && stateid.other[1] == 0 && stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) { stateid = nd->nd_curstateid; stateid.seqid = 0; } else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } maxcnt = fxdr_unsigned(int, *tl); if (maxcnt > 0) { layp = malloc(maxcnt + 1, M_TEMP, M_WAITOK); error = nfsrv_mtostr(nd, (char *)layp, maxcnt); if (error != 0) goto nfsmout; } } else { if (reclaim == newnfs_true) { nd->nd_repstat = NFSERR_INVAL; goto nfsmout; } offset = len = 0; maxcnt = 0; } nd->nd_repstat = nfsrv_layoutreturn(nd, vp, layouttype, iomode, offset, len, reclaim, kind, &stateid, maxcnt, layp, &fnd, nd->nd_cred, p); NFSD_DEBUG(4, "nfsrv_layoutreturn stat=%u fnd=%d\n", nd->nd_repstat, fnd); if (nd->nd_repstat == 0) { NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); if (fnd != 0) { *tl = newnfs_true; NFSM_BUILD(tl, uint32_t *, NFSX_STATEID); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY(stateid.other, tl, NFSX_STATEIDOTHER); } else *tl = newnfs_false; } nfsmout: free(layp, M_TEMP); vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 layout error service */ int nfsrvd_layouterror(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { uint32_t *tl; nfsv4stateid_t stateid; int cnt, error = 0, i, stat; int opnum __unused; char devid[NFSX_V4DEVICEID]; uint64_t offset, len; NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID + NFSX_UNSIGNED); offset = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); tl += 2; stateid.seqid = fxdr_unsigned(uint32_t, *tl++); NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); cnt = fxdr_unsigned(int, *tl); NFSD_DEBUG(4, "layouterror off=%ju len=%ju cnt=%d\n", (uintmax_t)offset, (uintmax_t)len, cnt); /* * For the special stateid of other all 0s and seqid == 1, set * the stateid to the current stateid, if it is set. */ if (stateid.seqid == 1 && stateid.other[0] == 0 && stateid.other[1] == 0 && stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) { stateid = nd->nd_curstateid; stateid.seqid = 0; } else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } /* * Ignore offset, len and stateid for now. */ for (i = 0; i < cnt; i++) { NFSM_DISSECT(tl, uint32_t *, NFSX_V4DEVICEID + 2 * NFSX_UNSIGNED); NFSBCOPY(tl, devid, NFSX_V4DEVICEID); tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); stat = fxdr_unsigned(int, *tl++); opnum = fxdr_unsigned(int, *tl); NFSD_DEBUG(4, "nfsrvd_layouterr op=%d stat=%d\n", opnum, stat); /* * Except for NFSERR_ACCES and NFSERR_STALE errors, * disable the mirror. */ if (stat != NFSERR_ACCES && stat != NFSERR_STALE) nfsrv_delds(devid, curthread); } nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 layout stats service */ int nfsrvd_layoutstats(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { uint32_t *tl; nfsv4stateid_t stateid; int cnt, error = 0; int layouttype __unused; char devid[NFSX_V4DEVICEID] __unused; uint64_t offset, len, readcount, readbytes, writecount, writebytes __unused; NFSM_DISSECT(tl, uint32_t *, 6 * NFSX_HYPER + NFSX_STATEID + NFSX_V4DEVICEID + 2 * NFSX_UNSIGNED); offset = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); tl += 2; stateid.seqid = fxdr_unsigned(uint32_t, *tl++); NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); readcount = fxdr_hyper(tl); tl += 2; readbytes = fxdr_hyper(tl); tl += 2; writecount = fxdr_hyper(tl); tl += 2; writebytes = fxdr_hyper(tl); tl += 2; NFSBCOPY(tl, devid, NFSX_V4DEVICEID); tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); layouttype = fxdr_unsigned(int, *tl++); cnt = fxdr_unsigned(int, *tl); error = nfsm_advance(nd, NFSM_RNDUP(cnt), -1); if (error != 0) goto nfsmout; NFSD_DEBUG(4, "layoutstats cnt=%d\n", cnt); /* * For the special stateid of other all 0s and seqid == 1, set * the stateid to the current stateid, if it is set. */ if (stateid.seqid == 1 && stateid.other[0] == 0 && stateid.other[1] == 0 && stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) { stateid = nd->nd_curstateid; stateid.seqid = 0; } else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } /* * No use for the stats for now. */ nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 io_advise service */ int nfsrvd_ioadvise(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { uint32_t *tl; nfsv4stateid_t stateid; nfsattrbit_t hints; int error = 0, ret; off_t offset, len; NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID + 2 * NFSX_HYPER); stateid.seqid = fxdr_unsigned(uint32_t, *tl++); NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); offset = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); error = nfsrv_getattrbits(nd, &hints, NULL, NULL); if (error != 0) goto nfsmout; /* * For the special stateid of other all 0s and seqid == 1, set * the stateid to the current stateid, if it is set. */ if (stateid.seqid == 1 && stateid.other[0] == 0 && stateid.other[1] == 0 && stateid.other[2] == 0) { if ((nd->nd_flag & ND_CURSTATEID) != 0) { stateid = nd->nd_curstateid; stateid.seqid = 0; } else { nd->nd_repstat = NFSERR_BADSTATEID; goto nfsmout; } } if (offset < 0) { nd->nd_repstat = NFSERR_INVAL; goto nfsmout; } if (len < 0) len = 0; if (vp->v_type != VREG) { if (vp->v_type == VDIR) nd->nd_repstat = NFSERR_ISDIR; else nd->nd_repstat = NFSERR_WRONGTYPE; goto nfsmout; } /* * For now, we can only handle WILLNEED and DONTNEED and don't use * the stateid. */ if ((NFSISSET_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED) && !NFSISSET_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED)) || (NFSISSET_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED) && !NFSISSET_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED))) { NFSVOPUNLOCK(vp); if (NFSISSET_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED)) { ret = VOP_ADVISE(vp, offset, len, POSIX_FADV_WILLNEED); NFSZERO_ATTRBIT(&hints); if (ret == 0) NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED); else NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_NORMAL); } else { ret = VOP_ADVISE(vp, offset, len, POSIX_FADV_DONTNEED); NFSZERO_ATTRBIT(&hints); if (ret == 0) NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED); else NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_NORMAL); } vrele(vp); } else { NFSZERO_ATTRBIT(&hints); NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_NORMAL); vput(vp); } nfsrv_putattrbit(nd, &hints); NFSEXITCODE2(error, nd); return (error); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 getdeviceinfo service */ int nfsrvd_getdevinfo(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { uint32_t *tl, maxcnt, notify[NFSV4_NOTIFYBITMAP]; int cnt, devaddrlen, error = 0, i, layouttype; char devid[NFSX_V4DEVICEID], *devaddr; time_t dev_time; NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED + NFSX_V4DEVICEID); NFSBCOPY(tl, devid, NFSX_V4DEVICEID); tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); layouttype = fxdr_unsigned(int, *tl++); maxcnt = fxdr_unsigned(uint32_t, *tl++); cnt = fxdr_unsigned(int, *tl); NFSD_DEBUG(4, "getdevinfo ltyp=%d maxcnt=%u bitcnt=%d\n", layouttype, maxcnt, cnt); if (cnt > NFSV4_NOTIFYBITMAP || cnt < 0) { nd->nd_repstat = NFSERR_INVAL; goto nfsmout; } if (cnt > 0) { NFSM_DISSECT(tl, uint32_t *, cnt * NFSX_UNSIGNED); for (i = 0; i < cnt; i++) notify[i] = fxdr_unsigned(uint32_t, *tl++); } for (i = cnt; i < NFSV4_NOTIFYBITMAP; i++) notify[i] = 0; /* * Check that the device id is not stale. Device ids are recreated * each time the nfsd threads are restarted. */ NFSBCOPY(devid, &dev_time, sizeof(dev_time)); if (dev_time != nfsdev_time) { nd->nd_repstat = NFSERR_NOENT; goto nfsmout; } /* Look for the device id. */ nd->nd_repstat = nfsrv_getdevinfo(devid, layouttype, &maxcnt, notify, &devaddrlen, &devaddr); NFSD_DEBUG(4, "nfsrv_getdevinfo stat=%u\n", nd->nd_repstat); if (nd->nd_repstat == 0) { NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(layouttype); nfsm_strtom(nd, devaddr, devaddrlen); cnt = 0; for (i = 0; i < NFSV4_NOTIFYBITMAP; i++) { if (notify[i] != 0) cnt = i + 1; } NFSM_BUILD(tl, uint32_t *, (cnt + 1) * NFSX_UNSIGNED); *tl++ = txdr_unsigned(cnt); for (i = 0; i < cnt; i++) *tl++ = txdr_unsigned(notify[i]); } else if (nd->nd_repstat == NFSERR_TOOSMALL) { NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(maxcnt); } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 test stateid service */ int nfsrvd_teststateid(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { uint32_t *tl; nfsv4stateid_t *stateidp = NULL, *tstateidp; int cnt, error = 0, i, ret; struct thread *p = curthread; NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); cnt = fxdr_unsigned(int, *tl); if (cnt <= 0 || cnt > 1024) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } stateidp = mallocarray(cnt, sizeof(nfsv4stateid_t), M_TEMP, M_WAITOK); tstateidp = stateidp; for (i = 0; i < cnt; i++) { NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID); tstateidp->seqid = fxdr_unsigned(uint32_t, *tl++); NFSBCOPY(tl, tstateidp->other, NFSX_STATEIDOTHER); tstateidp++; } NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(cnt); tstateidp = stateidp; for (i = 0; i < cnt; i++) { ret = nfsrv_teststateid(nd, tstateidp, p); NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(ret); tstateidp++; } nfsmout: free(stateidp, M_TEMP); NFSEXITCODE2(error, nd); return (error); } /* * nfs allocate service */ int nfsrvd_allocate(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { uint32_t *tl; struct nfsvattr forat; int error = 0, forat_ret = 1, gotproxystateid; off_t off, len; struct nfsstate st, *stp = &st; struct nfslock lo, *lop = &lo; nfsv4stateid_t stateid; nfsquad_t clientid; nfsattrbit_t attrbits; gotproxystateid = 0; NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID + 2 * NFSX_HYPER); stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS); lop->lo_flags = NFSLCK_WRITE; stp->ls_ownerlen = 0; stp->ls_op = NULL; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); clientid.lval[0] = stp->ls_stateid.other[0] = *tl++; clientid.lval[1] = stp->ls_stateid.other[1] = *tl++; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK2 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } stp->ls_stateid.other[2] = *tl++; /* * Don't allow this to be done for a DS. */ if ((nd->nd_flag & ND_DSSERVER) != 0) nd->nd_repstat = NFSERR_NOTSUPP; /* However, allow the proxy stateid. */ if (stp->ls_stateid.seqid == 0xffffffff && stp->ls_stateid.other[0] == 0x55555555 && stp->ls_stateid.other[1] == 0x55555555 && stp->ls_stateid.other[2] == 0x55555555) gotproxystateid = 1; off = fxdr_hyper(tl); tl += 2; lop->lo_first = off; len = fxdr_hyper(tl); lop->lo_end = off + len; /* * Paranoia, just in case it wraps around, which shouldn't * ever happen anyhow. */ if (nd->nd_repstat == 0 && (lop->lo_end < lop->lo_first || len <= 0)) nd->nd_repstat = NFSERR_INVAL; if (nd->nd_repstat == 0 && vnode_vtype(vp) != VREG) nd->nd_repstat = NFSERR_WRONGTYPE; NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER); forat_ret = nfsvno_getattr(vp, &forat, nd, curthread, 1, &attrbits); if (nd->nd_repstat == 0) nd->nd_repstat = forat_ret; if (nd->nd_repstat == 0 && (forat.na_uid != nd->nd_cred->cr_uid || NFSVNO_EXSTRICTACCESS(exp))) nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, curthread, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat == 0 && gotproxystateid == 0) nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid, &stateid, exp, nd, curthread); if (nd->nd_repstat == 0) nd->nd_repstat = nfsvno_allocate(vp, off, len, nd->nd_cred, curthread); vput(vp); NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfs copy service */ int nfsrvd_copy_file_range(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, vnode_t tovp, struct nfsexstuff *exp, struct nfsexstuff *toexp) { uint32_t *tl; struct nfsvattr at; int cnt, error = 0, ret; off_t inoff, outoff; uint64_t len; size_t xfer; struct nfsstate inst, outst, *instp = &inst, *outstp = &outst; struct nfslock inlo, outlo, *inlop = &inlo, *outlop = &outlo; nfsquad_t clientid; nfsv4stateid_t stateid; nfsattrbit_t attrbits; void *rl_rcookie, *rl_wcookie; rl_rcookie = rl_wcookie = NULL; if (nfsrv_devidcnt > 0) { /* * For a pNFS server, reply NFSERR_NOTSUPP so that the client * will do the copy via I/O on the DS(s). */ nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } if (vp == tovp) { /* Copying a byte range within the same file is not allowed. */ nd->nd_repstat = NFSERR_INVAL; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_STATEID + 3 * NFSX_HYPER + 3 * NFSX_UNSIGNED); instp->ls_flags = (NFSLCK_CHECK | NFSLCK_READACCESS); inlop->lo_flags = NFSLCK_READ; instp->ls_ownerlen = 0; instp->ls_op = NULL; instp->ls_uid = nd->nd_cred->cr_uid; instp->ls_stateid.seqid = fxdr_unsigned(uint32_t, *tl++); clientid.lval[0] = instp->ls_stateid.other[0] = *tl++; clientid.lval[1] = instp->ls_stateid.other[1] = *tl++; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) clientid.qval = nd->nd_clientid.qval; instp->ls_stateid.other[2] = *tl++; outstp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS); outlop->lo_flags = NFSLCK_WRITE; outstp->ls_ownerlen = 0; outstp->ls_op = NULL; outstp->ls_uid = nd->nd_cred->cr_uid; outstp->ls_stateid.seqid = fxdr_unsigned(uint32_t, *tl++); outstp->ls_stateid.other[0] = *tl++; outstp->ls_stateid.other[1] = *tl++; outstp->ls_stateid.other[2] = *tl++; inoff = fxdr_hyper(tl); tl += 2; inlop->lo_first = inoff; outoff = fxdr_hyper(tl); tl += 2; outlop->lo_first = outoff; len = fxdr_hyper(tl); tl += 2; if (len == 0) { /* len == 0 means to EOF. */ inlop->lo_end = OFF_MAX; outlop->lo_end = OFF_MAX; } else { inlop->lo_end = inlop->lo_first + len; outlop->lo_end = outlop->lo_first + len; } /* * At this time only consecutive, synchronous copy is supported, * so ca_consecutive and ca_synchronous can be ignored. */ tl += 2; cnt = fxdr_unsigned(int, *tl); if ((nd->nd_flag & ND_DSSERVER) != 0 || cnt != 0) nd->nd_repstat = NFSERR_NOTSUPP; if (nd->nd_repstat == 0 && (inoff > OFF_MAX || outoff > OFF_MAX || inlop->lo_end > OFF_MAX || outlop->lo_end > OFF_MAX || inlop->lo_end < inlop->lo_first || outlop->lo_end < outlop->lo_first)) nd->nd_repstat = NFSERR_INVAL; if (nd->nd_repstat == 0 && vnode_vtype(vp) != VREG) nd->nd_repstat = NFSERR_WRONGTYPE; /* Check permissions for the input file. */ NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER); ret = nfsvno_getattr(vp, &at, nd, curthread, 1, &attrbits); if (nd->nd_repstat == 0) nd->nd_repstat = ret; if (nd->nd_repstat == 0 && (at.na_uid != nd->nd_cred->cr_uid || NFSVNO_EXSTRICTACCESS(exp))) nd->nd_repstat = nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, curthread, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat == 0) nd->nd_repstat = nfsrv_lockctrl(vp, &instp, &inlop, NULL, clientid, &stateid, exp, nd, curthread); NFSVOPUNLOCK(vp); if (nd->nd_repstat != 0) goto out; error = NFSVOPLOCK(tovp, LK_SHARED); if (error != 0) goto out; if (vnode_vtype(tovp) != VREG) nd->nd_repstat = NFSERR_WRONGTYPE; /* For the output file, we only need the Owner attribute. */ ret = nfsvno_getattr(tovp, &at, nd, curthread, 1, &attrbits); if (nd->nd_repstat == 0) nd->nd_repstat = ret; if (nd->nd_repstat == 0 && (at.na_uid != nd->nd_cred->cr_uid || NFSVNO_EXSTRICTACCESS(exp))) nd->nd_repstat = nfsvno_accchk(tovp, VWRITE, nd->nd_cred, toexp, curthread, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat == 0) nd->nd_repstat = nfsrv_lockctrl(tovp, &outstp, &outlop, NULL, clientid, &stateid, toexp, nd, curthread); NFSVOPUNLOCK(tovp); /* Range lock the byte ranges for both invp and outvp. */ if (nd->nd_repstat == 0) { for (;;) { if (len == 0) { rl_wcookie = vn_rangelock_wlock(tovp, outoff, OFF_MAX); rl_rcookie = vn_rangelock_tryrlock(vp, inoff, OFF_MAX); } else { rl_wcookie = vn_rangelock_wlock(tovp, outoff, outoff + len); rl_rcookie = vn_rangelock_tryrlock(vp, inoff, inoff + len); } if (rl_rcookie != NULL) break; vn_rangelock_unlock(tovp, rl_wcookie); if (len == 0) rl_rcookie = vn_rangelock_rlock(vp, inoff, OFF_MAX); else rl_rcookie = vn_rangelock_rlock(vp, inoff, inoff + len); vn_rangelock_unlock(vp, rl_rcookie); } error = NFSVOPLOCK(vp, LK_SHARED); if (error == 0) { ret = nfsvno_getattr(vp, &at, nd, curthread, 1, NULL); if (ret == 0) { /* * Since invp is range locked, na_size should * not change. */ if (len == 0 && at.na_size > inoff) { /* * If len == 0, set it based on invp's * size. If offset is past EOF, just * leave len == 0. */ len = at.na_size - inoff; } else if (nfsrv_linux42server == 0 && inoff + len > at.na_size) { /* * RFC-7862 says that NFSERR_INVAL must * be returned when inoff + len exceeds * the file size, however the NFSv4.2 * Linux client likes to do this, so * only check if nfsrv_linux42server * is not set. */ nd->nd_repstat = NFSERR_INVAL; } } NFSVOPUNLOCK(vp); if (ret != 0 && nd->nd_repstat == 0) nd->nd_repstat = ret; } else if (nd->nd_repstat == 0) nd->nd_repstat = error; } /* * Do the actual copy to an upper limit of vfs.nfs.maxcopyrange. * This limit is applied to ensure that the RPC replies in a * reasonable time. */ if (len > nfs_maxcopyrange) xfer = nfs_maxcopyrange; else xfer = len; if (nd->nd_repstat == 0) { nd->nd_repstat = vn_copy_file_range(vp, &inoff, tovp, &outoff, &xfer, 0, nd->nd_cred, nd->nd_cred, NULL); if (nd->nd_repstat == 0) len = xfer; } /* Unlock the ranges. */ if (rl_rcookie != NULL) vn_rangelock_unlock(vp, rl_rcookie); if (rl_wcookie != NULL) vn_rangelock_unlock(tovp, rl_wcookie); if (nd->nd_repstat == 0) { NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + NFSX_HYPER + NFSX_VERF); *tl++ = txdr_unsigned(0); /* No callback ids. */ txdr_hyper(len, tl); tl += 2; *tl++ = txdr_unsigned(NFSWRITE_UNSTABLE); *tl++ = txdr_unsigned(nfsboottime.tv_sec); *tl++ = txdr_unsigned(nfsboottime.tv_usec); *tl++ = newnfs_true; *tl = newnfs_true; } out: vrele(vp); vrele(tovp); NFSEXITCODE2(error, nd); return (error); nfsmout: vput(vp); vrele(tovp); NFSEXITCODE2(error, nd); return (error); } /* * nfs seek service */ int nfsrvd_seek(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, struct nfsexstuff *exp) { uint32_t *tl; struct nfsvattr at; int content, error = 0; off_t off; u_long cmd; nfsattrbit_t attrbits; bool eof; NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID + NFSX_HYPER + NFSX_UNSIGNED); /* Ignore the stateid for now. */ tl += (NFSX_STATEID / NFSX_UNSIGNED); off = fxdr_hyper(tl); tl += 2; content = fxdr_unsigned(int, *tl); if (content == NFSV4CONTENT_DATA) cmd = FIOSEEKDATA; else if (content == NFSV4CONTENT_HOLE) cmd = FIOSEEKHOLE; else nd->nd_repstat = NFSERR_BADXDR; if (nd->nd_repstat == 0 && vnode_vtype(vp) == VDIR) nd->nd_repstat = NFSERR_ISDIR; if (nd->nd_repstat == 0 && vnode_vtype(vp) != VREG) nd->nd_repstat = NFSERR_WRONGTYPE; if (nd->nd_repstat == 0 && off < 0) nd->nd_repstat = NFSERR_NXIO; if (nd->nd_repstat == 0) { /* Check permissions for the input file. */ NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER); nd->nd_repstat = nfsvno_getattr(vp, &at, nd, curthread, 1, &attrbits); } if (nd->nd_repstat == 0 && (at.na_uid != nd->nd_cred->cr_uid || NFSVNO_EXSTRICTACCESS(exp))) nd->nd_repstat = nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, curthread, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat != 0) goto nfsmout; /* nfsvno_seek() unlocks and vrele()s the vp. */ nd->nd_repstat = nfsvno_seek(nd, vp, cmd, &off, content, &eof, nd->nd_cred, curthread); if (nd->nd_repstat == 0 && eof && content == NFSV4CONTENT_DATA && nfsrv_linux42server != 0) nd->nd_repstat = NFSERR_NXIO; if (nd->nd_repstat == 0) { NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED + NFSX_HYPER); if (eof) *tl++ = newnfs_true; else *tl++ = newnfs_false; txdr_hyper(off, tl); } NFSEXITCODE2(error, nd); return (error); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfs get extended attribute service */ int nfsrvd_getxattr(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { uint32_t *tl; struct mbuf *mp = NULL, *mpend = NULL; int error, len; char *name; struct thread *p = curthread; uint16_t off; error = 0; NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len <= 0) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } if (len > EXTATTR_MAXNAMELEN) { nd->nd_repstat = NFSERR_NOXATTR; goto nfsmout; } name = malloc(len + 1, M_TEMP, M_WAITOK); nd->nd_repstat = nfsrv_mtostr(nd, name, len); if (nd->nd_repstat == 0) nd->nd_repstat = nfsvno_getxattr(vp, name, nd->nd_maxresp, nd->nd_cred, nd->nd_flag, nd->nd_maxextsiz, p, &mp, &mpend, &len); if (nd->nd_repstat == ENOATTR) nd->nd_repstat = NFSERR_NOXATTR; else if (nd->nd_repstat == EOPNOTSUPP) nd->nd_repstat = NFSERR_NOTSUPP; if (nd->nd_repstat == 0) { NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(len); if (len > 0) { nd->nd_mb->m_next = mp; nd->nd_mb = mpend; if ((mpend->m_flags & M_EXTPG) != 0) { nd->nd_flag |= ND_EXTPG; nd->nd_bextpg = mpend->m_epg_npgs - 1; nd->nd_bpos = (char *)(void *) PHYS_TO_DMAP(mpend->m_epg_pa[nd->nd_bextpg]); off = (nd->nd_bextpg == 0) ? mpend->m_epg_1st_off : 0; nd->nd_bpos += off + mpend->m_epg_last_len; nd->nd_bextpgsiz = PAGE_SIZE - mpend->m_epg_last_len - off; } else nd->nd_bpos = mtod(mpend, char *) + mpend->m_len; } } free(name, M_TEMP); nfsmout: if (nd->nd_repstat == 0) nd->nd_repstat = error; vput(vp); NFSEXITCODE2(0, nd); return (0); } /* * nfs set extended attribute service */ int nfsrvd_setxattr(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { uint32_t *tl; struct nfsvattr ova, nva; nfsattrbit_t attrbits; int error, len, opt; char *name; size_t siz; struct thread *p = curthread; error = 0; name = NULL; NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); opt = fxdr_unsigned(int, *tl++); len = fxdr_unsigned(int, *tl); if (len <= 0) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } if (len > EXTATTR_MAXNAMELEN) { nd->nd_repstat = NFSERR_NOXATTR; goto nfsmout; } name = malloc(len + 1, M_TEMP, M_WAITOK); error = nfsrv_mtostr(nd, name, len); if (error != 0) goto nfsmout; NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len < 0 || len > IOSIZE_MAX) { nd->nd_repstat = NFSERR_XATTR2BIG; goto nfsmout; } switch (opt) { case NFSV4SXATTR_CREATE: error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL, &siz, nd->nd_cred, p); if (error != ENOATTR) nd->nd_repstat = NFSERR_EXIST; error = 0; break; case NFSV4SXATTR_REPLACE: error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL, &siz, nd->nd_cred, p); if (error != 0) nd->nd_repstat = NFSERR_NOXATTR; break; case NFSV4SXATTR_EITHER: break; default: nd->nd_repstat = NFSERR_BADXDR; } if (nd->nd_repstat != 0) goto nfsmout; /* Now, do the Set Extended attribute, with Change before and after. */ NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); nd->nd_repstat = nfsvno_getattr(vp, &ova, nd, p, 1, &attrbits); if (nd->nd_repstat == 0) { nd->nd_repstat = nfsvno_setxattr(vp, name, len, nd->nd_md, nd->nd_dpos, nd->nd_cred, p); if (nd->nd_repstat == ENXIO) nd->nd_repstat = NFSERR_XATTR2BIG; } if (nd->nd_repstat == 0 && len > 0) nd->nd_repstat = nfsm_advance(nd, NFSM_RNDUP(len), -1); if (nd->nd_repstat == 0) nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, &attrbits); if (nd->nd_repstat == 0) { NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED); *tl++ = newnfs_true; txdr_hyper(ova.na_filerev, tl); tl += 2; txdr_hyper(nva.na_filerev, tl); } nfsmout: free(name, M_TEMP); if (nd->nd_repstat == 0) nd->nd_repstat = error; vput(vp); NFSEXITCODE2(0, nd); return (0); } /* * nfs remove extended attribute service */ int nfsrvd_rmxattr(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { uint32_t *tl; struct nfsvattr ova, nva; nfsattrbit_t attrbits; int error, len; char *name; struct thread *p = curthread; error = 0; name = NULL; NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len <= 0) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } if (len > EXTATTR_MAXNAMELEN) { nd->nd_repstat = NFSERR_NOXATTR; goto nfsmout; } name = malloc(len + 1, M_TEMP, M_WAITOK); error = nfsrv_mtostr(nd, name, len); if (error != 0) goto nfsmout; if ((nd->nd_flag & ND_IMPLIEDCLID) == 0) { printf("EEK! nfsrvd_rmxattr: no implied clientid\n"); error = NFSERR_NOXATTR; goto nfsmout; } /* * Now, do the Remove Extended attribute, with Change before and * after. */ NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); nd->nd_repstat = nfsvno_getattr(vp, &ova, nd, p, 1, &attrbits); if (nd->nd_repstat == 0) { nd->nd_repstat = nfsvno_rmxattr(nd, vp, name, nd->nd_cred, p); if (nd->nd_repstat == ENOATTR) nd->nd_repstat = NFSERR_NOXATTR; } if (nd->nd_repstat == 0) nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, &attrbits); if (nd->nd_repstat == 0) { NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED); *tl++ = newnfs_true; txdr_hyper(ova.na_filerev, tl); tl += 2; txdr_hyper(nva.na_filerev, tl); } nfsmout: free(name, M_TEMP); if (nd->nd_repstat == 0) nd->nd_repstat = error; vput(vp); NFSEXITCODE2(0, nd); return (0); } /* * nfs list extended attribute service */ int nfsrvd_listxattr(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, __unused struct nfsexstuff *exp) { uint32_t cnt, *tl, len, len2, i, pos, retlen; int error; uint64_t cookie, cookie2; u_char *buf; bool eof; struct thread *p = curthread; error = 0; buf = NULL; NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED); /* * The cookie doesn't need to be in net byte order, but FreeBSD * does so to make it more readable in packet traces. */ cookie = fxdr_hyper(tl); tl += 2; len = fxdr_unsigned(uint32_t, *tl); if (len == 0 || cookie >= IOSIZE_MAX) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } if (len > nd->nd_maxresp - NFS_MAXXDR) len = nd->nd_maxresp - NFS_MAXXDR; len2 = len; nd->nd_repstat = nfsvno_listxattr(vp, cookie, nd->nd_cred, p, &buf, &len, &eof); if (nd->nd_repstat == EOPNOTSUPP) nd->nd_repstat = NFSERR_NOTSUPP; if (nd->nd_repstat == 0) { cookie2 = cookie + len; if (cookie2 < cookie) nd->nd_repstat = NFSERR_BADXDR; } if (nd->nd_repstat == 0) { /* Now copy the entries out. */ retlen = NFSX_HYPER + 2 * NFSX_UNSIGNED; if (len == 0 && retlen <= len2) { /* The cookie was at eof. */ NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED); txdr_hyper(cookie2, tl); tl += 2; *tl++ = txdr_unsigned(0); *tl = newnfs_true; goto nfsmout; } /* Sanity check the cookie. */ for (pos = 0; pos < len; pos += (i + 1)) { if (pos == cookie) break; i = buf[pos]; } if (pos != cookie) { nd->nd_repstat = NFSERR_INVAL; goto nfsmout; } /* Loop around copying the entrie(s) out. */ cnt = 0; len -= cookie; i = buf[pos]; while (i < len && len2 >= retlen + NFSM_RNDUP(i) + NFSX_UNSIGNED) { if (cnt == 0) { NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED); txdr_hyper(cookie2, tl); tl += 2; } retlen += nfsm_strtom(nd, &buf[pos + 1], i); len -= (i + 1); pos += (i + 1); i = buf[pos]; cnt++; } /* * eof is set true/false by nfsvno_listxattr(), but if we * can't copy all entries returned by nfsvno_listxattr(), * we are not at eof. */ if (len > 0) eof = false; if (cnt > 0) { /* *tl is set above. */ *tl = txdr_unsigned(cnt); NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); if (eof) *tl = newnfs_true; else *tl = newnfs_false; } else nd->nd_repstat = NFSERR_TOOSMALL; } nfsmout: free(buf, M_TEMP); if (nd->nd_repstat == 0) nd->nd_repstat = error; vput(vp); NFSEXITCODE2(0, nd); return (0); } /* * nfsv4 service not supported */ int nfsrvd_notsupp(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused struct nfsexstuff *exp) { nd->nd_repstat = NFSERR_NOTSUPP; NFSEXITCODE2(0, nd); return (0); } diff --git a/sys/fs/nfsserver/nfs_nfsdstate.c b/sys/fs/nfsserver/nfs_nfsdstate.c index 01280c8e49c6..750eda2027ec 100644 --- a/sys/fs/nfsserver/nfs_nfsdstate.c +++ b/sys/fs/nfsserver/nfs_nfsdstate.c @@ -1,8773 +1,8774 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2009 Rick Macklem, University of Guelph * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include struct nfsrv_stablefirst nfsrv_stablefirst; int nfsrv_issuedelegs = 0; int nfsrv_dolocallocks = 0; struct nfsv4lock nfsv4rootfs_lock; time_t nfsdev_time = 0; int nfsrv_layouthashsize; volatile int nfsrv_layoutcnt = 0; +extern uint32_t nfs_srvmaxio; extern int newnfs_numnfsd; extern struct nfsstatsv1 nfsstatsv1; extern int nfsrv_lease; extern struct timeval nfsboottime; extern u_int32_t newnfs_true, newnfs_false; extern struct mtx nfsrv_dslock_mtx; extern struct mtx nfsrv_recalllock_mtx; extern struct mtx nfsrv_dontlistlock_mtx; extern int nfsd_debuglevel; extern u_int nfsrv_dsdirsize; extern struct nfsdevicehead nfsrv_devidhead; extern int nfsrv_doflexfile; extern int nfsrv_maxpnfsmirror; NFSV4ROOTLOCKMUTEX; NFSSTATESPINLOCK; extern struct nfsdontlisthead nfsrv_dontlisthead; extern volatile int nfsrv_devidcnt; extern struct nfslayouthead nfsrv_recalllisthead; extern char *nfsrv_zeropnfsdat; SYSCTL_DECL(_vfs_nfsd); int nfsrv_statehashsize = NFSSTATEHASHSIZE; SYSCTL_INT(_vfs_nfsd, OID_AUTO, statehashsize, CTLFLAG_RDTUN, &nfsrv_statehashsize, 0, "Size of state hash table set via loader.conf"); int nfsrv_clienthashsize = NFSCLIENTHASHSIZE; SYSCTL_INT(_vfs_nfsd, OID_AUTO, clienthashsize, CTLFLAG_RDTUN, &nfsrv_clienthashsize, 0, "Size of client hash table set via loader.conf"); int nfsrv_lockhashsize = NFSLOCKHASHSIZE; SYSCTL_INT(_vfs_nfsd, OID_AUTO, fhhashsize, CTLFLAG_RDTUN, &nfsrv_lockhashsize, 0, "Size of file handle hash table set via loader.conf"); int nfsrv_sessionhashsize = NFSSESSIONHASHSIZE; SYSCTL_INT(_vfs_nfsd, OID_AUTO, sessionhashsize, CTLFLAG_RDTUN, &nfsrv_sessionhashsize, 0, "Size of session hash table set via loader.conf"); int nfsrv_layouthighwater = NFSLAYOUTHIGHWATER; SYSCTL_INT(_vfs_nfsd, OID_AUTO, layouthighwater, CTLFLAG_RDTUN, &nfsrv_layouthighwater, 0, "High water mark for number of layouts set via loader.conf"); static int nfsrv_v4statelimit = NFSRV_V4STATELIMIT; SYSCTL_INT(_vfs_nfsd, OID_AUTO, v4statelimit, CTLFLAG_RWTUN, &nfsrv_v4statelimit, 0, "High water limit for NFSv4 opens+locks+delegations"); static int nfsrv_writedelegifpos = 0; SYSCTL_INT(_vfs_nfsd, OID_AUTO, writedelegifpos, CTLFLAG_RW, &nfsrv_writedelegifpos, 0, "Issue a write delegation for read opens if possible"); static int nfsrv_allowreadforwriteopen = 1; SYSCTL_INT(_vfs_nfsd, OID_AUTO, allowreadforwriteopen, CTLFLAG_RW, &nfsrv_allowreadforwriteopen, 0, "Allow Reads to be done with Write Access StateIDs"); int nfsrv_pnfsatime = 0; SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsstrictatime, CTLFLAG_RW, &nfsrv_pnfsatime, 0, "For pNFS service, do Getattr ops to keep atime up-to-date"); int nfsrv_flexlinuxhack = 0; SYSCTL_INT(_vfs_nfsd, OID_AUTO, flexlinuxhack, CTLFLAG_RW, &nfsrv_flexlinuxhack, 0, "For Linux clients, hack around Flex File Layout bug"); /* * Hash lists for nfs V4. */ struct nfsclienthashhead *nfsclienthash; struct nfslockhashhead *nfslockhash; struct nfssessionhash *nfssessionhash; struct nfslayouthash *nfslayouthash; volatile int nfsrv_dontlistlen = 0; static u_int32_t nfsrv_openpluslock = 0, nfsrv_delegatecnt = 0; static time_t nfsrvboottime; static int nfsrv_returnoldstateid = 0, nfsrv_clients = 0; static int nfsrv_clienthighwater = NFSRV_CLIENTHIGHWATER; static int nfsrv_nogsscallback = 0; static volatile int nfsrv_writedelegcnt = 0; static int nfsrv_faildscnt; /* local functions */ static void nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp); static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p); static int nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p); static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p); static void nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p); static void nfsrv_freenfslock(struct nfslock *lop); static void nfsrv_freenfslockfile(struct nfslockfile *lfp); static void nfsrv_freedeleg(struct nfsstate *); static int nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, u_int32_t flags, struct nfsstate **stpp); static void nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp, struct nfsstate **stpp); static int nfsrv_getlockfh(vnode_t vp, u_short flags, struct nfslockfile *new_lfp, fhandle_t *nfhp, NFSPROC_T *p); static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp, struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit); static void nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp); static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp, struct nfslock **other_lopp, struct nfslockfile *lfp); static int nfsrv_getipnumber(u_char *cp); static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags, nfsv4stateid_t *stateidp, int specialid); static int nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp, u_int32_t flags); static int nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp, struct nfsvattr *nap, nfsattrbit_t *attrbitp, int laytype, NFSPROC_T *p); static int nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp, uint32_t callback, int op, const char *optag, struct nfsdsession **sepp, int *slotposp); static u_int32_t nfsrv_nextclientindex(void); static u_int32_t nfsrv_nextstateindex(struct nfsclient *clp); static void nfsrv_markstable(struct nfsclient *clp); static void nfsrv_markreclaim(struct nfsclient *clp); static int nfsrv_checkstable(struct nfsclient *clp); static int nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, struct vnode *vp, NFSPROC_T *p); static int nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p, vnode_t vp); static int nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp, struct nfsclient *clp, int *haslockp, NFSPROC_T *p); static int nfsrv_notsamecredname(struct nfsrv_descript *nd, struct nfsclient *clp); static time_t nfsrv_leaseexpiry(void); static void nfsrv_delaydelegtimeout(struct nfsstate *stp); static int nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid, struct nfsstate *stp, struct nfsrvcache *op); static int nfsrv_nootherstate(struct nfsstate *stp); static int nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags, uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p); static void nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp, uint64_t init_first, uint64_t init_end, NFSPROC_T *p); static int nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags, int oldflags, uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p); static void nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp, NFSPROC_T *p); static void nfsrv_locallock_commit(struct nfslockfile *lfp, int flags, uint64_t first, uint64_t end); static void nfsrv_locklf(struct nfslockfile *lfp); static void nfsrv_unlocklf(struct nfslockfile *lfp); static struct nfsdsession *nfsrv_findsession(uint8_t *sessionid); static int nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid); static int nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp, int dont_replycache, struct nfsdsession **sepp, int *slotposp); static int nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp); static int nfsrv_addlayout(struct nfsrv_descript *nd, struct nfslayout **lypp, nfsv4stateid_t *stateidp, char *layp, int *layoutlenp, NFSPROC_T *p); static void nfsrv_freelayout(struct nfslayouthead *lhp, struct nfslayout *lyp); static void nfsrv_freelayoutlist(nfsquad_t clientid); static void nfsrv_freelayouts(nfsquad_t *clid, fsid_t *fs, int laytype, int iomode); static void nfsrv_freealllayouts(void); static void nfsrv_freedevid(struct nfsdevice *ds); static int nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p, struct nfsdevice **dsp); static void nfsrv_deleteds(struct nfsdevice *fndds); static void nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost); static void nfsrv_freealldevids(void); static void nfsrv_flexlayouterr(struct nfsrv_descript *nd, uint32_t *layp, int maxcnt, NFSPROC_T *p); static int nfsrv_recalllayout(nfsquad_t clid, nfsv4stateid_t *stateidp, fhandle_t *fhp, struct nfslayout *lyp, int changed, int laytype, NFSPROC_T *p); static int nfsrv_findlayout(nfsquad_t *clientidp, fhandle_t *fhp, int laytype, NFSPROC_T *, struct nfslayout **lypp); static int nfsrv_fndclid(nfsquad_t *clidvec, nfsquad_t clid, int clidcnt); static struct nfslayout *nfsrv_filelayout(struct nfsrv_descript *nd, int iomode, fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs); static struct nfslayout *nfsrv_flexlayout(struct nfsrv_descript *nd, int iomode, int mirrorcnt, fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs); static int nfsrv_dontlayout(fhandle_t *fhp); static int nfsrv_createdsfile(vnode_t vp, fhandle_t *fhp, struct pnfsdsfile *pf, vnode_t dvp, struct nfsdevice *ds, struct ucred *cred, NFSPROC_T *p, vnode_t *tvpp); static struct nfsdevice *nfsrv_findmirroredds(struct nfsmount *nmp); /* * Scan the client list for a match and either return the current one, * create a new entry or return an error. * If returning a non-error, the clp structure must either be linked into * the client list or free'd. */ int nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp, nfsquad_t *clientidp, nfsquad_t *confirmp, NFSPROC_T *p) { struct nfsclient *clp = NULL, *new_clp = *new_clpp; int i, error = 0, ret; struct nfsstate *stp, *tstp; #ifdef INET struct sockaddr_in *sin, *rin; #endif #ifdef INET6 struct sockaddr_in6 *sin6, *rin6; #endif struct nfsdsession *sep, *nsep; int zapit = 0, gotit, hasstate = 0, igotlock; static u_int64_t confirm_index = 0; /* * Check for state resource limit exceeded. */ if (nfsrv_openpluslock > nfsrv_v4statelimit) { error = NFSERR_RESOURCE; goto out; } if (nfsrv_issuedelegs == 0 || ((nd->nd_flag & ND_GSS) != 0 && nfsrv_nogsscallback != 0)) /* * Don't do callbacks when delegations are disabled or * for AUTH_GSS unless enabled via nfsrv_nogsscallback. * If establishing a callback connection is attempted * when a firewall is blocking the callback path, the * server may wait too long for the connect attempt to * succeed during the Open. Some clients, such as Linux, * may timeout and give up on the Open before the server * replies. Also, since AUTH_GSS callbacks are not * yet interoperability tested, they might cause the * server to crap out, if they get past the Init call to * the client. */ new_clp->lc_program = 0; /* Lock out other nfsd threads */ NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); do { igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!igotlock); NFSUNLOCKV4ROOTMUTEX(); /* * Search for a match in the client list. */ gotit = i = 0; while (i < nfsrv_clienthashsize && !gotit) { LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) { if (new_clp->lc_idlen == clp->lc_idlen && !NFSBCMP(new_clp->lc_id, clp->lc_id, clp->lc_idlen)) { gotit = 1; break; } } if (gotit == 0) i++; } if (!gotit || (clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_ADMINREVOKED))) { if ((nd->nd_flag & ND_NFSV41) != 0 && confirmp->lval[1] != 0) { /* * For NFSv4.1, if confirmp->lval[1] is non-zero, the * client is trying to update a confirmed clientid. */ NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); confirmp->lval[1] = 0; error = NFSERR_NOENT; goto out; } /* * Get rid of the old one. */ if (i != nfsrv_clienthashsize) { LIST_REMOVE(clp, lc_hash); nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); zapit = 1; } /* * Add it after assigning a client id to it. */ new_clp->lc_flags |= LCL_NEEDSCONFIRM; if ((nd->nd_flag & ND_NFSV41) != 0) new_clp->lc_confirm.lval[0] = confirmp->lval[0] = ++confirm_index; else confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index; clientidp->lval[0] = new_clp->lc_clientid.lval[0] = (u_int32_t)nfsrvboottime; clientidp->lval[1] = new_clp->lc_clientid.lval[1] = nfsrv_nextclientindex(); new_clp->lc_stateindex = 0; new_clp->lc_statemaxindex = 0; new_clp->lc_cbref = 0; new_clp->lc_expiry = nfsrv_leaseexpiry(); LIST_INIT(&new_clp->lc_open); LIST_INIT(&new_clp->lc_deleg); LIST_INIT(&new_clp->lc_olddeleg); LIST_INIT(&new_clp->lc_session); for (i = 0; i < nfsrv_statehashsize; i++) LIST_INIT(&new_clp->lc_stateid[i]); LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp, lc_hash); nfsstatsv1.srvclients++; nfsrv_openpluslock++; nfsrv_clients++; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); if (zapit) nfsrv_zapclient(clp, p); *new_clpp = NULL; goto out; } /* * Now, handle the cases where the id is already issued. */ if (nfsrv_notsamecredname(nd, clp)) { /* * Check to see if there is expired state that should go away. */ if (clp->lc_expiry < NFSD_MONOSEC && (!LIST_EMPTY(&clp->lc_open) || !LIST_EMPTY(&clp->lc_deleg))) { nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); } /* * If there is outstanding state, then reply NFSERR_CLIDINUSE per * RFC3530 Sec. 8.1.2 last para. */ if (!LIST_EMPTY(&clp->lc_deleg)) { hasstate = 1; } else if (LIST_EMPTY(&clp->lc_open)) { hasstate = 0; } else { hasstate = 0; /* Look for an Open on the OpenOwner */ LIST_FOREACH(stp, &clp->lc_open, ls_list) { if (!LIST_EMPTY(&stp->ls_open)) { hasstate = 1; break; } } } if (hasstate) { /* * If the uid doesn't match, return NFSERR_CLIDINUSE after * filling out the correct ipaddr and portnum. */ switch (clp->lc_req.nr_nam->sa_family) { #ifdef INET case AF_INET: sin = (struct sockaddr_in *)new_clp->lc_req.nr_nam; rin = (struct sockaddr_in *)clp->lc_req.nr_nam; sin->sin_addr.s_addr = rin->sin_addr.s_addr; sin->sin_port = rin->sin_port; break; #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)new_clp->lc_req.nr_nam; rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam; sin6->sin6_addr = rin6->sin6_addr; sin6->sin6_port = rin6->sin6_port; break; #endif } NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); error = NFSERR_CLIDINUSE; goto out; } } if (NFSBCMP(new_clp->lc_verf, clp->lc_verf, NFSX_VERF)) { /* * If the verifier has changed, the client has rebooted * and a new client id is issued. The old state info * can be thrown away once the SETCLIENTID_CONFIRM occurs. */ LIST_REMOVE(clp, lc_hash); /* Get rid of all sessions on this clientid. */ LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep) { ret = nfsrv_freesession(sep, NULL); if (ret != 0) printf("nfsrv_setclient: verifier changed free" " session failed=%d\n", ret); } new_clp->lc_flags |= LCL_NEEDSCONFIRM; if ((nd->nd_flag & ND_NFSV41) != 0) new_clp->lc_confirm.lval[0] = confirmp->lval[0] = ++confirm_index; else confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index; clientidp->lval[0] = new_clp->lc_clientid.lval[0] = nfsrvboottime; clientidp->lval[1] = new_clp->lc_clientid.lval[1] = nfsrv_nextclientindex(); new_clp->lc_stateindex = 0; new_clp->lc_statemaxindex = 0; new_clp->lc_cbref = 0; new_clp->lc_expiry = nfsrv_leaseexpiry(); /* * Save the state until confirmed. */ LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list); LIST_FOREACH(tstp, &new_clp->lc_open, ls_list) tstp->ls_clp = new_clp; LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list); LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list) tstp->ls_clp = new_clp; LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg, ls_list); LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list) tstp->ls_clp = new_clp; for (i = 0; i < nfsrv_statehashsize; i++) { LIST_NEWHEAD(&new_clp->lc_stateid[i], &clp->lc_stateid[i], ls_hash); LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash) tstp->ls_clp = new_clp; } LIST_INIT(&new_clp->lc_session); LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp, lc_hash); nfsstatsv1.srvclients++; nfsrv_openpluslock++; nfsrv_clients++; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); /* * Must wait until any outstanding callback on the old clp * completes. */ NFSLOCKSTATE(); while (clp->lc_cbref) { clp->lc_flags |= LCL_WAKEUPWANTED; (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1, "nfsd clp", 10 * hz); } NFSUNLOCKSTATE(); nfsrv_zapclient(clp, p); *new_clpp = NULL; goto out; } /* For NFSv4.1, mark that we found a confirmed clientid. */ if ((nd->nd_flag & ND_NFSV41) != 0) { clientidp->lval[0] = clp->lc_clientid.lval[0]; clientidp->lval[1] = clp->lc_clientid.lval[1]; confirmp->lval[0] = 0; /* Ignored by client */ confirmp->lval[1] = 1; } else { /* * id and verifier match, so update the net address info * and get rid of any existing callback authentication * handle, so a new one will be acquired. */ LIST_REMOVE(clp, lc_hash); new_clp->lc_flags |= (LCL_NEEDSCONFIRM | LCL_DONTCLEAN); new_clp->lc_expiry = nfsrv_leaseexpiry(); confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index; clientidp->lval[0] = new_clp->lc_clientid.lval[0] = clp->lc_clientid.lval[0]; clientidp->lval[1] = new_clp->lc_clientid.lval[1] = clp->lc_clientid.lval[1]; new_clp->lc_delegtime = clp->lc_delegtime; new_clp->lc_stateindex = clp->lc_stateindex; new_clp->lc_statemaxindex = clp->lc_statemaxindex; new_clp->lc_cbref = 0; LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list); LIST_FOREACH(tstp, &new_clp->lc_open, ls_list) tstp->ls_clp = new_clp; LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list); LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list) tstp->ls_clp = new_clp; LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg, ls_list); LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list) tstp->ls_clp = new_clp; for (i = 0; i < nfsrv_statehashsize; i++) { LIST_NEWHEAD(&new_clp->lc_stateid[i], &clp->lc_stateid[i], ls_hash); LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash) tstp->ls_clp = new_clp; } LIST_INIT(&new_clp->lc_session); LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp, lc_hash); nfsstatsv1.srvclients++; nfsrv_openpluslock++; nfsrv_clients++; } NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); if ((nd->nd_flag & ND_NFSV41) == 0) { /* * Must wait until any outstanding callback on the old clp * completes. */ NFSLOCKSTATE(); while (clp->lc_cbref) { clp->lc_flags |= LCL_WAKEUPWANTED; (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1, "nfsdclp", 10 * hz); } NFSUNLOCKSTATE(); nfsrv_zapclient(clp, p); *new_clpp = NULL; } out: NFSEXITCODE2(error, nd); return (error); } /* * Check to see if the client id exists and optionally confirm it. */ int nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp, struct nfsdsession *nsep, nfsquad_t confirm, uint32_t cbprogram, struct nfsrv_descript *nd, NFSPROC_T *p) { struct nfsclient *clp; struct nfsstate *stp; int i; struct nfsclienthashhead *hp; int error = 0, igotlock, doneok; struct nfssessionhash *shp; struct nfsdsession *sep; uint64_t sessid[2]; static uint64_t next_sess = 0; if (clpp) *clpp = NULL; if ((nd == NULL || (nd->nd_flag & ND_NFSV41) == 0 || opflags != CLOPS_RENEW) && nfsrvboottime != clientid.lval[0]) { error = NFSERR_STALECLIENTID; goto out; } /* * If called with opflags == CLOPS_RENEW, the State Lock is * already held. Otherwise, we need to get either that or, * for the case of Confirm, lock out the nfsd threads. */ if (opflags & CLOPS_CONFIRM) { NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); do { igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!igotlock); /* * Create a new sessionid here, since we need to do it where * there is a mutex held to serialize update of next_sess. */ if ((nd->nd_flag & ND_NFSV41) != 0) { sessid[0] = ++next_sess; sessid[1] = clientid.qval; } NFSUNLOCKV4ROOTMUTEX(); } else if (opflags != CLOPS_RENEW) { NFSLOCKSTATE(); } /* For NFSv4.1, the clp is acquired from the associated session. */ if (nd != NULL && (nd->nd_flag & ND_NFSV41) != 0 && opflags == CLOPS_RENEW) { clp = NULL; if ((nd->nd_flag & ND_HASSEQUENCE) != 0) { shp = NFSSESSIONHASH(nd->nd_sessionid); NFSLOCKSESSION(shp); sep = nfsrv_findsession(nd->nd_sessionid); if (sep != NULL) clp = sep->sess_clp; NFSUNLOCKSESSION(shp); } } else { hp = NFSCLIENTHASH(clientid); LIST_FOREACH(clp, hp, lc_hash) { if (clp->lc_clientid.lval[1] == clientid.lval[1]) break; } } if (clp == NULL) { if (opflags & CLOPS_CONFIRM) error = NFSERR_STALECLIENTID; else error = NFSERR_EXPIRED; } else if (clp->lc_flags & LCL_ADMINREVOKED) { /* * If marked admin revoked, just return the error. */ error = NFSERR_ADMINREVOKED; } if (error) { if (opflags & CLOPS_CONFIRM) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } else if (opflags != CLOPS_RENEW) { NFSUNLOCKSTATE(); } goto out; } /* * Perform any operations specified by the opflags. */ if (opflags & CLOPS_CONFIRM) { if (((nd->nd_flag & ND_NFSV41) != 0 && clp->lc_confirm.lval[0] != confirm.lval[0]) || ((nd->nd_flag & ND_NFSV41) == 0 && clp->lc_confirm.qval != confirm.qval)) error = NFSERR_STALECLIENTID; else if (nfsrv_notsamecredname(nd, clp)) error = NFSERR_CLIDINUSE; if (!error) { if ((clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_DONTCLEAN)) == LCL_NEEDSCONFIRM) { /* * Hang onto the delegations (as old delegations) * for an Open with CLAIM_DELEGATE_PREV unless in * grace, but get rid of the rest of the state. */ nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_olddeleg); if (nfsrv_checkgrace(nd, clp, 0)) { /* In grace, so just delete delegations */ nfsrv_freedeleglist(&clp->lc_deleg); } else { LIST_FOREACH(stp, &clp->lc_deleg, ls_list) stp->ls_flags |= NFSLCK_OLDDELEG; clp->lc_delegtime = NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA; LIST_NEWHEAD(&clp->lc_olddeleg, &clp->lc_deleg, ls_list); } if ((nd->nd_flag & ND_NFSV41) != 0) clp->lc_program = cbprogram; } clp->lc_flags &= ~(LCL_NEEDSCONFIRM | LCL_DONTCLEAN); if (clp->lc_program) clp->lc_flags |= LCL_NEEDSCBNULL; /* For NFSv4.1, link the session onto the client. */ if (nsep != NULL) { /* Hold a reference on the xprt for a backchannel. */ if ((nsep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0) { if (clp->lc_req.nr_client == NULL) clp->lc_req.nr_client = (struct __rpc_client *) clnt_bck_create(nd->nd_xprt->xp_socket, cbprogram, NFSV4_CBVERS); if (clp->lc_req.nr_client != NULL) { SVC_ACQUIRE(nd->nd_xprt); CLNT_ACQUIRE(clp->lc_req.nr_client); nd->nd_xprt->xp_p2 = clp->lc_req.nr_client; /* Disable idle timeout. */ nd->nd_xprt->xp_idletimeout = 0; nsep->sess_cbsess.nfsess_xprt = nd->nd_xprt; } else nsep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN; } NFSBCOPY(sessid, nsep->sess_sessionid, NFSX_V4SESSIONID); NFSBCOPY(sessid, nsep->sess_cbsess.nfsess_sessionid, NFSX_V4SESSIONID); shp = NFSSESSIONHASH(nsep->sess_sessionid); NFSLOCKSTATE(); NFSLOCKSESSION(shp); LIST_INSERT_HEAD(&shp->list, nsep, sess_hash); LIST_INSERT_HEAD(&clp->lc_session, nsep, sess_list); nsep->sess_clp = clp; NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); } } } else if (clp->lc_flags & LCL_NEEDSCONFIRM) { error = NFSERR_EXPIRED; } /* * If called by the Renew Op, we must check the principal. */ if (!error && (opflags & CLOPS_RENEWOP)) { if (nfsrv_notsamecredname(nd, clp)) { doneok = 0; for (i = 0; i < nfsrv_statehashsize && doneok == 0; i++) { LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) { if ((stp->ls_flags & NFSLCK_OPEN) && stp->ls_uid == nd->nd_cred->cr_uid) { doneok = 1; break; } } } if (!doneok) error = NFSERR_ACCES; } if (!error && (clp->lc_flags & LCL_CBDOWN)) error = NFSERR_CBPATHDOWN; } if ((!error || error == NFSERR_CBPATHDOWN) && (opflags & CLOPS_RENEW)) { clp->lc_expiry = nfsrv_leaseexpiry(); } if (opflags & CLOPS_CONFIRM) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } else if (opflags != CLOPS_RENEW) { NFSUNLOCKSTATE(); } if (clpp) *clpp = clp; out: NFSEXITCODE2(error, nd); return (error); } /* * Perform the NFSv4.1 destroy clientid. */ int nfsrv_destroyclient(nfsquad_t clientid, NFSPROC_T *p) { struct nfsclient *clp; struct nfsclienthashhead *hp; int error = 0, i, igotlock; if (nfsrvboottime != clientid.lval[0]) { error = NFSERR_STALECLIENTID; goto out; } /* Lock out other nfsd threads */ NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); do { igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (igotlock == 0); NFSUNLOCKV4ROOTMUTEX(); hp = NFSCLIENTHASH(clientid); LIST_FOREACH(clp, hp, lc_hash) { if (clp->lc_clientid.lval[1] == clientid.lval[1]) break; } if (clp == NULL) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); /* Just return ok, since it is gone. */ goto out; } /* * Free up all layouts on the clientid. Should the client return the * layouts? */ nfsrv_freelayoutlist(clientid); /* Scan for state on the clientid. */ for (i = 0; i < nfsrv_statehashsize; i++) if (!LIST_EMPTY(&clp->lc_stateid[i])) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); error = NFSERR_CLIENTIDBUSY; goto out; } if (!LIST_EMPTY(&clp->lc_session) || !LIST_EMPTY(&clp->lc_deleg)) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); error = NFSERR_CLIENTIDBUSY; goto out; } /* Destroy the clientid and return ok. */ nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); LIST_REMOVE(clp, lc_hash); NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); nfsrv_zapclient(clp, p); out: NFSEXITCODE2(error, nd); return (error); } /* * Called from the new nfssvc syscall to admin revoke a clientid. * Returns 0 for success, error otherwise. */ int nfsrv_adminrevoke(struct nfsd_clid *revokep, NFSPROC_T *p) { struct nfsclient *clp = NULL; int i, error = 0; int gotit, igotlock; /* * First, lock out the nfsd so that state won't change while the * revocation record is being written to the stable storage restart * file. */ NFSLOCKV4ROOTMUTEX(); do { igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!igotlock); NFSUNLOCKV4ROOTMUTEX(); /* * Search for a match in the client list. */ gotit = i = 0; while (i < nfsrv_clienthashsize && !gotit) { LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) { if (revokep->nclid_idlen == clp->lc_idlen && !NFSBCMP(revokep->nclid_id, clp->lc_id, clp->lc_idlen)) { gotit = 1; break; } } i++; } if (!gotit) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 0); NFSUNLOCKV4ROOTMUTEX(); error = EPERM; goto out; } /* * Now, write out the revocation record */ nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p); nfsrv_backupstable(); /* * and clear out the state, marking the clientid revoked. */ clp->lc_flags &= ~LCL_CALLBACKSON; clp->lc_flags |= LCL_ADMINREVOKED; nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 0); NFSUNLOCKV4ROOTMUTEX(); out: NFSEXITCODE(error); return (error); } /* * Dump out stats for all clients. Called from nfssvc(2), that is used * nfsstatsv1. */ void nfsrv_dumpclients(struct nfsd_dumpclients *dumpp, int maxcnt) { struct nfsclient *clp; int i = 0, cnt = 0; /* * First, get a reference on the nfsv4rootfs_lock so that an * exclusive lock cannot be acquired while dumping the clients. */ NFSLOCKV4ROOTMUTEX(); nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); NFSUNLOCKV4ROOTMUTEX(); NFSLOCKSTATE(); /* * Rattle through the client lists until done. */ while (i < nfsrv_clienthashsize && cnt < maxcnt) { clp = LIST_FIRST(&nfsclienthash[i]); while (clp != LIST_END(&nfsclienthash[i]) && cnt < maxcnt) { nfsrv_dumpaclient(clp, &dumpp[cnt]); cnt++; clp = LIST_NEXT(clp, lc_hash); } i++; } if (cnt < maxcnt) dumpp[cnt].ndcl_clid.nclid_idlen = 0; NFSUNLOCKSTATE(); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); } /* * Dump stats for a client. Must be called with the NFSSTATELOCK and spl'd. */ static void nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp) { struct nfsstate *stp, *openstp, *lckownstp; struct nfslock *lop; sa_family_t af; #ifdef INET struct sockaddr_in *rin; #endif #ifdef INET6 struct sockaddr_in6 *rin6; #endif dumpp->ndcl_nopenowners = dumpp->ndcl_nlockowners = 0; dumpp->ndcl_nopens = dumpp->ndcl_nlocks = 0; dumpp->ndcl_ndelegs = dumpp->ndcl_nolddelegs = 0; dumpp->ndcl_flags = clp->lc_flags; dumpp->ndcl_clid.nclid_idlen = clp->lc_idlen; NFSBCOPY(clp->lc_id, dumpp->ndcl_clid.nclid_id, clp->lc_idlen); af = clp->lc_req.nr_nam->sa_family; dumpp->ndcl_addrfam = af; switch (af) { #ifdef INET case AF_INET: rin = (struct sockaddr_in *)clp->lc_req.nr_nam; dumpp->ndcl_cbaddr.sin_addr = rin->sin_addr; break; #endif #ifdef INET6 case AF_INET6: rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam; dumpp->ndcl_cbaddr.sin6_addr = rin6->sin6_addr; break; #endif } /* * Now, scan the state lists and total up the opens and locks. */ LIST_FOREACH(stp, &clp->lc_open, ls_list) { dumpp->ndcl_nopenowners++; LIST_FOREACH(openstp, &stp->ls_open, ls_list) { dumpp->ndcl_nopens++; LIST_FOREACH(lckownstp, &openstp->ls_open, ls_list) { dumpp->ndcl_nlockowners++; LIST_FOREACH(lop, &lckownstp->ls_lock, lo_lckowner) { dumpp->ndcl_nlocks++; } } } } /* * and the delegation lists. */ LIST_FOREACH(stp, &clp->lc_deleg, ls_list) { dumpp->ndcl_ndelegs++; } LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) { dumpp->ndcl_nolddelegs++; } } /* * Dump out lock stats for a file. */ void nfsrv_dumplocks(vnode_t vp, struct nfsd_dumplocks *ldumpp, int maxcnt, NFSPROC_T *p) { struct nfsstate *stp; struct nfslock *lop; int cnt = 0; struct nfslockfile *lfp; sa_family_t af; #ifdef INET struct sockaddr_in *rin; #endif #ifdef INET6 struct sockaddr_in6 *rin6; #endif int ret; fhandle_t nfh; ret = nfsrv_getlockfh(vp, 0, NULL, &nfh, p); /* * First, get a reference on the nfsv4rootfs_lock so that an * exclusive lock on it cannot be acquired while dumping the locks. */ NFSLOCKV4ROOTMUTEX(); nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); NFSUNLOCKV4ROOTMUTEX(); NFSLOCKSTATE(); if (!ret) ret = nfsrv_getlockfile(0, NULL, &lfp, &nfh, 0); if (ret) { ldumpp[0].ndlck_clid.nclid_idlen = 0; NFSUNLOCKSTATE(); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); return; } /* * For each open share on file, dump it out. */ stp = LIST_FIRST(&lfp->lf_open); while (stp != LIST_END(&lfp->lf_open) && cnt < maxcnt) { ldumpp[cnt].ndlck_flags = stp->ls_flags; ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid; ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0]; ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1]; ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2]; ldumpp[cnt].ndlck_owner.nclid_idlen = stp->ls_openowner->ls_ownerlen; NFSBCOPY(stp->ls_openowner->ls_owner, ldumpp[cnt].ndlck_owner.nclid_id, stp->ls_openowner->ls_ownerlen); ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen; NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id, stp->ls_clp->lc_idlen); af = stp->ls_clp->lc_req.nr_nam->sa_family; ldumpp[cnt].ndlck_addrfam = af; switch (af) { #ifdef INET case AF_INET: rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam; ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr; break; #endif #ifdef INET6 case AF_INET6: rin6 = (struct sockaddr_in6 *) stp->ls_clp->lc_req.nr_nam; ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr; break; #endif } stp = LIST_NEXT(stp, ls_file); cnt++; } /* * and all locks. */ lop = LIST_FIRST(&lfp->lf_lock); while (lop != LIST_END(&lfp->lf_lock) && cnt < maxcnt) { stp = lop->lo_stp; ldumpp[cnt].ndlck_flags = lop->lo_flags; ldumpp[cnt].ndlck_first = lop->lo_first; ldumpp[cnt].ndlck_end = lop->lo_end; ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid; ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0]; ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1]; ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2]; ldumpp[cnt].ndlck_owner.nclid_idlen = stp->ls_ownerlen; NFSBCOPY(stp->ls_owner, ldumpp[cnt].ndlck_owner.nclid_id, stp->ls_ownerlen); ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen; NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id, stp->ls_clp->lc_idlen); af = stp->ls_clp->lc_req.nr_nam->sa_family; ldumpp[cnt].ndlck_addrfam = af; switch (af) { #ifdef INET case AF_INET: rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam; ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr; break; #endif #ifdef INET6 case AF_INET6: rin6 = (struct sockaddr_in6 *) stp->ls_clp->lc_req.nr_nam; ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr; break; #endif } lop = LIST_NEXT(lop, lo_lckfile); cnt++; } /* * and the delegations. */ stp = LIST_FIRST(&lfp->lf_deleg); while (stp != LIST_END(&lfp->lf_deleg) && cnt < maxcnt) { ldumpp[cnt].ndlck_flags = stp->ls_flags; ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid; ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0]; ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1]; ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2]; ldumpp[cnt].ndlck_owner.nclid_idlen = 0; ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen; NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id, stp->ls_clp->lc_idlen); af = stp->ls_clp->lc_req.nr_nam->sa_family; ldumpp[cnt].ndlck_addrfam = af; switch (af) { #ifdef INET case AF_INET: rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam; ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr; break; #endif #ifdef INET6 case AF_INET6: rin6 = (struct sockaddr_in6 *) stp->ls_clp->lc_req.nr_nam; ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr; break; #endif } stp = LIST_NEXT(stp, ls_file); cnt++; } /* * If list isn't full, mark end of list by setting the client name * to zero length. */ if (cnt < maxcnt) ldumpp[cnt].ndlck_clid.nclid_idlen = 0; NFSUNLOCKSTATE(); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); } /* * Server timer routine. It can scan any linked list, so long * as it holds the spin/mutex lock and there is no exclusive lock on * nfsv4rootfs_lock. * (For OpenBSD, a kthread is ok. For FreeBSD, I think it is ok * to do this from a callout, since the spin locks work. For * Darwin, I'm not sure what will work correctly yet.) * Should be called once per second. */ void nfsrv_servertimer(void) { struct nfsclient *clp, *nclp; struct nfsstate *stp, *nstp; int got_ref, i; /* * Make sure nfsboottime is set. This is used by V3 as well * as V4. Note that nfsboottime is not nfsrvboottime, which is * only used by the V4 server for leases. */ if (nfsboottime.tv_sec == 0) NFSSETBOOTTIME(nfsboottime); /* * If server hasn't started yet, just return. */ NFSLOCKSTATE(); if (nfsrv_stablefirst.nsf_eograce == 0) { NFSUNLOCKSTATE(); return; } if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE)) { if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) && NFSD_MONOSEC > nfsrv_stablefirst.nsf_eograce) nfsrv_stablefirst.nsf_flags |= (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK); NFSUNLOCKSTATE(); return; } /* * Try and get a reference count on the nfsv4rootfs_lock so that * no nfsd thread can acquire an exclusive lock on it before this * call is done. If it is already exclusively locked, just return. */ NFSLOCKV4ROOTMUTEX(); got_ref = nfsv4_getref_nonblock(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); if (got_ref == 0) { NFSUNLOCKSTATE(); return; } /* * For each client... */ for (i = 0; i < nfsrv_clienthashsize; i++) { clp = LIST_FIRST(&nfsclienthash[i]); while (clp != LIST_END(&nfsclienthash[i])) { nclp = LIST_NEXT(clp, lc_hash); if (!(clp->lc_flags & LCL_EXPIREIT)) { if (((clp->lc_expiry + NFSRV_STALELEASE) < NFSD_MONOSEC && ((LIST_EMPTY(&clp->lc_deleg) && LIST_EMPTY(&clp->lc_open)) || nfsrv_clients > nfsrv_clienthighwater)) || (clp->lc_expiry + NFSRV_MOULDYLEASE) < NFSD_MONOSEC || (clp->lc_expiry < NFSD_MONOSEC && (nfsrv_openpluslock * 10 / 9) > nfsrv_v4statelimit)) { /* * Lease has expired several nfsrv_lease times ago: * PLUS * - no state is associated with it * OR * - above high water mark for number of clients * (nfsrv_clienthighwater should be large enough * that this only occurs when clients fail to * use the same nfs_client_id4.id. Maybe somewhat * higher that the maximum number of clients that * will mount this server?) * OR * Lease has expired a very long time ago * OR * Lease has expired PLUS the number of opens + locks * has exceeded 90% of capacity * * --> Mark for expiry. The actual expiry will be done * by an nfsd sometime soon. */ clp->lc_flags |= LCL_EXPIREIT; nfsrv_stablefirst.nsf_flags |= (NFSNSF_NEEDLOCK | NFSNSF_EXPIREDCLIENT); } else { /* * If there are no opens, increment no open tick cnt * If time exceeds NFSNOOPEN, mark it to be thrown away * otherwise, if there is an open, reset no open time * Hopefully, this will avoid excessive re-creation * of open owners and subsequent open confirms. */ stp = LIST_FIRST(&clp->lc_open); while (stp != LIST_END(&clp->lc_open)) { nstp = LIST_NEXT(stp, ls_list); if (LIST_EMPTY(&stp->ls_open)) { stp->ls_noopens++; if (stp->ls_noopens > NFSNOOPEN || (nfsrv_openpluslock * 2) > nfsrv_v4statelimit) nfsrv_stablefirst.nsf_flags |= NFSNSF_NOOPENS; } else { stp->ls_noopens = 0; } stp = nstp; } } } clp = nclp; } } NFSUNLOCKSTATE(); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); } /* * The following set of functions free up the various data structures. */ /* * Clear out all open/lock state related to this nfsclient. * Caller must hold an exclusive lock on nfsv4rootfs_lock, so that * there are no other active nfsd threads. */ void nfsrv_cleanclient(struct nfsclient *clp, NFSPROC_T *p) { struct nfsstate *stp, *nstp; struct nfsdsession *sep, *nsep; LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) nfsrv_freeopenowner(stp, 1, p); if ((clp->lc_flags & LCL_ADMINREVOKED) == 0) LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep) (void)nfsrv_freesession(sep, NULL); } /* * Free a client that has been cleaned. It should also already have been * removed from the lists. * (Just to be safe w.r.t. newnfs_disconnect(), call this function when * softclock interrupts are enabled.) */ void nfsrv_zapclient(struct nfsclient *clp, NFSPROC_T *p) { #ifdef notyet if ((clp->lc_flags & (LCL_GSS | LCL_CALLBACKSON)) == (LCL_GSS | LCL_CALLBACKSON) && (clp->lc_hand.nfsh_flag & NFSG_COMPLETE) && clp->lc_handlelen > 0) { clp->lc_hand.nfsh_flag &= ~NFSG_COMPLETE; clp->lc_hand.nfsh_flag |= NFSG_DESTROYED; (void) nfsrv_docallback(clp, NFSV4PROC_CBNULL, NULL, 0, NULL, NULL, NULL, 0, p); } #endif newnfs_disconnect(NULL, &clp->lc_req); free(clp->lc_req.nr_nam, M_SONAME); NFSFREEMUTEX(&clp->lc_req.nr_mtx); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); NFSLOCKSTATE(); nfsstatsv1.srvclients--; nfsrv_openpluslock--; nfsrv_clients--; NFSUNLOCKSTATE(); } /* * Free a list of delegation state structures. * (This function will also free all nfslockfile structures that no * longer have associated state.) */ void nfsrv_freedeleglist(struct nfsstatehead *sthp) { struct nfsstate *stp, *nstp; LIST_FOREACH_SAFE(stp, sthp, ls_list, nstp) { nfsrv_freedeleg(stp); } LIST_INIT(sthp); } /* * Free up a delegation. */ static void nfsrv_freedeleg(struct nfsstate *stp) { struct nfslockfile *lfp; LIST_REMOVE(stp, ls_hash); LIST_REMOVE(stp, ls_list); LIST_REMOVE(stp, ls_file); if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0) nfsrv_writedelegcnt--; lfp = stp->ls_lfp; if (LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) && lfp->lf_usecount == 0 && nfsv4_testlock(&lfp->lf_locallock_lck) == 0) nfsrv_freenfslockfile(lfp); free(stp, M_NFSDSTATE); nfsstatsv1.srvdelegates--; nfsrv_openpluslock--; nfsrv_delegatecnt--; } /* * This function frees an open owner and all associated opens. */ static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p) { struct nfsstate *nstp, *tstp; LIST_REMOVE(stp, ls_list); /* * Now, free all associated opens. */ nstp = LIST_FIRST(&stp->ls_open); while (nstp != LIST_END(&stp->ls_open)) { tstp = nstp; nstp = LIST_NEXT(nstp, ls_list); (void) nfsrv_freeopen(tstp, NULL, cansleep, p); } if (stp->ls_op) nfsrvd_derefcache(stp->ls_op); free(stp, M_NFSDSTATE); nfsstatsv1.srvopenowners--; nfsrv_openpluslock--; } /* * This function frees an open (nfsstate open structure) with all associated * lock_owners and locks. It also frees the nfslockfile structure iff there * are no other opens on the file. * Returns 1 if it free'd the nfslockfile, 0 otherwise. */ static int nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p) { struct nfsstate *nstp, *tstp; struct nfslockfile *lfp; int ret; LIST_REMOVE(stp, ls_hash); LIST_REMOVE(stp, ls_list); LIST_REMOVE(stp, ls_file); lfp = stp->ls_lfp; /* * Now, free all lockowners associated with this open. */ LIST_FOREACH_SAFE(tstp, &stp->ls_open, ls_list, nstp) nfsrv_freelockowner(tstp, vp, cansleep, p); /* * The nfslockfile is freed here if there are no locks * associated with the open. * If there are locks associated with the open, the * nfslockfile structure can be freed via nfsrv_freelockowner(). * Acquire the state mutex to avoid races with calls to * nfsrv_getlockfile(). */ if (cansleep != 0) NFSLOCKSTATE(); if (lfp != NULL && LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) && lfp->lf_usecount == 0 && (cansleep != 0 || nfsv4_testlock(&lfp->lf_locallock_lck) == 0)) { nfsrv_freenfslockfile(lfp); ret = 1; } else ret = 0; if (cansleep != 0) NFSUNLOCKSTATE(); free(stp, M_NFSDSTATE); nfsstatsv1.srvopens--; nfsrv_openpluslock--; return (ret); } /* * Frees a lockowner and all associated locks. */ static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p) { LIST_REMOVE(stp, ls_hash); LIST_REMOVE(stp, ls_list); nfsrv_freeallnfslocks(stp, vp, cansleep, p); if (stp->ls_op) nfsrvd_derefcache(stp->ls_op); free(stp, M_NFSDSTATE); nfsstatsv1.srvlockowners--; nfsrv_openpluslock--; } /* * Free all the nfs locks on a lockowner. */ static void nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p) { struct nfslock *lop, *nlop; struct nfsrollback *rlp, *nrlp; struct nfslockfile *lfp = NULL; int gottvp = 0; vnode_t tvp = NULL; uint64_t first, end; if (vp != NULL) ASSERT_VOP_UNLOCKED(vp, "nfsrv_freeallnfslocks: vnode locked"); lop = LIST_FIRST(&stp->ls_lock); while (lop != LIST_END(&stp->ls_lock)) { nlop = LIST_NEXT(lop, lo_lckowner); /* * Since all locks should be for the same file, lfp should * not change. */ if (lfp == NULL) lfp = lop->lo_lfp; else if (lfp != lop->lo_lfp) panic("allnfslocks"); /* * If vp is NULL and cansleep != 0, a vnode must be acquired * from the file handle. This only occurs when called from * nfsrv_cleanclient(). */ if (gottvp == 0) { if (nfsrv_dolocallocks == 0) tvp = NULL; else if (vp == NULL && cansleep != 0) { tvp = nfsvno_getvp(&lfp->lf_fh); if (tvp != NULL) NFSVOPUNLOCK(tvp); } else tvp = vp; gottvp = 1; } if (tvp != NULL) { if (cansleep == 0) panic("allnfs2"); first = lop->lo_first; end = lop->lo_end; nfsrv_freenfslock(lop); nfsrv_localunlock(tvp, lfp, first, end, p); LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) free(rlp, M_NFSDROLLBACK); LIST_INIT(&lfp->lf_rollback); } else nfsrv_freenfslock(lop); lop = nlop; } if (vp == NULL && tvp != NULL) vrele(tvp); } /* * Free an nfslock structure. */ static void nfsrv_freenfslock(struct nfslock *lop) { if (lop->lo_lckfile.le_prev != NULL) { LIST_REMOVE(lop, lo_lckfile); nfsstatsv1.srvlocks--; nfsrv_openpluslock--; } LIST_REMOVE(lop, lo_lckowner); free(lop, M_NFSDLOCK); } /* * This function frees an nfslockfile structure. */ static void nfsrv_freenfslockfile(struct nfslockfile *lfp) { LIST_REMOVE(lfp, lf_hash); free(lfp, M_NFSDLOCKFILE); } /* * This function looks up an nfsstate structure via stateid. */ static int nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, __unused u_int32_t flags, struct nfsstate **stpp) { struct nfsstate *stp; struct nfsstatehead *hp; int error = 0; *stpp = NULL; hp = NFSSTATEHASH(clp, *stateidp); LIST_FOREACH(stp, hp, ls_hash) { if (!NFSBCMP(stp->ls_stateid.other, stateidp->other, NFSX_STATEIDOTHER)) break; } /* * If no state id in list, return NFSERR_BADSTATEID. */ if (stp == LIST_END(hp)) { error = NFSERR_BADSTATEID; goto out; } *stpp = stp; out: NFSEXITCODE(error); return (error); } /* * This function gets an nfsstate structure via owner string. */ static void nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp, struct nfsstate **stpp) { struct nfsstate *stp; *stpp = NULL; LIST_FOREACH(stp, hp, ls_list) { if (new_stp->ls_ownerlen == stp->ls_ownerlen && !NFSBCMP(new_stp->ls_owner,stp->ls_owner,stp->ls_ownerlen)) { *stpp = stp; return; } } } /* * Lock control function called to update lock status. * Returns 0 upon success, -1 if there is no lock and the flags indicate * that one isn't to be created and an NFSERR_xxx for other errors. * The structures new_stp and new_lop are passed in as pointers that should * be set to NULL if the structure is used and shouldn't be free'd. * For the NFSLCK_TEST and NFSLCK_CHECK cases, the structures are * never used and can safely be allocated on the stack. For all other * cases, *new_stpp and *new_lopp should be malloc'd before the call, * in case they are used. */ int nfsrv_lockctrl(vnode_t vp, struct nfsstate **new_stpp, struct nfslock **new_lopp, struct nfslockconflict *cfp, nfsquad_t clientid, nfsv4stateid_t *stateidp, __unused struct nfsexstuff *exp, struct nfsrv_descript *nd, NFSPROC_T *p) { struct nfslock *lop; struct nfsstate *new_stp = *new_stpp; struct nfslock *new_lop = *new_lopp; struct nfsstate *tstp, *mystp, *nstp; int specialid = 0; struct nfslockfile *lfp; struct nfslock *other_lop = NULL; struct nfsstate *stp, *lckstp = NULL; struct nfsclient *clp = NULL; u_int32_t bits; int error = 0, haslock = 0, ret, reterr; int getlckret, delegation = 0, filestruct_locked, vnode_unlocked = 0; fhandle_t nfh; uint64_t first, end; uint32_t lock_flags; if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_SETATTR)) { /* * Note the special cases of "all 1s" or "all 0s" stateids and * let reads with all 1s go ahead. */ if (new_stp->ls_stateid.seqid == 0x0 && new_stp->ls_stateid.other[0] == 0x0 && new_stp->ls_stateid.other[1] == 0x0 && new_stp->ls_stateid.other[2] == 0x0) specialid = 1; else if (new_stp->ls_stateid.seqid == 0xffffffff && new_stp->ls_stateid.other[0] == 0xffffffff && new_stp->ls_stateid.other[1] == 0xffffffff && new_stp->ls_stateid.other[2] == 0xffffffff) specialid = 2; } /* * Check for restart conditions (client and server). */ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, specialid); if (error) goto out; /* * Check for state resource limit exceeded. */ if ((new_stp->ls_flags & NFSLCK_LOCK) && nfsrv_openpluslock > nfsrv_v4statelimit) { error = NFSERR_RESOURCE; goto out; } /* * For the lock case, get another nfslock structure, * just in case we need it. * Malloc now, before we start sifting through the linked lists, * in case we have to wait for memory. */ tryagain: if (new_stp->ls_flags & NFSLCK_LOCK) other_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK); filestruct_locked = 0; reterr = 0; lfp = NULL; /* * Get the lockfile structure for CFH now, so we can do a sanity * check against the stateid, before incrementing the seqid#, since * we want to return NFSERR_BADSTATEID on failure and the seqid# * shouldn't be incremented for this case. * If nfsrv_getlockfile() returns -1, it means "not found", which * will be handled later. * If we are doing Lock/LockU and local locking is enabled, sleep * lock the nfslockfile structure. */ getlckret = nfsrv_getlockfh(vp, new_stp->ls_flags, NULL, &nfh, p); NFSLOCKSTATE(); if (getlckret == 0) { if ((new_stp->ls_flags & (NFSLCK_LOCK | NFSLCK_UNLOCK)) != 0 && nfsrv_dolocallocks != 0 && nd->nd_repstat == 0) { getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL, &lfp, &nfh, 1); if (getlckret == 0) filestruct_locked = 1; } else getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL, &lfp, &nfh, 0); } if (getlckret != 0 && getlckret != -1) reterr = getlckret; if (filestruct_locked != 0) { LIST_INIT(&lfp->lf_rollback); if ((new_stp->ls_flags & NFSLCK_LOCK)) { /* * For local locking, do the advisory locking now, so * that any conflict can be detected. A failure later * can be rolled back locally. If an error is returned, * struct nfslockfile has been unlocked and any local * locking rolled back. */ NFSUNLOCKSTATE(); if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl1"); vnode_unlocked = 1; NFSVOPUNLOCK(vp); } reterr = nfsrv_locallock(vp, lfp, (new_lop->lo_flags & (NFSLCK_READ | NFSLCK_WRITE)), new_lop->lo_first, new_lop->lo_end, cfp, p); NFSLOCKSTATE(); } } if (specialid == 0) { if (new_stp->ls_flags & NFSLCK_TEST) { /* * RFC 3530 does not list LockT as an op that renews a * lease, but the consensus seems to be that it is ok * for a server to do so. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); /* * Since NFSERR_EXPIRED, NFSERR_ADMINREVOKED are not valid * error returns for LockT, just go ahead and test for a lock, * since there are no locks for this client, but other locks * can conflict. (ie. same client will always be false) */ if (error == NFSERR_EXPIRED || error == NFSERR_ADMINREVOKED) error = 0; lckstp = new_stp; } else { error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (error == 0) /* * Look up the stateid */ error = nfsrv_getstate(clp, &new_stp->ls_stateid, new_stp->ls_flags, &stp); /* * do some sanity checks for an unconfirmed open or a * stateid that refers to the wrong file, for an open stateid */ if (error == 0 && (stp->ls_flags & NFSLCK_OPEN) && ((stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM) || (getlckret == 0 && stp->ls_lfp != lfp))){ /* * NFSLCK_SETATTR should return OK rather than NFSERR_BADSTATEID * The only exception is using SETATTR with SIZE. * */ if ((new_stp->ls_flags & (NFSLCK_SETATTR | NFSLCK_CHECK)) != NFSLCK_SETATTR) error = NFSERR_BADSTATEID; } if (error == 0 && (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) && getlckret == 0 && stp->ls_lfp != lfp) error = NFSERR_BADSTATEID; /* * If the lockowner stateid doesn't refer to the same file, * I believe that is considered ok, since some clients will * only create a single lockowner and use that for all locks * on all files. * For now, log it as a diagnostic, instead of considering it * a BadStateid. */ if (error == 0 && (stp->ls_flags & (NFSLCK_OPEN | NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) == 0 && getlckret == 0 && stp->ls_lfp != lfp) { #ifdef DIAGNOSTIC printf("Got a lock statid for different file open\n"); #endif /* error = NFSERR_BADSTATEID; */ } if (error == 0) { if (new_stp->ls_flags & NFSLCK_OPENTOLOCK) { /* * If haslock set, we've already checked the seqid. */ if (!haslock) { if (stp->ls_flags & NFSLCK_OPEN) error = nfsrv_checkseqid(nd, new_stp->ls_seq, stp->ls_openowner, new_stp->ls_op); else error = NFSERR_BADSTATEID; } if (!error) nfsrv_getowner(&stp->ls_open, new_stp, &lckstp); if (lckstp) { /* * For NFSv4.1 and NFSv4.2 allow an * open_to_lock_owner when the lock_owner already * exists. Just clear NFSLCK_OPENTOLOCK so that * a new lock_owner will not be created. * RFC7530 states that the error for NFSv4.0 * is NFS4ERR_BAD_SEQID. */ if ((nd->nd_flag & ND_NFSV41) != 0) new_stp->ls_flags &= ~NFSLCK_OPENTOLOCK; else error = NFSERR_BADSEQID; } else lckstp = new_stp; } else if (new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK)) { /* * If haslock set, ditto above. */ if (!haslock) { if (stp->ls_flags & NFSLCK_OPEN) error = NFSERR_BADSTATEID; else error = nfsrv_checkseqid(nd, new_stp->ls_seq, stp, new_stp->ls_op); } lckstp = stp; } else { lckstp = stp; } } /* * If the seqid part of the stateid isn't the same, return * NFSERR_OLDSTATEID for cases other than I/O Ops. * For I/O Ops, only return NFSERR_OLDSTATEID if * nfsrv_returnoldstateid is set. (The consensus on the email * list was that most clients would prefer to not receive * NFSERR_OLDSTATEID for I/O Ops, but the RFC suggests that that * is what will happen, so I use the nfsrv_returnoldstateid to * allow for either server configuration.) */ if (!error && stp->ls_stateid.seqid!=new_stp->ls_stateid.seqid && (((nd->nd_flag & ND_NFSV41) == 0 && (!(new_stp->ls_flags & NFSLCK_CHECK) || nfsrv_returnoldstateid)) || ((nd->nd_flag & ND_NFSV41) != 0 && new_stp->ls_stateid.seqid != 0))) error = NFSERR_OLDSTATEID; } } /* * Now we can check for grace. */ if (!error) error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags); if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error && nfsrv_checkstable(clp)) error = NFSERR_NOGRACE; /* * If we successfully Reclaimed state, note that. */ if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error) nfsrv_markstable(clp); /* * At this point, either error == NFSERR_BADSTATEID or the * seqid# has been updated, so we can return any error. * If error == 0, there may be an error in: * nd_repstat - Set by the calling function. * reterr - Set above, if getting the nfslockfile structure * or acquiring the local lock failed. * (If both of these are set, nd_repstat should probably be * returned, since that error was detected before this * function call.) */ if (error != 0 || nd->nd_repstat != 0 || reterr != 0) { if (error == 0) { if (nd->nd_repstat != 0) error = nd->nd_repstat; else error = reterr; } if (filestruct_locked != 0) { /* Roll back local locks. */ NFSUNLOCKSTATE(); if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl2"); vnode_unlocked = 1; NFSVOPUNLOCK(vp); } nfsrv_locallock_rollback(vp, lfp, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); } NFSUNLOCKSTATE(); goto out; } /* * Check the nfsrv_getlockfile return. * Returned -1 if no structure found. */ if (getlckret == -1) { error = NFSERR_EXPIRED; /* * Called from lockt, so no lock is OK. */ if (new_stp->ls_flags & NFSLCK_TEST) { error = 0; } else if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_SETATTR)) { /* * Called to check for a lock, OK if the stateid is all * 1s or all 0s, but there should be an nfsstate * otherwise. * (ie. If there is no open, I'll assume no share * deny bits.) */ if (specialid) error = 0; else error = NFSERR_BADSTATEID; } NFSUNLOCKSTATE(); goto out; } /* * For NFSLCK_CHECK and NFSLCK_LOCK, test for a share conflict. * For NFSLCK_CHECK, allow a read if write access is granted, * but check for a deny. For NFSLCK_LOCK, require correct access, * which implies a conflicting deny can't exist. */ if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_LOCK)) { /* * Four kinds of state id: * - specialid (all 0s or all 1s), only for NFSLCK_CHECK * - stateid for an open * - stateid for a delegation * - stateid for a lock owner */ if (!specialid) { if (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) { delegation = 1; mystp = stp; nfsrv_delaydelegtimeout(stp); } else if (stp->ls_flags & NFSLCK_OPEN) { mystp = stp; } else { mystp = stp->ls_openstp; } /* * If locking or checking, require correct access * bit set. */ if (((new_stp->ls_flags & NFSLCK_LOCK) && !((new_lop->lo_flags >> NFSLCK_LOCKSHIFT) & mystp->ls_flags & NFSLCK_ACCESSBITS)) || ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_READACCESS)) == (NFSLCK_CHECK | NFSLCK_READACCESS) && !(mystp->ls_flags & NFSLCK_READACCESS) && nfsrv_allowreadforwriteopen == 0) || ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_WRITEACCESS)) == (NFSLCK_CHECK | NFSLCK_WRITEACCESS) && !(mystp->ls_flags & NFSLCK_WRITEACCESS))) { if (filestruct_locked != 0) { /* Roll back local locks. */ NFSUNLOCKSTATE(); if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl3"); vnode_unlocked = 1; NFSVOPUNLOCK(vp); } nfsrv_locallock_rollback(vp, lfp, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); } NFSUNLOCKSTATE(); error = NFSERR_OPENMODE; goto out; } } else mystp = NULL; if ((new_stp->ls_flags & NFSLCK_CHECK) && !delegation) { /* * Check for a conflicting deny bit. */ LIST_FOREACH(tstp, &lfp->lf_open, ls_file) { if (tstp != mystp) { bits = tstp->ls_flags; bits >>= NFSLCK_SHIFT; if (new_stp->ls_flags & bits & NFSLCK_ACCESSBITS) { KASSERT(vnode_unlocked == 0, ("nfsrv_lockctrl: vnode unlocked1")); ret = nfsrv_clientconflict(tstp->ls_clp, &haslock, vp, p); if (ret == 1) { /* * nfsrv_clientconflict unlocks state * when it returns non-zero. */ lckstp = NULL; goto tryagain; } if (ret == 0) NFSUNLOCKSTATE(); if (ret == 2) error = NFSERR_PERM; else error = NFSERR_OPENMODE; goto out; } } } /* We're outta here */ NFSUNLOCKSTATE(); goto out; } } /* * For setattr, just get rid of all the Delegations for other clients. */ if (new_stp->ls_flags & NFSLCK_SETATTR) { KASSERT(vnode_unlocked == 0, ("nfsrv_lockctrl: vnode unlocked2")); ret = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p); if (ret) { /* * nfsrv_cleandeleg() unlocks state when it * returns non-zero. */ if (ret == -1) { lckstp = NULL; goto tryagain; } error = ret; goto out; } if (!(new_stp->ls_flags & NFSLCK_CHECK) || (LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_deleg))) { NFSUNLOCKSTATE(); goto out; } } /* * Check for a conflicting delegation. If one is found, call * nfsrv_delegconflict() to handle it. If the v4root lock hasn't * been set yet, it will get the lock. Otherwise, it will recall * the delegation. Then, we try try again... * I currently believe the conflict algorithm to be: * For Lock Ops (Lock/LockT/LockU) * - there is a conflict iff a different client has a write delegation * For Reading (Read Op) * - there is a conflict iff a different client has a write delegation * (the specialids are always a different client) * For Writing (Write/Setattr of size) * - there is a conflict if a different client has any delegation * - there is a conflict if the same client has a read delegation * (I don't understand why this isn't allowed, but that seems to be * the current consensus?) */ tstp = LIST_FIRST(&lfp->lf_deleg); while (tstp != LIST_END(&lfp->lf_deleg)) { nstp = LIST_NEXT(tstp, ls_file); if ((((new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK|NFSLCK_TEST))|| ((new_stp->ls_flags & NFSLCK_CHECK) && (new_lop->lo_flags & NFSLCK_READ))) && clp != tstp->ls_clp && (tstp->ls_flags & NFSLCK_DELEGWRITE)) || ((new_stp->ls_flags & NFSLCK_CHECK) && (new_lop->lo_flags & NFSLCK_WRITE) && (clp != tstp->ls_clp || (tstp->ls_flags & NFSLCK_DELEGREAD)))) { ret = 0; if (filestruct_locked != 0) { /* Roll back local locks. */ NFSUNLOCKSTATE(); if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl4"); NFSVOPUNLOCK(vp); } nfsrv_locallock_rollback(vp, lfp, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); NFSUNLOCKSTATE(); NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); vnode_unlocked = 0; if (VN_IS_DOOMED(vp)) ret = NFSERR_SERVERFAULT; NFSLOCKSTATE(); } if (ret == 0) ret = nfsrv_delegconflict(tstp, &haslock, p, vp); if (ret) { /* * nfsrv_delegconflict unlocks state when it * returns non-zero, which it always does. */ if (other_lop) { free(other_lop, M_NFSDLOCK); other_lop = NULL; } if (ret == -1) { lckstp = NULL; goto tryagain; } error = ret; goto out; } /* Never gets here. */ } tstp = nstp; } /* * Handle the unlock case by calling nfsrv_updatelock(). * (Should I have done some access checking above for unlock? For now, * just let it happen.) */ if (new_stp->ls_flags & NFSLCK_UNLOCK) { first = new_lop->lo_first; end = new_lop->lo_end; nfsrv_updatelock(stp, new_lopp, &other_lop, lfp); stateidp->seqid = ++(stp->ls_stateid.seqid); if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0) stateidp->seqid = stp->ls_stateid.seqid = 1; stateidp->other[0] = stp->ls_stateid.other[0]; stateidp->other[1] = stp->ls_stateid.other[1]; stateidp->other[2] = stp->ls_stateid.other[2]; if (filestruct_locked != 0) { NFSUNLOCKSTATE(); if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl5"); vnode_unlocked = 1; NFSVOPUNLOCK(vp); } /* Update the local locks. */ nfsrv_localunlock(vp, lfp, first, end, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); } NFSUNLOCKSTATE(); goto out; } /* * Search for a conflicting lock. A lock conflicts if: * - the lock range overlaps and * - at least one lock is a write lock and * - it is not owned by the same lock owner */ if (!delegation) { LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) { if (new_lop->lo_end > lop->lo_first && new_lop->lo_first < lop->lo_end && (new_lop->lo_flags == NFSLCK_WRITE || lop->lo_flags == NFSLCK_WRITE) && lckstp != lop->lo_stp && (clp != lop->lo_stp->ls_clp || lckstp->ls_ownerlen != lop->lo_stp->ls_ownerlen || NFSBCMP(lckstp->ls_owner, lop->lo_stp->ls_owner, lckstp->ls_ownerlen))) { if (other_lop) { free(other_lop, M_NFSDLOCK); other_lop = NULL; } if (vnode_unlocked != 0) ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock, NULL, p); else ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock, vp, p); if (ret == 1) { if (filestruct_locked != 0) { if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl6"); NFSVOPUNLOCK(vp); } /* Roll back local locks. */ nfsrv_locallock_rollback(vp, lfp, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); NFSUNLOCKSTATE(); NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); vnode_unlocked = 0; if (VN_IS_DOOMED(vp)) { error = NFSERR_SERVERFAULT; goto out; } } /* * nfsrv_clientconflict() unlocks state when it * returns non-zero. */ lckstp = NULL; goto tryagain; } /* * Found a conflicting lock, so record the conflict and * return the error. */ if (cfp != NULL && ret == 0) { cfp->cl_clientid.lval[0]=lop->lo_stp->ls_stateid.other[0]; cfp->cl_clientid.lval[1]=lop->lo_stp->ls_stateid.other[1]; cfp->cl_first = lop->lo_first; cfp->cl_end = lop->lo_end; cfp->cl_flags = lop->lo_flags; cfp->cl_ownerlen = lop->lo_stp->ls_ownerlen; NFSBCOPY(lop->lo_stp->ls_owner, cfp->cl_owner, cfp->cl_ownerlen); } if (ret == 2) error = NFSERR_PERM; else if (new_stp->ls_flags & NFSLCK_RECLAIM) error = NFSERR_RECLAIMCONFLICT; else if (new_stp->ls_flags & NFSLCK_CHECK) error = NFSERR_LOCKED; else error = NFSERR_DENIED; if (filestruct_locked != 0 && ret == 0) { /* Roll back local locks. */ NFSUNLOCKSTATE(); if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl7"); vnode_unlocked = 1; NFSVOPUNLOCK(vp); } nfsrv_locallock_rollback(vp, lfp, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); } if (ret == 0) NFSUNLOCKSTATE(); goto out; } } } /* * We only get here if there was no lock that conflicted. */ if (new_stp->ls_flags & (NFSLCK_TEST | NFSLCK_CHECK)) { NFSUNLOCKSTATE(); goto out; } /* * We only get here when we are creating or modifying a lock. * There are two variants: * - exist_lock_owner where lock_owner exists * - open_to_lock_owner with new lock_owner */ first = new_lop->lo_first; end = new_lop->lo_end; lock_flags = new_lop->lo_flags; if (!(new_stp->ls_flags & NFSLCK_OPENTOLOCK)) { nfsrv_updatelock(lckstp, new_lopp, &other_lop, lfp); stateidp->seqid = ++(lckstp->ls_stateid.seqid); if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0) stateidp->seqid = lckstp->ls_stateid.seqid = 1; stateidp->other[0] = lckstp->ls_stateid.other[0]; stateidp->other[1] = lckstp->ls_stateid.other[1]; stateidp->other[2] = lckstp->ls_stateid.other[2]; } else { /* * The new open_to_lock_owner case. * Link the new nfsstate into the lists. */ new_stp->ls_seq = new_stp->ls_opentolockseq; nfsrvd_refcache(new_stp->ls_op); stateidp->seqid = new_stp->ls_stateid.seqid = 1; stateidp->other[0] = new_stp->ls_stateid.other[0] = clp->lc_clientid.lval[0]; stateidp->other[1] = new_stp->ls_stateid.other[1] = clp->lc_clientid.lval[1]; stateidp->other[2] = new_stp->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_stp->ls_clp = clp; LIST_INIT(&new_stp->ls_lock); new_stp->ls_openstp = stp; new_stp->ls_lfp = lfp; nfsrv_insertlock(new_lop, (struct nfslock *)new_stp, new_stp, lfp); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_stp->ls_stateid), new_stp, ls_hash); LIST_INSERT_HEAD(&stp->ls_open, new_stp, ls_list); *new_lopp = NULL; *new_stpp = NULL; nfsstatsv1.srvlockowners++; nfsrv_openpluslock++; } if (filestruct_locked != 0) { NFSUNLOCKSTATE(); nfsrv_locallock_commit(lfp, lock_flags, first, end); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); } NFSUNLOCKSTATE(); out: if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } if (vnode_unlocked != 0) { NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); if (error == 0 && VN_IS_DOOMED(vp)) error = NFSERR_SERVERFAULT; } if (other_lop) free(other_lop, M_NFSDLOCK); NFSEXITCODE2(error, nd); return (error); } /* * Check for state errors for Open. * repstat is passed back out as an error if more critical errors * are not detected. */ int nfsrv_opencheck(nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *new_stp, vnode_t vp, struct nfsrv_descript *nd, NFSPROC_T *p, int repstat) { struct nfsstate *stp, *nstp; struct nfsclient *clp; struct nfsstate *ownerstp; struct nfslockfile *lfp, *new_lfp; int error = 0, haslock = 0, ret, readonly = 0, getfhret = 0; if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS) readonly = 1; /* * Check for restart conditions (client and server). */ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, 0); if (error) goto out; /* * Check for state resource limit exceeded. * Technically this should be SMP protected, but the worst * case error is "out by one or two" on the count when it * returns NFSERR_RESOURCE and the limit is just a rather * arbitrary high water mark, so no harm is done. */ if (nfsrv_openpluslock > nfsrv_v4statelimit) { error = NFSERR_RESOURCE; goto out; } tryagain: new_lfp = malloc(sizeof (struct nfslockfile), M_NFSDLOCKFILE, M_WAITOK); if (vp) getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp, NULL, p); NFSLOCKSTATE(); /* * Get the nfsclient structure. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); /* * Look up the open owner. See if it needs confirmation and * check the seq#, as required. */ if (!error) nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp); if (!error && ownerstp) { error = nfsrv_checkseqid(nd, new_stp->ls_seq, ownerstp, new_stp->ls_op); /* * If the OpenOwner hasn't been confirmed, assume the * old one was a replay and this one is ok. * See: RFC3530 Sec. 14.2.18. */ if (error == NFSERR_BADSEQID && (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM)) error = 0; } /* * Check for grace. */ if (!error) error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags); if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error && nfsrv_checkstable(clp)) error = NFSERR_NOGRACE; /* * If none of the above errors occurred, let repstat be * returned. */ if (repstat && !error) error = repstat; if (error) { NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } free(new_lfp, M_NFSDLOCKFILE); goto out; } /* * If vp == NULL, the file doesn't exist yet, so return ok. * (This always happens on the first pass, so haslock must be 0.) */ if (vp == NULL) { NFSUNLOCKSTATE(); free(new_lfp, M_NFSDLOCKFILE); goto out; } /* * Get the structure for the underlying file. */ if (getfhret) error = getfhret; else error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp, NULL, 0); if (new_lfp) free(new_lfp, M_NFSDLOCKFILE); if (error) { NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } goto out; } /* * Search for a conflicting open/share. */ if (new_stp->ls_flags & NFSLCK_DELEGCUR) { /* * For Delegate_Cur, search for the matching Delegation, * which indicates no conflict. * An old delegation should have been recovered by the * client doing a Claim_DELEGATE_Prev, so I won't let * it match and return NFSERR_EXPIRED. Should I let it * match? */ LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) { if (!(stp->ls_flags & NFSLCK_OLDDELEG) && (((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0) || stateidp->seqid == stp->ls_stateid.seqid) && !NFSBCMP(stateidp->other, stp->ls_stateid.other, NFSX_STATEIDOTHER)) break; } if (stp == LIST_END(&lfp->lf_deleg) || ((new_stp->ls_flags & NFSLCK_WRITEACCESS) && (stp->ls_flags & NFSLCK_DELEGREAD))) { NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_EXPIRED; goto out; } } /* * Check for access/deny bit conflicts. I check for the same * owner as well, in case the client didn't bother. */ LIST_FOREACH(stp, &lfp->lf_open, ls_file) { if (!(new_stp->ls_flags & NFSLCK_DELEGCUR) && (((new_stp->ls_flags & NFSLCK_ACCESSBITS) & ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))|| ((stp->ls_flags & NFSLCK_ACCESSBITS) & ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS)))){ ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p); if (ret == 1) { /* * nfsrv_clientconflict() unlocks * state when it returns non-zero. */ goto tryagain; } if (ret == 2) error = NFSERR_PERM; else if (new_stp->ls_flags & NFSLCK_RECLAIM) error = NFSERR_RECLAIMCONFLICT; else error = NFSERR_SHAREDENIED; if (ret == 0) NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } goto out; } } /* * Check for a conflicting delegation. If one is found, call * nfsrv_delegconflict() to handle it. If the v4root lock hasn't * been set yet, it will get the lock. Otherwise, it will recall * the delegation. Then, we try try again... * (If NFSLCK_DELEGCUR is set, it has a delegation, so there * isn't a conflict.) * I currently believe the conflict algorithm to be: * For Open with Read Access and Deny None * - there is a conflict iff a different client has a write delegation * For Open with other Write Access or any Deny except None * - there is a conflict if a different client has any delegation * - there is a conflict if the same client has a read delegation * (The current consensus is that this last case should be * considered a conflict since the client with a read delegation * could have done an Open with ReadAccess and WriteDeny * locally and then not have checked for the WriteDeny.) * Don't check for a Reclaim, since that will be dealt with * by nfsrv_openctrl(). */ if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR | NFSLCK_RECLAIM))) { stp = LIST_FIRST(&lfp->lf_deleg); while (stp != LIST_END(&lfp->lf_deleg)) { nstp = LIST_NEXT(stp, ls_file); if ((readonly && stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGWRITE)) || (!readonly && (stp->ls_clp != clp || (stp->ls_flags & NFSLCK_DELEGREAD)))) { ret = nfsrv_delegconflict(stp, &haslock, p, vp); if (ret) { /* * nfsrv_delegconflict() unlocks state * when it returns non-zero. */ if (ret == -1) goto tryagain; error = ret; goto out; } } stp = nstp; } } NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } out: NFSEXITCODE2(error, nd); return (error); } /* * Open control function to create/update open state for an open. */ int nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp, struct nfsstate **new_stpp, nfsquad_t clientid, nfsv4stateid_t *stateidp, nfsv4stateid_t *delegstateidp, u_int32_t *rflagsp, struct nfsexstuff *exp, NFSPROC_T *p, u_quad_t filerev) { struct nfsstate *new_stp = *new_stpp; struct nfsstate *stp, *nstp; struct nfsstate *openstp = NULL, *new_open, *ownerstp, *new_deleg; struct nfslockfile *lfp, *new_lfp; struct nfsclient *clp; int error = 0, haslock = 0, ret, delegate = 1, writedeleg = 1; int readonly = 0, cbret = 1, getfhret = 0; int gotstate = 0, len = 0; u_char *clidp = NULL; if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS) readonly = 1; /* * Check for restart conditions (client and server). * (Paranoia, should have been detected by nfsrv_opencheck().) * If an error does show up, return NFSERR_EXPIRED, since the * the seqid# has already been incremented. */ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, 0); if (error) { printf("Nfsd: openctrl unexpected restart err=%d\n", error); error = NFSERR_EXPIRED; goto out; } clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK); tryagain: new_lfp = malloc(sizeof (struct nfslockfile), M_NFSDLOCKFILE, M_WAITOK); new_open = malloc(sizeof (struct nfsstate), M_NFSDSTATE, M_WAITOK); new_deleg = malloc(sizeof (struct nfsstate), M_NFSDSTATE, M_WAITOK); getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp, NULL, p); NFSLOCKSTATE(); /* * Get the client structure. Since the linked lists could be changed * by other nfsd processes if this process does a tsleep(), one of * two things must be done. * 1 - don't tsleep() * or * 2 - get the nfsv4_lock() { indicated by haslock == 1 } * before using the lists, since this lock stops the other * nfsd. This should only be used for rare cases, since it * essentially single threads the nfsd. * At this time, it is only done for cases where the stable * storage file must be written prior to completion of state * expiration. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (!error && (clp->lc_flags & LCL_NEEDSCBNULL) && clp->lc_program) { /* * This happens on the first open for a client * that supports callbacks. */ NFSUNLOCKSTATE(); /* * Although nfsrv_docallback() will sleep, clp won't * go away, since they are only removed when the * nfsv4_lock() has blocked the nfsd threads. The * fields in clp can change, but having multiple * threads do this Null callback RPC should be * harmless. */ cbret = nfsrv_docallback(clp, NFSV4PROC_CBNULL, NULL, 0, NULL, NULL, NULL, 0, p); NFSLOCKSTATE(); clp->lc_flags &= ~LCL_NEEDSCBNULL; if (!cbret) clp->lc_flags |= LCL_CALLBACKSON; } /* * Look up the open owner. See if it needs confirmation and * check the seq#, as required. */ if (!error) nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp); if (error) { NFSUNLOCKSTATE(); printf("Nfsd: openctrl unexpected state err=%d\n", error); free(new_lfp, M_NFSDLOCKFILE); free(new_open, M_NFSDSTATE); free(new_deleg, M_NFSDSTATE); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_EXPIRED; goto out; } if (new_stp->ls_flags & NFSLCK_RECLAIM) nfsrv_markstable(clp); /* * Get the structure for the underlying file. */ if (getfhret) error = getfhret; else error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp, NULL, 0); if (new_lfp) free(new_lfp, M_NFSDLOCKFILE); if (error) { NFSUNLOCKSTATE(); printf("Nfsd openctrl unexpected getlockfile err=%d\n", error); free(new_open, M_NFSDSTATE); free(new_deleg, M_NFSDSTATE); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } goto out; } /* * Search for a conflicting open/share. */ if (new_stp->ls_flags & NFSLCK_DELEGCUR) { /* * For Delegate_Cur, search for the matching Delegation, * which indicates no conflict. * An old delegation should have been recovered by the * client doing a Claim_DELEGATE_Prev, so I won't let * it match and return NFSERR_EXPIRED. Should I let it * match? */ LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) { if (!(stp->ls_flags & NFSLCK_OLDDELEG) && (((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0) || stateidp->seqid == stp->ls_stateid.seqid) && !NFSBCMP(stateidp->other, stp->ls_stateid.other, NFSX_STATEIDOTHER)) break; } if (stp == LIST_END(&lfp->lf_deleg) || ((new_stp->ls_flags & NFSLCK_WRITEACCESS) && (stp->ls_flags & NFSLCK_DELEGREAD))) { NFSUNLOCKSTATE(); printf("Nfsd openctrl unexpected expiry\n"); free(new_open, M_NFSDSTATE); free(new_deleg, M_NFSDSTATE); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_EXPIRED; goto out; } /* * Don't issue a Delegation, since one already exists and * delay delegation timeout, as required. */ delegate = 0; nfsrv_delaydelegtimeout(stp); } /* * Check for access/deny bit conflicts. I also check for the * same owner, since the client might not have bothered to check. * Also, note an open for the same file and owner, if found, * which is all we do here for Delegate_Cur, since conflict * checking is already done. */ LIST_FOREACH(stp, &lfp->lf_open, ls_file) { if (ownerstp && stp->ls_openowner == ownerstp) openstp = stp; if (!(new_stp->ls_flags & NFSLCK_DELEGCUR)) { /* * If another client has the file open, the only * delegation that can be issued is a Read delegation * and only if it is a Read open with Deny none. */ if (clp != stp->ls_clp) { if ((stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS) writedeleg = 0; else delegate = 0; } if(((new_stp->ls_flags & NFSLCK_ACCESSBITS) & ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))|| ((stp->ls_flags & NFSLCK_ACCESSBITS) & ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS))){ ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p); if (ret == 1) { /* * nfsrv_clientconflict() unlocks state * when it returns non-zero. */ free(new_open, M_NFSDSTATE); free(new_deleg, M_NFSDSTATE); openstp = NULL; goto tryagain; } if (ret == 2) error = NFSERR_PERM; else if (new_stp->ls_flags & NFSLCK_RECLAIM) error = NFSERR_RECLAIMCONFLICT; else error = NFSERR_SHAREDENIED; if (ret == 0) NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } free(new_open, M_NFSDSTATE); free(new_deleg, M_NFSDSTATE); printf("nfsd openctrl unexpected client cnfl\n"); goto out; } } } /* * Check for a conflicting delegation. If one is found, call * nfsrv_delegconflict() to handle it. If the v4root lock hasn't * been set yet, it will get the lock. Otherwise, it will recall * the delegation. Then, we try try again... * (If NFSLCK_DELEGCUR is set, it has a delegation, so there * isn't a conflict.) * I currently believe the conflict algorithm to be: * For Open with Read Access and Deny None * - there is a conflict iff a different client has a write delegation * For Open with other Write Access or any Deny except None * - there is a conflict if a different client has any delegation * - there is a conflict if the same client has a read delegation * (The current consensus is that this last case should be * considered a conflict since the client with a read delegation * could have done an Open with ReadAccess and WriteDeny * locally and then not have checked for the WriteDeny.) */ if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR))) { stp = LIST_FIRST(&lfp->lf_deleg); while (stp != LIST_END(&lfp->lf_deleg)) { nstp = LIST_NEXT(stp, ls_file); if (stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGREAD)) writedeleg = 0; else delegate = 0; if ((readonly && stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGWRITE)) || (!readonly && (stp->ls_clp != clp || (stp->ls_flags & NFSLCK_DELEGREAD)))) { if (new_stp->ls_flags & NFSLCK_RECLAIM) { delegate = 2; } else { ret = nfsrv_delegconflict(stp, &haslock, p, vp); if (ret) { /* * nfsrv_delegconflict() unlocks state * when it returns non-zero. */ printf("Nfsd openctrl unexpected deleg cnfl\n"); free(new_open, M_NFSDSTATE); free(new_deleg, M_NFSDSTATE); if (ret == -1) { openstp = NULL; goto tryagain; } error = ret; goto out; } } } stp = nstp; } } /* * We only get here if there was no open that conflicted. * If an open for the owner exists, or in the access/deny bits. * Otherwise it is a new open. If the open_owner hasn't been * confirmed, replace the open with the new one needing confirmation, * otherwise add the open. */ if (new_stp->ls_flags & NFSLCK_DELEGPREV) { /* * Handle NFSLCK_DELEGPREV by searching the old delegations for * a match. If found, just move the old delegation to the current * delegation list and issue open. If not found, return * NFSERR_EXPIRED. */ LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) { if (stp->ls_lfp == lfp) { /* Found it */ if (stp->ls_clp != clp) panic("olddeleg clp"); LIST_REMOVE(stp, ls_list); LIST_REMOVE(stp, ls_hash); stp->ls_flags &= ~NFSLCK_OLDDELEG; stp->ls_stateid.seqid = delegstateidp->seqid = 1; stp->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; stp->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; stp->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); stp->ls_compref = nd->nd_compref; LIST_INSERT_HEAD(&clp->lc_deleg, stp, ls_list); LIST_INSERT_HEAD(NFSSTATEHASH(clp, stp->ls_stateid), stp, ls_hash); if (stp->ls_flags & NFSLCK_DELEGWRITE) *rflagsp |= NFSV4OPEN_WRITEDELEGATE; else *rflagsp |= NFSV4OPEN_READDELEGATE; clp->lc_delegtime = NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA; /* * Now, do the associated open. */ new_open->ls_stateid.seqid = 1; new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0]; new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1]; new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_open->ls_flags = (new_stp->ls_flags&NFSLCK_DENYBITS)| NFSLCK_OPEN; if (stp->ls_flags & NFSLCK_DELEGWRITE) new_open->ls_flags |= (NFSLCK_READACCESS | NFSLCK_WRITEACCESS); else new_open->ls_flags |= NFSLCK_READACCESS; new_open->ls_uid = new_stp->ls_uid; new_open->ls_lfp = lfp; new_open->ls_clp = clp; LIST_INIT(&new_open->ls_open); LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid), new_open, ls_hash); /* * and handle the open owner */ if (ownerstp) { new_open->ls_openowner = ownerstp; LIST_INSERT_HEAD(&ownerstp->ls_open,new_open,ls_list); } else { new_open->ls_openowner = new_stp; new_stp->ls_flags = 0; nfsrvd_refcache(new_stp->ls_op); new_stp->ls_noopens = 0; LIST_INIT(&new_stp->ls_open); LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list); *new_stpp = NULL; nfsstatsv1.srvopenowners++; nfsrv_openpluslock++; } openstp = new_open; new_open = NULL; nfsstatsv1.srvopens++; nfsrv_openpluslock++; break; } } if (stp == LIST_END(&clp->lc_olddeleg)) error = NFSERR_EXPIRED; } else if (new_stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) { /* * Scan to see that no delegation for this client and file * doesn't already exist. * There also shouldn't yet be an Open for this file and * openowner. */ LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) { if (stp->ls_clp == clp) break; } if (stp == LIST_END(&lfp->lf_deleg) && openstp == NULL) { /* * This is the Claim_Previous case with a delegation * type != Delegate_None. */ /* * First, add the delegation. (Although we must issue the * delegation, we can also ask for an immediate return.) */ new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1; new_deleg->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; new_deleg->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; new_deleg->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); if (new_stp->ls_flags & NFSLCK_DELEGWRITE) { new_deleg->ls_flags = (NFSLCK_DELEGWRITE | NFSLCK_READACCESS | NFSLCK_WRITEACCESS); *rflagsp |= NFSV4OPEN_WRITEDELEGATE; nfsrv_writedelegcnt++; } else { new_deleg->ls_flags = (NFSLCK_DELEGREAD | NFSLCK_READACCESS); *rflagsp |= NFSV4OPEN_READDELEGATE; } new_deleg->ls_uid = new_stp->ls_uid; new_deleg->ls_lfp = lfp; new_deleg->ls_clp = clp; new_deleg->ls_filerev = filerev; new_deleg->ls_compref = nd->nd_compref; new_deleg->ls_lastrecall = 0; LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; if (delegate == 2 || nfsrv_issuedelegs == 0 || (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) != LCL_CALLBACKSON || NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) || !NFSVNO_DELEGOK(vp)) *rflagsp |= NFSV4OPEN_RECALL; nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; /* * Now, do the associated open. */ new_open->ls_stateid.seqid = 1; new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0]; new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1]; new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_open->ls_flags = (new_stp->ls_flags & NFSLCK_DENYBITS) | NFSLCK_OPEN; if (new_stp->ls_flags & NFSLCK_DELEGWRITE) new_open->ls_flags |= (NFSLCK_READACCESS | NFSLCK_WRITEACCESS); else new_open->ls_flags |= NFSLCK_READACCESS; new_open->ls_uid = new_stp->ls_uid; new_open->ls_lfp = lfp; new_open->ls_clp = clp; LIST_INIT(&new_open->ls_open); LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid), new_open, ls_hash); /* * and handle the open owner */ if (ownerstp) { new_open->ls_openowner = ownerstp; LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list); } else { new_open->ls_openowner = new_stp; new_stp->ls_flags = 0; nfsrvd_refcache(new_stp->ls_op); new_stp->ls_noopens = 0; LIST_INIT(&new_stp->ls_open); LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list); *new_stpp = NULL; nfsstatsv1.srvopenowners++; nfsrv_openpluslock++; } openstp = new_open; new_open = NULL; nfsstatsv1.srvopens++; nfsrv_openpluslock++; } else { error = NFSERR_RECLAIMCONFLICT; } } else if (ownerstp) { if (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM) { /* Replace the open */ if (ownerstp->ls_op) nfsrvd_derefcache(ownerstp->ls_op); ownerstp->ls_op = new_stp->ls_op; nfsrvd_refcache(ownerstp->ls_op); ownerstp->ls_seq = new_stp->ls_seq; *rflagsp |= NFSV4OPEN_RESULTCONFIRM; stp = LIST_FIRST(&ownerstp->ls_open); stp->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) | NFSLCK_OPEN; stp->ls_stateid.seqid = 1; stp->ls_uid = new_stp->ls_uid; if (lfp != stp->ls_lfp) { LIST_REMOVE(stp, ls_file); LIST_INSERT_HEAD(&lfp->lf_open, stp, ls_file); stp->ls_lfp = lfp; } openstp = stp; } else if (openstp) { openstp->ls_flags |= (new_stp->ls_flags & NFSLCK_SHAREBITS); openstp->ls_stateid.seqid++; if ((nd->nd_flag & ND_NFSV41) != 0 && openstp->ls_stateid.seqid == 0) openstp->ls_stateid.seqid = 1; /* * This is where we can choose to issue a delegation. */ if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0) *rflagsp |= NFSV4OPEN_WDNOTWANTED; else if (nfsrv_issuedelegs == 0) *rflagsp |= NFSV4OPEN_WDSUPPFTYPE; else if (NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt)) *rflagsp |= NFSV4OPEN_WDRESOURCE; else if (delegate == 0 || writedeleg == 0 || NFSVNO_EXRDONLY(exp) || (readonly != 0 && nfsrv_writedelegifpos == 0) || !NFSVNO_DELEGOK(vp) || (new_stp->ls_flags & NFSLCK_WANTRDELEG) != 0 || (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) != LCL_CALLBACKSON) *rflagsp |= NFSV4OPEN_WDCONTENTION; else { new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1; new_deleg->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; new_deleg->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; new_deleg->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); new_deleg->ls_flags = (NFSLCK_DELEGWRITE | NFSLCK_READACCESS | NFSLCK_WRITEACCESS); *rflagsp |= NFSV4OPEN_WRITEDELEGATE; new_deleg->ls_uid = new_stp->ls_uid; new_deleg->ls_lfp = lfp; new_deleg->ls_clp = clp; new_deleg->ls_filerev = filerev; new_deleg->ls_compref = nd->nd_compref; new_deleg->ls_lastrecall = 0; nfsrv_writedelegcnt++; LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; } } else { new_open->ls_stateid.seqid = 1; new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0]; new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1]; new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS)| NFSLCK_OPEN; new_open->ls_uid = new_stp->ls_uid; new_open->ls_openowner = ownerstp; new_open->ls_lfp = lfp; new_open->ls_clp = clp; LIST_INIT(&new_open->ls_open); LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file); LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid), new_open, ls_hash); openstp = new_open; new_open = NULL; nfsstatsv1.srvopens++; nfsrv_openpluslock++; /* * This is where we can choose to issue a delegation. */ if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0) *rflagsp |= NFSV4OPEN_WDNOTWANTED; else if (nfsrv_issuedelegs == 0) *rflagsp |= NFSV4OPEN_WDSUPPFTYPE; else if (NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt)) *rflagsp |= NFSV4OPEN_WDRESOURCE; else if (delegate == 0 || (writedeleg == 0 && readonly == 0) || !NFSVNO_DELEGOK(vp) || (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) != LCL_CALLBACKSON) *rflagsp |= NFSV4OPEN_WDCONTENTION; else { new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1; new_deleg->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; new_deleg->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; new_deleg->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); if (writedeleg && !NFSVNO_EXRDONLY(exp) && (nfsrv_writedelegifpos || !readonly) && (new_stp->ls_flags & NFSLCK_WANTRDELEG) == 0) { new_deleg->ls_flags = (NFSLCK_DELEGWRITE | NFSLCK_READACCESS | NFSLCK_WRITEACCESS); *rflagsp |= NFSV4OPEN_WRITEDELEGATE; nfsrv_writedelegcnt++; } else { new_deleg->ls_flags = (NFSLCK_DELEGREAD | NFSLCK_READACCESS); *rflagsp |= NFSV4OPEN_READDELEGATE; } new_deleg->ls_uid = new_stp->ls_uid; new_deleg->ls_lfp = lfp; new_deleg->ls_clp = clp; new_deleg->ls_filerev = filerev; new_deleg->ls_compref = nd->nd_compref; new_deleg->ls_lastrecall = 0; LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; } } } else { /* * New owner case. Start the open_owner sequence with a * Needs confirmation (unless a reclaim) and hang the * new open off it. */ new_open->ls_stateid.seqid = 1; new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0]; new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1]; new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) | NFSLCK_OPEN; new_open->ls_uid = new_stp->ls_uid; LIST_INIT(&new_open->ls_open); new_open->ls_openowner = new_stp; new_open->ls_lfp = lfp; new_open->ls_clp = clp; LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file); if (new_stp->ls_flags & NFSLCK_RECLAIM) { new_stp->ls_flags = 0; } else if ((nd->nd_flag & ND_NFSV41) != 0) { /* NFSv4.1 never needs confirmation. */ new_stp->ls_flags = 0; /* * This is where we can choose to issue a delegation. */ if (delegate && nfsrv_issuedelegs && (writedeleg || readonly) && (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) == LCL_CALLBACKSON && !NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) && NFSVNO_DELEGOK(vp) && ((nd->nd_flag & ND_NFSV41) == 0 || (new_stp->ls_flags & NFSLCK_WANTNODELEG) == 0)) { new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1; new_deleg->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; new_deleg->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; new_deleg->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); if (writedeleg && !NFSVNO_EXRDONLY(exp) && (nfsrv_writedelegifpos || !readonly) && ((nd->nd_flag & ND_NFSV41) == 0 || (new_stp->ls_flags & NFSLCK_WANTRDELEG) == 0)) { new_deleg->ls_flags = (NFSLCK_DELEGWRITE | NFSLCK_READACCESS | NFSLCK_WRITEACCESS); *rflagsp |= NFSV4OPEN_WRITEDELEGATE; nfsrv_writedelegcnt++; } else { new_deleg->ls_flags = (NFSLCK_DELEGREAD | NFSLCK_READACCESS); *rflagsp |= NFSV4OPEN_READDELEGATE; } new_deleg->ls_uid = new_stp->ls_uid; new_deleg->ls_lfp = lfp; new_deleg->ls_clp = clp; new_deleg->ls_filerev = filerev; new_deleg->ls_compref = nd->nd_compref; new_deleg->ls_lastrecall = 0; LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; } /* * Since NFSv4.1 never does an OpenConfirm, the first * open state will be acquired here. */ if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) { clp->lc_flags |= LCL_STAMPEDSTABLE; len = clp->lc_idlen; NFSBCOPY(clp->lc_id, clidp, len); gotstate = 1; } } else { *rflagsp |= NFSV4OPEN_RESULTCONFIRM; new_stp->ls_flags = NFSLCK_NEEDSCONFIRM; } nfsrvd_refcache(new_stp->ls_op); new_stp->ls_noopens = 0; LIST_INIT(&new_stp->ls_open); LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid), new_open, ls_hash); openstp = new_open; new_open = NULL; *new_stpp = NULL; nfsstatsv1.srvopens++; nfsrv_openpluslock++; nfsstatsv1.srvopenowners++; nfsrv_openpluslock++; } if (!error) { stateidp->seqid = openstp->ls_stateid.seqid; stateidp->other[0] = openstp->ls_stateid.other[0]; stateidp->other[1] = openstp->ls_stateid.other[1]; stateidp->other[2] = openstp->ls_stateid.other[2]; } NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } if (new_open) free(new_open, M_NFSDSTATE); if (new_deleg) free(new_deleg, M_NFSDSTATE); /* * If the NFSv4.1 client just acquired its first open, write a timestamp * to the stable storage file. */ if (gotstate != 0) { nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p); nfsrv_backupstable(); } out: free(clidp, M_TEMP); NFSEXITCODE2(error, nd); return (error); } /* * Open update. Does the confirm, downgrade and close. */ int nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsrv_descript *nd, NFSPROC_T *p, int *retwriteaccessp) { struct nfsstate *stp; struct nfsclient *clp; struct nfslockfile *lfp; u_int32_t bits; int error = 0, gotstate = 0, len = 0; u_char *clidp = NULL; /* * Check for restart conditions (client and server). */ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, 0); if (error) goto out; clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK); NFSLOCKSTATE(); /* * Get the open structure via clientid and stateid. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (!error) error = nfsrv_getstate(clp, &new_stp->ls_stateid, new_stp->ls_flags, &stp); /* * Sanity check the open. */ if (!error && (!(stp->ls_flags & NFSLCK_OPEN) || (!(new_stp->ls_flags & NFSLCK_CONFIRM) && (stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)) || ((new_stp->ls_flags & NFSLCK_CONFIRM) && (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM))))) error = NFSERR_BADSTATEID; if (!error) error = nfsrv_checkseqid(nd, new_stp->ls_seq, stp->ls_openowner, new_stp->ls_op); if (!error && stp->ls_stateid.seqid != new_stp->ls_stateid.seqid && (((nd->nd_flag & ND_NFSV41) == 0 && !(new_stp->ls_flags & NFSLCK_CONFIRM)) || ((nd->nd_flag & ND_NFSV41) != 0 && new_stp->ls_stateid.seqid != 0))) error = NFSERR_OLDSTATEID; if (!error && vnode_vtype(vp) != VREG) { if (vnode_vtype(vp) == VDIR) error = NFSERR_ISDIR; else error = NFSERR_INVAL; } if (error) { /* * If a client tries to confirm an Open with a bad * seqid# and there are no byte range locks or other Opens * on the openowner, just throw it away, so the next use of the * openowner will start a fresh seq#. */ if (error == NFSERR_BADSEQID && (new_stp->ls_flags & NFSLCK_CONFIRM) && nfsrv_nootherstate(stp)) nfsrv_freeopenowner(stp->ls_openowner, 0, p); NFSUNLOCKSTATE(); goto out; } /* * Set the return stateid. */ stateidp->seqid = stp->ls_stateid.seqid + 1; if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0) stateidp->seqid = 1; stateidp->other[0] = stp->ls_stateid.other[0]; stateidp->other[1] = stp->ls_stateid.other[1]; stateidp->other[2] = stp->ls_stateid.other[2]; /* * Now, handle the three cases. */ if (new_stp->ls_flags & NFSLCK_CONFIRM) { /* * If the open doesn't need confirmation, it seems to me that * there is a client error, but I'll just log it and keep going? */ if (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)) printf("Nfsv4d: stray open confirm\n"); stp->ls_openowner->ls_flags = 0; stp->ls_stateid.seqid++; if ((nd->nd_flag & ND_NFSV41) != 0 && stp->ls_stateid.seqid == 0) stp->ls_stateid.seqid = 1; if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) { clp->lc_flags |= LCL_STAMPEDSTABLE; len = clp->lc_idlen; NFSBCOPY(clp->lc_id, clidp, len); gotstate = 1; } NFSUNLOCKSTATE(); } else if (new_stp->ls_flags & NFSLCK_CLOSE) { lfp = stp->ls_lfp; if (retwriteaccessp != NULL) { if ((stp->ls_flags & NFSLCK_WRITEACCESS) != 0) *retwriteaccessp = 1; else *retwriteaccessp = 0; } if (nfsrv_dolocallocks != 0 && !LIST_EMPTY(&stp->ls_open)) { /* Get the lf lock */ nfsrv_locklf(lfp); NFSUNLOCKSTATE(); ASSERT_VOP_ELOCKED(vp, "nfsrv_openupdate"); NFSVOPUNLOCK(vp); if (nfsrv_freeopen(stp, vp, 1, p) == 0) { NFSLOCKSTATE(); nfsrv_unlocklf(lfp); NFSUNLOCKSTATE(); } NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); } else { (void) nfsrv_freeopen(stp, NULL, 0, p); NFSUNLOCKSTATE(); } } else { /* * Update the share bits, making sure that the new set are a * subset of the old ones. */ bits = (new_stp->ls_flags & NFSLCK_SHAREBITS); if (~(stp->ls_flags) & bits) { NFSUNLOCKSTATE(); error = NFSERR_INVAL; goto out; } stp->ls_flags = (bits | NFSLCK_OPEN); stp->ls_stateid.seqid++; if ((nd->nd_flag & ND_NFSV41) != 0 && stp->ls_stateid.seqid == 0) stp->ls_stateid.seqid = 1; NFSUNLOCKSTATE(); } /* * If the client just confirmed its first open, write a timestamp * to the stable storage file. */ if (gotstate != 0) { nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p); nfsrv_backupstable(); } out: free(clidp, M_TEMP); NFSEXITCODE2(error, nd); return (error); } /* * Delegation update. Does the purge and return. */ int nfsrv_delegupdate(struct nfsrv_descript *nd, nfsquad_t clientid, nfsv4stateid_t *stateidp, vnode_t vp, int op, struct ucred *cred, NFSPROC_T *p, int *retwriteaccessp) { struct nfsstate *stp; struct nfsclient *clp; int error = 0; fhandle_t fh; /* * Do a sanity check against the file handle for DelegReturn. */ if (vp) { error = nfsvno_getfh(vp, &fh, p); if (error) goto out; } /* * Check for restart conditions (client and server). */ if (op == NFSV4OP_DELEGRETURN) error = nfsrv_checkrestart(clientid, NFSLCK_DELEGRETURN, stateidp, 0); else error = nfsrv_checkrestart(clientid, NFSLCK_DELEGPURGE, stateidp, 0); NFSLOCKSTATE(); /* * Get the open structure via clientid and stateid. */ if (!error) error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (error) { if (error == NFSERR_CBPATHDOWN) error = 0; if (error == NFSERR_STALECLIENTID && op == NFSV4OP_DELEGRETURN) error = NFSERR_STALESTATEID; } if (!error && op == NFSV4OP_DELEGRETURN) { error = nfsrv_getstate(clp, stateidp, NFSLCK_DELEGRETURN, &stp); if (!error && stp->ls_stateid.seqid != stateidp->seqid && ((nd->nd_flag & ND_NFSV41) == 0 || stateidp->seqid != 0)) error = NFSERR_OLDSTATEID; } /* * NFSERR_EXPIRED means that the state has gone away, * so Delegations have been purged. Just return ok. */ if (error == NFSERR_EXPIRED && op == NFSV4OP_DELEGPURGE) { NFSUNLOCKSTATE(); error = 0; goto out; } if (error) { NFSUNLOCKSTATE(); goto out; } if (op == NFSV4OP_DELEGRETURN) { if (NFSBCMP((caddr_t)&fh, (caddr_t)&stp->ls_lfp->lf_fh, sizeof (fhandle_t))) { NFSUNLOCKSTATE(); error = NFSERR_BADSTATEID; goto out; } if (retwriteaccessp != NULL) { if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0) *retwriteaccessp = 1; else *retwriteaccessp = 0; } nfsrv_freedeleg(stp); } else { nfsrv_freedeleglist(&clp->lc_olddeleg); } NFSUNLOCKSTATE(); error = 0; out: NFSEXITCODE(error); return (error); } /* * Release lock owner. */ int nfsrv_releaselckown(struct nfsstate *new_stp, nfsquad_t clientid, NFSPROC_T *p) { struct nfsstate *stp, *nstp, *openstp, *ownstp; struct nfsclient *clp; int error = 0; /* * Check for restart conditions (client and server). */ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, 0); if (error) goto out; NFSLOCKSTATE(); /* * Get the lock owner by name. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, NULL, p); if (error) { NFSUNLOCKSTATE(); goto out; } LIST_FOREACH(ownstp, &clp->lc_open, ls_list) { LIST_FOREACH(openstp, &ownstp->ls_open, ls_list) { stp = LIST_FIRST(&openstp->ls_open); while (stp != LIST_END(&openstp->ls_open)) { nstp = LIST_NEXT(stp, ls_list); /* * If the owner matches, check for locks and * then free or return an error. */ if (stp->ls_ownerlen == new_stp->ls_ownerlen && !NFSBCMP(stp->ls_owner, new_stp->ls_owner, stp->ls_ownerlen)){ if (LIST_EMPTY(&stp->ls_lock)) { nfsrv_freelockowner(stp, NULL, 0, p); } else { NFSUNLOCKSTATE(); error = NFSERR_LOCKSHELD; goto out; } } stp = nstp; } } } NFSUNLOCKSTATE(); out: NFSEXITCODE(error); return (error); } /* * Get the file handle for a lock structure. */ static int nfsrv_getlockfh(vnode_t vp, u_short flags, struct nfslockfile *new_lfp, fhandle_t *nfhp, NFSPROC_T *p) { fhandle_t *fhp = NULL; int error; /* * For lock, use the new nfslock structure, otherwise just * a fhandle_t on the stack. */ if (flags & NFSLCK_OPEN) { KASSERT(new_lfp != NULL, ("nfsrv_getlockfh: new_lfp NULL")); fhp = &new_lfp->lf_fh; } else if (nfhp) { fhp = nfhp; } else { panic("nfsrv_getlockfh"); } error = nfsvno_getfh(vp, fhp, p); NFSEXITCODE(error); return (error); } /* * Get an nfs lock structure. Allocate one, as required, and return a * pointer to it. * Returns an NFSERR_xxx upon failure or -1 to indicate no current lock. */ static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp, struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit) { struct nfslockfile *lfp; fhandle_t *fhp = NULL, *tfhp; struct nfslockhashhead *hp; struct nfslockfile *new_lfp = NULL; /* * For lock, use the new nfslock structure, otherwise just * a fhandle_t on the stack. */ if (flags & NFSLCK_OPEN) { new_lfp = *new_lfpp; fhp = &new_lfp->lf_fh; } else if (nfhp) { fhp = nfhp; } else { panic("nfsrv_getlockfile"); } hp = NFSLOCKHASH(fhp); LIST_FOREACH(lfp, hp, lf_hash) { tfhp = &lfp->lf_fh; if (NFSVNO_CMPFH(fhp, tfhp)) { if (lockit) nfsrv_locklf(lfp); *lfpp = lfp; return (0); } } if (!(flags & NFSLCK_OPEN)) return (-1); /* * No match, so chain the new one into the list. */ LIST_INIT(&new_lfp->lf_open); LIST_INIT(&new_lfp->lf_lock); LIST_INIT(&new_lfp->lf_deleg); LIST_INIT(&new_lfp->lf_locallock); LIST_INIT(&new_lfp->lf_rollback); new_lfp->lf_locallock_lck.nfslock_usecnt = 0; new_lfp->lf_locallock_lck.nfslock_lock = 0; new_lfp->lf_usecount = 0; LIST_INSERT_HEAD(hp, new_lfp, lf_hash); *lfpp = new_lfp; *new_lfpp = NULL; return (0); } /* * This function adds a nfslock lock structure to the list for the associated * nfsstate and nfslockfile structures. It will be inserted after the * entry pointed at by insert_lop. */ static void nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp) { struct nfslock *lop, *nlop; new_lop->lo_stp = stp; new_lop->lo_lfp = lfp; if (stp != NULL) { /* Insert in increasing lo_first order */ lop = LIST_FIRST(&lfp->lf_lock); if (lop == LIST_END(&lfp->lf_lock) || new_lop->lo_first <= lop->lo_first) { LIST_INSERT_HEAD(&lfp->lf_lock, new_lop, lo_lckfile); } else { nlop = LIST_NEXT(lop, lo_lckfile); while (nlop != LIST_END(&lfp->lf_lock) && nlop->lo_first < new_lop->lo_first) { lop = nlop; nlop = LIST_NEXT(lop, lo_lckfile); } LIST_INSERT_AFTER(lop, new_lop, lo_lckfile); } } else { new_lop->lo_lckfile.le_prev = NULL; /* list not used */ } /* * Insert after insert_lop, which is overloaded as stp or lfp for * an empty list. */ if (stp == NULL && (struct nfslockfile *)insert_lop == lfp) LIST_INSERT_HEAD(&lfp->lf_locallock, new_lop, lo_lckowner); else if ((struct nfsstate *)insert_lop == stp) LIST_INSERT_HEAD(&stp->ls_lock, new_lop, lo_lckowner); else LIST_INSERT_AFTER(insert_lop, new_lop, lo_lckowner); if (stp != NULL) { nfsstatsv1.srvlocks++; nfsrv_openpluslock++; } } /* * This function updates the locking for a lock owner and given file. It * maintains a list of lock ranges ordered on increasing file offset that * are NFSLCK_READ or NFSLCK_WRITE and non-overlapping (aka POSIX style). * It always adds new_lop to the list and sometimes uses the one pointed * at by other_lopp. */ static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp, struct nfslock **other_lopp, struct nfslockfile *lfp) { struct nfslock *new_lop = *new_lopp; struct nfslock *lop, *tlop, *ilop; struct nfslock *other_lop = *other_lopp; int unlock = 0, myfile = 0; u_int64_t tmp; /* * Work down the list until the lock is merged. */ if (new_lop->lo_flags & NFSLCK_UNLOCK) unlock = 1; if (stp != NULL) { ilop = (struct nfslock *)stp; lop = LIST_FIRST(&stp->ls_lock); } else { ilop = (struct nfslock *)lfp; lop = LIST_FIRST(&lfp->lf_locallock); } while (lop != NULL) { /* * Only check locks for this file that aren't before the start of * new lock's range. */ if (lop->lo_lfp == lfp) { myfile = 1; if (lop->lo_end >= new_lop->lo_first) { if (new_lop->lo_end < lop->lo_first) { /* * If the new lock ends before the start of the * current lock's range, no merge, just insert * the new lock. */ break; } if (new_lop->lo_flags == lop->lo_flags || (new_lop->lo_first <= lop->lo_first && new_lop->lo_end >= lop->lo_end)) { /* * This lock can be absorbed by the new lock/unlock. * This happens when it covers the entire range * of the old lock or is contiguous * with the old lock and is of the same type or an * unlock. */ if (lop->lo_first < new_lop->lo_first) new_lop->lo_first = lop->lo_first; if (lop->lo_end > new_lop->lo_end) new_lop->lo_end = lop->lo_end; tlop = lop; lop = LIST_NEXT(lop, lo_lckowner); nfsrv_freenfslock(tlop); continue; } /* * All these cases are for contiguous locks that are not the * same type, so they can't be merged. */ if (new_lop->lo_first <= lop->lo_first) { /* * This case is where the new lock overlaps with the * first part of the old lock. Move the start of the * old lock to just past the end of the new lock. The * new lock will be inserted in front of the old, since * ilop hasn't been updated. (We are done now.) */ lop->lo_first = new_lop->lo_end; break; } if (new_lop->lo_end >= lop->lo_end) { /* * This case is where the new lock overlaps with the * end of the old lock's range. Move the old lock's * end to just before the new lock's first and insert * the new lock after the old lock. * Might not be done yet, since the new lock could * overlap further locks with higher ranges. */ lop->lo_end = new_lop->lo_first; ilop = lop; lop = LIST_NEXT(lop, lo_lckowner); continue; } /* * The final case is where the new lock's range is in the * middle of the current lock's and splits the current lock * up. Use *other_lopp to handle the second part of the * split old lock range. (We are done now.) * For unlock, we use new_lop as other_lop and tmp, since * other_lop and new_lop are the same for this case. * We noted the unlock case above, so we don't need * new_lop->lo_flags any longer. */ tmp = new_lop->lo_first; if (other_lop == NULL) { if (!unlock) panic("nfsd srv update unlock"); other_lop = new_lop; *new_lopp = NULL; } other_lop->lo_first = new_lop->lo_end; other_lop->lo_end = lop->lo_end; other_lop->lo_flags = lop->lo_flags; other_lop->lo_stp = stp; other_lop->lo_lfp = lfp; lop->lo_end = tmp; nfsrv_insertlock(other_lop, lop, stp, lfp); *other_lopp = NULL; ilop = lop; break; } } ilop = lop; lop = LIST_NEXT(lop, lo_lckowner); if (myfile && (lop == NULL || lop->lo_lfp != lfp)) break; } /* * Insert the new lock in the list at the appropriate place. */ if (!unlock) { nfsrv_insertlock(new_lop, ilop, stp, lfp); *new_lopp = NULL; } } /* * This function handles sequencing of locks, etc. * It returns an error that indicates what the caller should do. */ static int nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid, struct nfsstate *stp, struct nfsrvcache *op) { int error = 0; if ((nd->nd_flag & ND_NFSV41) != 0) /* NFSv4.1 ignores the open_seqid and lock_seqid. */ goto out; if (op != nd->nd_rp) panic("nfsrvstate checkseqid"); if (!(op->rc_flag & RC_INPROG)) panic("nfsrvstate not inprog"); if (stp->ls_op && stp->ls_op->rc_refcnt <= 0) { printf("refcnt=%d\n", stp->ls_op->rc_refcnt); panic("nfsrvstate op refcnt"); } /* If ND_ERELOOKUP is set, the seqid has already been handled. */ if ((nd->nd_flag & ND_ERELOOKUP) != 0) goto out; if ((stp->ls_seq + 1) == seqid) { if (stp->ls_op) nfsrvd_derefcache(stp->ls_op); stp->ls_op = op; nfsrvd_refcache(op); stp->ls_seq = seqid; goto out; } else if (stp->ls_seq == seqid && stp->ls_op && op->rc_xid == stp->ls_op->rc_xid && op->rc_refcnt == 0 && op->rc_reqlen == stp->ls_op->rc_reqlen && op->rc_cksum == stp->ls_op->rc_cksum) { if (stp->ls_op->rc_flag & RC_INPROG) { error = NFSERR_DONTREPLY; goto out; } nd->nd_rp = stp->ls_op; nd->nd_rp->rc_flag |= RC_INPROG; nfsrvd_delcache(op); error = NFSERR_REPLYFROMCACHE; goto out; } error = NFSERR_BADSEQID; out: NFSEXITCODE2(error, nd); return (error); } /* * Get the client ip address for callbacks. If the strings can't be parsed, * just set lc_program to 0 to indicate no callbacks are possible. * (For cases where the address can't be parsed or is 0.0.0.0.0.0, set * the address to the client's transport address. This won't be used * for callbacks, but can be printed out by nfsstats for info.) * Return error if the xdr can't be parsed, 0 otherwise. */ int nfsrv_getclientipaddr(struct nfsrv_descript *nd, struct nfsclient *clp) { u_int32_t *tl; u_char *cp, *cp2; int i, j, maxalen = 0, minalen = 0; sa_family_t af; #ifdef INET struct sockaddr_in *rin = NULL, *sin; #endif #ifdef INET6 struct sockaddr_in6 *rin6 = NULL, *sin6; #endif u_char *addr; int error = 0, cantparse = 0; union { in_addr_t ival; u_char cval[4]; } ip; union { in_port_t sval; u_char cval[2]; } port; /* 8 is the maximum length of the port# string. */ addr = malloc(INET6_ADDRSTRLEN + 8, M_TEMP, M_WAITOK); clp->lc_req.nr_client = NULL; clp->lc_req.nr_lock = 0; af = AF_UNSPEC; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i >= 3 && i <= 4) { error = nfsrv_mtostr(nd, addr, i); if (error) goto nfsmout; #ifdef INET if (!strcmp(addr, "tcp")) { clp->lc_flags |= LCL_TCPCALLBACK; clp->lc_req.nr_sotype = SOCK_STREAM; clp->lc_req.nr_soproto = IPPROTO_TCP; af = AF_INET; } else if (!strcmp(addr, "udp")) { clp->lc_req.nr_sotype = SOCK_DGRAM; clp->lc_req.nr_soproto = IPPROTO_UDP; af = AF_INET; } #endif #ifdef INET6 if (af == AF_UNSPEC) { if (!strcmp(addr, "tcp6")) { clp->lc_flags |= LCL_TCPCALLBACK; clp->lc_req.nr_sotype = SOCK_STREAM; clp->lc_req.nr_soproto = IPPROTO_TCP; af = AF_INET6; } else if (!strcmp(addr, "udp6")) { clp->lc_req.nr_sotype = SOCK_DGRAM; clp->lc_req.nr_soproto = IPPROTO_UDP; af = AF_INET6; } } #endif if (af == AF_UNSPEC) { cantparse = 1; } } else { cantparse = 1; if (i > 0) { error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; } } /* * The caller has allocated clp->lc_req.nr_nam to be large enough * for either AF_INET or AF_INET6 and zeroed out the contents. * maxalen is set to the maximum length of the host IP address string * plus 8 for the maximum length of the port#. * minalen is set to the minimum length of the host IP address string * plus 4 for the minimum length of the port#. * These lengths do not include NULL termination, * so INET[6]_ADDRSTRLEN - 1 is used in the calculations. */ switch (af) { #ifdef INET case AF_INET: rin = (struct sockaddr_in *)clp->lc_req.nr_nam; rin->sin_family = AF_INET; rin->sin_len = sizeof(struct sockaddr_in); maxalen = INET_ADDRSTRLEN - 1 + 8; minalen = 7 + 4; break; #endif #ifdef INET6 case AF_INET6: rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam; rin6->sin6_family = AF_INET6; rin6->sin6_len = sizeof(struct sockaddr_in6); maxalen = INET6_ADDRSTRLEN - 1 + 8; minalen = 3 + 4; break; #endif } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i < 0) { error = NFSERR_BADXDR; goto nfsmout; } else if (i == 0) { cantparse = 1; } else if (!cantparse && i <= maxalen && i >= minalen) { error = nfsrv_mtostr(nd, addr, i); if (error) goto nfsmout; /* * Parse out the address fields. We expect 6 decimal numbers * separated by '.'s for AF_INET and two decimal numbers * preceeded by '.'s for AF_INET6. */ cp = NULL; switch (af) { #ifdef INET6 /* * For AF_INET6, first parse the host address. */ case AF_INET6: cp = strchr(addr, '.'); if (cp != NULL) { *cp++ = '\0'; if (inet_pton(af, addr, &rin6->sin6_addr) == 1) i = 4; else { cp = NULL; cantparse = 1; } } break; #endif #ifdef INET case AF_INET: cp = addr; i = 0; break; #endif } while (cp != NULL && *cp && i < 6) { cp2 = cp; while (*cp2 && *cp2 != '.') cp2++; if (*cp2) *cp2++ = '\0'; else if (i != 5) { cantparse = 1; break; } j = nfsrv_getipnumber(cp); if (j >= 0) { if (i < 4) ip.cval[3 - i] = j; else port.cval[5 - i] = j; } else { cantparse = 1; break; } cp = cp2; i++; } if (!cantparse) { /* * The host address INADDR_ANY is (mis)used to indicate * "there is no valid callback address". */ switch (af) { #ifdef INET6 case AF_INET6: if (!IN6_ARE_ADDR_EQUAL(&rin6->sin6_addr, &in6addr_any)) rin6->sin6_port = htons(port.sval); else cantparse = 1; break; #endif #ifdef INET case AF_INET: if (ip.ival != INADDR_ANY) { rin->sin_addr.s_addr = htonl(ip.ival); rin->sin_port = htons(port.sval); } else { cantparse = 1; } break; #endif } } } else { cantparse = 1; if (i > 0) { error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; } } if (cantparse) { switch (nd->nd_nam->sa_family) { #ifdef INET case AF_INET: sin = (struct sockaddr_in *)nd->nd_nam; rin = (struct sockaddr_in *)clp->lc_req.nr_nam; rin->sin_family = AF_INET; rin->sin_len = sizeof(struct sockaddr_in); rin->sin_addr.s_addr = sin->sin_addr.s_addr; rin->sin_port = 0x0; break; #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)nd->nd_nam; rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam; rin6->sin6_family = AF_INET6; rin6->sin6_len = sizeof(struct sockaddr_in6); rin6->sin6_addr = sin6->sin6_addr; rin6->sin6_port = 0x0; break; #endif } clp->lc_program = 0; } nfsmout: free(addr, M_TEMP); NFSEXITCODE2(error, nd); return (error); } /* * Turn a string of up to three decimal digits into a number. Return -1 upon * error. */ static int nfsrv_getipnumber(u_char *cp) { int i = 0, j = 0; while (*cp) { if (j > 2 || *cp < '0' || *cp > '9') return (-1); i *= 10; i += (*cp - '0'); cp++; j++; } if (i < 256) return (i); return (-1); } /* * This function checks for restart conditions. */ static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags, nfsv4stateid_t *stateidp, int specialid) { int ret = 0; /* * First check for a server restart. Open, LockT, ReleaseLockOwner * and DelegPurge have a clientid, the rest a stateid. */ if (flags & (NFSLCK_OPEN | NFSLCK_TEST | NFSLCK_RELEASE | NFSLCK_DELEGPURGE)) { if (clientid.lval[0] != nfsrvboottime) { ret = NFSERR_STALECLIENTID; goto out; } } else if (stateidp->other[0] != nfsrvboottime && specialid == 0) { ret = NFSERR_STALESTATEID; goto out; } /* * Read, Write, Setattr and LockT can return NFSERR_GRACE and do * not use a lock/open owner seqid#, so the check can be done now. * (The others will be checked, as required, later.) */ if (!(flags & (NFSLCK_CHECK | NFSLCK_TEST))) goto out; NFSLOCKSTATE(); ret = nfsrv_checkgrace(NULL, NULL, flags); NFSUNLOCKSTATE(); out: NFSEXITCODE(ret); return (ret); } /* * Check for grace. */ static int nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp, u_int32_t flags) { int error = 0, notreclaimed; struct nfsrv_stable *sp; if ((nfsrv_stablefirst.nsf_flags & (NFSNSF_UPDATEDONE | NFSNSF_GRACEOVER)) == 0) { /* * First, check to see if all of the clients have done a * ReclaimComplete. If so, grace can end now. */ notreclaimed = 0; LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) { if ((sp->nst_flag & NFSNST_RECLAIMED) == 0) { notreclaimed = 1; break; } } if (notreclaimed == 0) nfsrv_stablefirst.nsf_flags |= (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK); } if ((nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) != 0) { if (flags & NFSLCK_RECLAIM) { error = NFSERR_NOGRACE; goto out; } } else { if (!(flags & NFSLCK_RECLAIM)) { error = NFSERR_GRACE; goto out; } if (nd != NULL && clp != NULL && (nd->nd_flag & ND_NFSV41) != 0 && (clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0) { error = NFSERR_NOGRACE; goto out; } /* * If grace is almost over and we are still getting Reclaims, * extend grace a bit. */ if ((NFSD_MONOSEC + NFSRV_LEASEDELTA) > nfsrv_stablefirst.nsf_eograce) nfsrv_stablefirst.nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA; } out: NFSEXITCODE(error); return (error); } /* * Do a server callback. * The "trunc" argument is slightly overloaded and refers to different * boolean arguments for CBRECALL and CBLAYOUTRECALL. */ static int nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp, struct nfsvattr *nap, nfsattrbit_t *attrbitp, int laytype, NFSPROC_T *p) { struct mbuf *m; u_int32_t *tl; struct nfsrv_descript *nd; struct ucred *cred; int error = 0, slotpos; u_int32_t callback; struct nfsdsession *sep = NULL; uint64_t tval; bool dotls; nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); cred = newnfs_getcred(); NFSLOCKSTATE(); /* mostly for lc_cbref++ */ if (clp->lc_flags & LCL_NEEDSCONFIRM) { NFSUNLOCKSTATE(); panic("docallb"); } clp->lc_cbref++; /* * Fill the callback program# and version into the request * structure for newnfs_connect() to use. */ clp->lc_req.nr_prog = clp->lc_program; #ifdef notnow if ((clp->lc_flags & LCL_NFSV41) != 0) clp->lc_req.nr_vers = NFSV41_CBVERS; else #endif clp->lc_req.nr_vers = NFSV4_CBVERS; /* * First, fill in some of the fields of nd and cr. */ nd->nd_flag = ND_NFSV4; if (clp->lc_flags & LCL_GSS) nd->nd_flag |= ND_KERBV; if ((clp->lc_flags & LCL_NFSV41) != 0) nd->nd_flag |= ND_NFSV41; if ((clp->lc_flags & LCL_NFSV42) != 0) nd->nd_flag |= ND_NFSV42; nd->nd_repstat = 0; cred->cr_uid = clp->lc_uid; cred->cr_gid = clp->lc_gid; callback = clp->lc_callback; NFSUNLOCKSTATE(); cred->cr_ngroups = 1; /* * Get the first mbuf for the request. */ MGET(m, M_WAITOK, MT_DATA); m->m_len = 0; nd->nd_mreq = nd->nd_mb = m; nd->nd_bpos = mtod(m, caddr_t); /* * and build the callback request. */ if (procnum == NFSV4OP_CBGETATTR) { nd->nd_procnum = NFSV4PROC_CBCOMPOUND; error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBGETATTR, "CB Getattr", &sep, &slotpos); if (error != 0) { m_freem(nd->nd_mreq); goto errout; } (void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0); (void)nfsrv_putattrbit(nd, attrbitp); } else if (procnum == NFSV4OP_CBRECALL) { nd->nd_procnum = NFSV4PROC_CBCOMPOUND; error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBRECALL, "CB Recall", &sep, &slotpos); if (error != 0) { m_freem(nd->nd_mreq); goto errout; } NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID); *tl++ = txdr_unsigned(stateidp->seqid); NFSBCOPY((caddr_t)stateidp->other, (caddr_t)tl, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); if (trunc) *tl = newnfs_true; else *tl = newnfs_false; (void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0); } else if (procnum == NFSV4OP_CBLAYOUTRECALL) { NFSD_DEBUG(4, "docallback layout recall\n"); nd->nd_procnum = NFSV4PROC_CBCOMPOUND; error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBLAYOUTRECALL, "CB Reclayout", &sep, &slotpos); NFSD_DEBUG(4, "aft cbcallargs=%d\n", error); if (error != 0) { m_freem(nd->nd_mreq); goto errout; } NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(laytype); *tl++ = txdr_unsigned(NFSLAYOUTIOMODE_ANY); if (trunc) *tl++ = newnfs_true; else *tl++ = newnfs_false; *tl = txdr_unsigned(NFSV4LAYOUTRET_FILE); nfsm_fhtom(nd, (uint8_t *)fhp, NFSX_MYFH, 0); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_HYPER + NFSX_STATEID); tval = 0; txdr_hyper(tval, tl); tl += 2; tval = UINT64_MAX; txdr_hyper(tval, tl); tl += 2; *tl++ = txdr_unsigned(stateidp->seqid); NFSBCOPY(stateidp->other, tl, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); NFSD_DEBUG(4, "aft args\n"); } else if (procnum == NFSV4PROC_CBNULL) { nd->nd_procnum = NFSV4PROC_CBNULL; if ((clp->lc_flags & LCL_NFSV41) != 0) { error = nfsv4_getcbsession(clp, &sep); if (error != 0) { m_freem(nd->nd_mreq); goto errout; } } } else { error = NFSERR_SERVERFAULT; m_freem(nd->nd_mreq); goto errout; } /* * Call newnfs_connect(), as required, and then newnfs_request(). */ dotls = false; if ((clp->lc_flags & LCL_TLSCB) != 0) dotls = true; (void) newnfs_sndlock(&clp->lc_req.nr_lock); if (clp->lc_req.nr_client == NULL) { if ((clp->lc_flags & LCL_NFSV41) != 0) { error = ECONNREFUSED; if (procnum != NFSV4PROC_CBNULL) nfsv4_freeslot(&sep->sess_cbsess, slotpos, true); nfsrv_freesession(sep, NULL); } else if (nd->nd_procnum == NFSV4PROC_CBNULL) error = newnfs_connect(NULL, &clp->lc_req, cred, NULL, 1, dotls, &clp->lc_req.nr_client); else error = newnfs_connect(NULL, &clp->lc_req, cred, NULL, 3, dotls, &clp->lc_req.nr_client); } newnfs_sndunlock(&clp->lc_req.nr_lock); NFSD_DEBUG(4, "aft sndunlock=%d\n", error); if (!error) { if ((nd->nd_flag & ND_NFSV41) != 0) { KASSERT(sep != NULL, ("sep NULL")); if (sep->sess_cbsess.nfsess_xprt != NULL) error = newnfs_request(nd, NULL, clp, &clp->lc_req, NULL, NULL, cred, clp->lc_program, clp->lc_req.nr_vers, NULL, 1, NULL, &sep->sess_cbsess); else { /* * This should probably never occur, but if a * client somehow does an RPC without a * SequenceID Op that causes a callback just * after the nfsd threads have been terminated * and restared we could conceivably get here * without a backchannel xprt. */ printf("nfsrv_docallback: no xprt\n"); error = ECONNREFUSED; } NFSD_DEBUG(4, "aft newnfs_request=%d\n", error); if (error != 0 && procnum != NFSV4PROC_CBNULL) { /* * It is likely that the callback was never * processed by the client and, as such, * the sequence# for the session slot needs * to be backed up by one to avoid a * NFSERR_SEQMISORDERED error reply. * For the unlikely case where the callback * was processed by the client, this will * make the next callback on the slot * appear to be a retry. * Since callbacks never specify that the * reply be cached, this "apparent retry" * should not be a problem. */ nfsv4_freeslot(&sep->sess_cbsess, slotpos, true); } nfsrv_freesession(sep, NULL); } else error = newnfs_request(nd, NULL, clp, &clp->lc_req, NULL, NULL, cred, clp->lc_program, clp->lc_req.nr_vers, NULL, 1, NULL, NULL); } errout: NFSFREECRED(cred); /* * If error is set here, the Callback path isn't working * properly, so twiddle the appropriate LCL_ flags. * (nd_repstat != 0 indicates the Callback path is working, * but the callback failed on the client.) */ if (error) { /* * Mark the callback pathway down, which disabled issuing * of delegations and gets Renew to return NFSERR_CBPATHDOWN. */ NFSLOCKSTATE(); clp->lc_flags |= LCL_CBDOWN; NFSUNLOCKSTATE(); } else { /* * Callback worked. If the callback path was down, disable * callbacks, so no more delegations will be issued. (This * is done on the assumption that the callback pathway is * flakey.) */ NFSLOCKSTATE(); if (clp->lc_flags & LCL_CBDOWN) clp->lc_flags &= ~(LCL_CBDOWN | LCL_CALLBACKSON); NFSUNLOCKSTATE(); if (nd->nd_repstat) { error = nd->nd_repstat; NFSD_DEBUG(1, "nfsrv_docallback op=%d err=%d\n", procnum, error); } else if (error == 0 && procnum == NFSV4OP_CBGETATTR) error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p, NULL); m_freem(nd->nd_mrep); } NFSLOCKSTATE(); clp->lc_cbref--; if ((clp->lc_flags & LCL_WAKEUPWANTED) && clp->lc_cbref == 0) { clp->lc_flags &= ~LCL_WAKEUPWANTED; wakeup(clp); } NFSUNLOCKSTATE(); free(nd, M_TEMP); NFSEXITCODE(error); return (error); } /* * Set up the compound RPC for the callback. */ static int nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp, uint32_t callback, int op, const char *optag, struct nfsdsession **sepp, int *slotposp) { uint32_t *tl; int error, len; len = strlen(optag); (void)nfsm_strtom(nd, optag, len); NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED); if ((nd->nd_flag & ND_NFSV41) != 0) { if ((nd->nd_flag & ND_NFSV42) != 0) *tl++ = txdr_unsigned(NFSV42_MINORVERSION); else *tl++ = txdr_unsigned(NFSV41_MINORVERSION); *tl++ = txdr_unsigned(callback); *tl++ = txdr_unsigned(2); *tl = txdr_unsigned(NFSV4OP_CBSEQUENCE); error = nfsv4_setcbsequence(nd, clp, 1, sepp, slotposp); if (error != 0) return (error); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(op); } else { *tl++ = txdr_unsigned(NFSV4_MINORVERSION); *tl++ = txdr_unsigned(callback); *tl++ = txdr_unsigned(1); *tl = txdr_unsigned(op); } return (0); } /* * Return the next index# for a clientid. Mostly just increment and return * the next one, but... if the 32bit unsigned does actually wrap around, * it should be rebooted. * At an average rate of one new client per second, it will wrap around in * approximately 136 years. (I think the server will have been shut * down or rebooted before then.) */ static u_int32_t nfsrv_nextclientindex(void) { static u_int32_t client_index = 0; client_index++; if (client_index != 0) return (client_index); printf("%s: out of clientids\n", __func__); return (client_index); } /* * Return the next index# for a stateid. Mostly just increment and return * the next one, but... if the 32bit unsigned does actually wrap around * (will a BSD server stay up that long?), find * new start and end values. */ static u_int32_t nfsrv_nextstateindex(struct nfsclient *clp) { struct nfsstate *stp; int i; u_int32_t canuse, min_index, max_index; if (!(clp->lc_flags & LCL_INDEXNOTOK)) { clp->lc_stateindex++; if (clp->lc_stateindex != clp->lc_statemaxindex) return (clp->lc_stateindex); } /* * Yuck, we've hit the end. * Look for a new min and max. */ min_index = 0; max_index = 0xffffffff; for (i = 0; i < nfsrv_statehashsize; i++) { LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) { if (stp->ls_stateid.other[2] > 0x80000000) { if (stp->ls_stateid.other[2] < max_index) max_index = stp->ls_stateid.other[2]; } else { if (stp->ls_stateid.other[2] > min_index) min_index = stp->ls_stateid.other[2]; } } } /* * Yikes, highly unlikely, but I'll handle it anyhow. */ if (min_index == 0x80000000 && max_index == 0x80000001) { canuse = 0; /* * Loop around until we find an unused entry. Return that * and set LCL_INDEXNOTOK, so the search will continue next time. * (This is one of those rare cases where a goto is the * cleanest way to code the loop.) */ tryagain: for (i = 0; i < nfsrv_statehashsize; i++) { LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) { if (stp->ls_stateid.other[2] == canuse) { canuse++; goto tryagain; } } } clp->lc_flags |= LCL_INDEXNOTOK; return (canuse); } /* * Ok to start again from min + 1. */ clp->lc_stateindex = min_index + 1; clp->lc_statemaxindex = max_index; clp->lc_flags &= ~LCL_INDEXNOTOK; return (clp->lc_stateindex); } /* * The following functions handle the stable storage file that deals with * the edge conditions described in RFC3530 Sec. 8.6.3. * The file is as follows: * - a single record at the beginning that has the lease time of the * previous server instance (before the last reboot) and the nfsrvboottime * values for the previous server boots. * These previous boot times are used to ensure that the current * nfsrvboottime does not, somehow, get set to a previous one. * (This is important so that Stale ClientIDs and StateIDs can * be recognized.) * The number of previous nfsvrboottime values precedes the list. * - followed by some number of appended records with: * - client id string * - flag that indicates it is a record revoking state via lease * expiration or similar * OR has successfully acquired state. * These structures vary in length, with the client string at the end, up * to NFSV4_OPAQUELIMIT in size. * * At the end of the grace period, the file is truncated, the first * record is rewritten with updated information and any acquired state * records for successful reclaims of state are written. * * Subsequent records are appended when the first state is issued to * a client and when state is revoked for a client. * * When reading the file in, state issued records that come later in * the file override older ones, since the append log is in cronological order. * If, for some reason, the file can't be read, the grace period is * immediately terminated and all reclaims get NFSERR_NOGRACE. */ /* * Read in the stable storage file. Called by nfssvc() before the nfsd * processes start servicing requests. */ void nfsrv_setupstable(NFSPROC_T *p) { struct nfsrv_stablefirst *sf = &nfsrv_stablefirst; struct nfsrv_stable *sp, *nsp; struct nfst_rec *tsp; int error, i, tryagain; off_t off = 0; ssize_t aresid, len; /* * If NFSNSF_UPDATEDONE is set, this is a restart of the nfsds without * a reboot, so state has not been lost. */ if (sf->nsf_flags & NFSNSF_UPDATEDONE) return; /* * Set Grace over just until the file reads successfully. */ nfsrvboottime = time_second; LIST_INIT(&sf->nsf_head); sf->nsf_flags = (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK); sf->nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA; if (sf->nsf_fp == NULL) return; error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp), (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p); if (error || aresid || sf->nsf_numboots == 0 || sf->nsf_numboots > NFSNSF_MAXNUMBOOTS) return; /* * Now, read in the boottimes. */ sf->nsf_bootvals = (time_t *)malloc((sf->nsf_numboots + 1) * sizeof (time_t), M_TEMP, M_WAITOK); off = sizeof (struct nfsf_rec); error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp), (caddr_t)sf->nsf_bootvals, sf->nsf_numboots * sizeof (time_t), off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p); if (error || aresid) { free(sf->nsf_bootvals, M_TEMP); sf->nsf_bootvals = NULL; return; } /* * Make sure this nfsrvboottime is different from all recorded * previous ones. */ do { tryagain = 0; for (i = 0; i < sf->nsf_numboots; i++) { if (nfsrvboottime == sf->nsf_bootvals[i]) { nfsrvboottime++; tryagain = 1; break; } } } while (tryagain); sf->nsf_flags |= NFSNSF_OK; off += (sf->nsf_numboots * sizeof (time_t)); /* * Read through the file, building a list of records for grace * checking. * Each record is between sizeof (struct nfst_rec) and * sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1 * and is actually sizeof (struct nfst_rec) + nst_len - 1. */ tsp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1, M_TEMP, M_WAITOK); do { error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp), (caddr_t)tsp, sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1, off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p); len = (sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1) - aresid; if (error || (len > 0 && (len < sizeof (struct nfst_rec) || len < (sizeof (struct nfst_rec) + tsp->len - 1)))) { /* * Yuck, the file has been corrupted, so just return * after clearing out any restart state, so the grace period * is over. */ LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) { LIST_REMOVE(sp, nst_list); free(sp, M_TEMP); } free(tsp, M_TEMP); sf->nsf_flags &= ~NFSNSF_OK; free(sf->nsf_bootvals, M_TEMP); sf->nsf_bootvals = NULL; return; } if (len > 0) { off += sizeof (struct nfst_rec) + tsp->len - 1; /* * Search the list for a matching client. */ LIST_FOREACH(sp, &sf->nsf_head, nst_list) { if (tsp->len == sp->nst_len && !NFSBCMP(tsp->client, sp->nst_client, tsp->len)) break; } if (sp == LIST_END(&sf->nsf_head)) { sp = (struct nfsrv_stable *)malloc(tsp->len + sizeof (struct nfsrv_stable) - 1, M_TEMP, M_WAITOK); NFSBCOPY((caddr_t)tsp, (caddr_t)&sp->nst_rec, sizeof (struct nfst_rec) + tsp->len - 1); LIST_INSERT_HEAD(&sf->nsf_head, sp, nst_list); } else { if (tsp->flag == NFSNST_REVOKE) sp->nst_flag |= NFSNST_REVOKE; else /* * A subsequent timestamp indicates the client * did a setclientid/confirm and any previous * revoke is no longer relevant. */ sp->nst_flag &= ~NFSNST_REVOKE; } } } while (len > 0); free(tsp, M_TEMP); sf->nsf_flags = NFSNSF_OK; sf->nsf_eograce = NFSD_MONOSEC + sf->nsf_lease + NFSRV_LEASEDELTA; } /* * Update the stable storage file, now that the grace period is over. */ void nfsrv_updatestable(NFSPROC_T *p) { struct nfsrv_stablefirst *sf = &nfsrv_stablefirst; struct nfsrv_stable *sp, *nsp; int i; struct nfsvattr nva; vnode_t vp; #if defined(__FreeBSD_version) && (__FreeBSD_version >= 500000) mount_t mp = NULL; #endif int error; if (sf->nsf_fp == NULL || (sf->nsf_flags & NFSNSF_UPDATEDONE)) return; sf->nsf_flags |= NFSNSF_UPDATEDONE; /* * Ok, we need to rewrite the stable storage file. * - truncate to 0 length * - write the new first structure * - loop through the data structures, writing out any that * have timestamps older than the old boot */ if (sf->nsf_bootvals) { sf->nsf_numboots++; for (i = sf->nsf_numboots - 2; i >= 0; i--) sf->nsf_bootvals[i + 1] = sf->nsf_bootvals[i]; } else { sf->nsf_numboots = 1; sf->nsf_bootvals = (time_t *)malloc(sizeof (time_t), M_TEMP, M_WAITOK); } sf->nsf_bootvals[0] = nfsrvboottime; sf->nsf_lease = nfsrv_lease; NFSVNO_ATTRINIT(&nva); NFSVNO_SETATTRVAL(&nva, size, 0); vp = NFSFPVNODE(sf->nsf_fp); vn_start_write(vp, &mp, V_WAIT); if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) { error = nfsvno_setattr(vp, &nva, NFSFPCRED(sf->nsf_fp), p, NULL); NFSVOPUNLOCK(vp); } else error = EPERM; vn_finished_write(mp); if (!error) error = NFSD_RDWR(UIO_WRITE, vp, (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), (off_t)0, UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p); if (!error) error = NFSD_RDWR(UIO_WRITE, vp, (caddr_t)sf->nsf_bootvals, sf->nsf_numboots * sizeof (time_t), (off_t)(sizeof (struct nfsf_rec)), UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p); free(sf->nsf_bootvals, M_TEMP); sf->nsf_bootvals = NULL; if (error) { sf->nsf_flags &= ~NFSNSF_OK; printf("EEK! Can't write NfsV4 stable storage file\n"); return; } sf->nsf_flags |= NFSNSF_OK; /* * Loop through the list and write out timestamp records for * any clients that successfully reclaimed state. */ LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) { if (sp->nst_flag & NFSNST_GOTSTATE) { nfsrv_writestable(sp->nst_client, sp->nst_len, NFSNST_NEWSTATE, p); sp->nst_clp->lc_flags |= LCL_STAMPEDSTABLE; } LIST_REMOVE(sp, nst_list); free(sp, M_TEMP); } nfsrv_backupstable(); } /* * Append a record to the stable storage file. */ void nfsrv_writestable(u_char *client, int len, int flag, NFSPROC_T *p) { struct nfsrv_stablefirst *sf = &nfsrv_stablefirst; struct nfst_rec *sp; int error; if (!(sf->nsf_flags & NFSNSF_OK) || sf->nsf_fp == NULL) return; sp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) + len - 1, M_TEMP, M_WAITOK); sp->len = len; NFSBCOPY(client, sp->client, len); sp->flag = flag; error = NFSD_RDWR(UIO_WRITE, NFSFPVNODE(sf->nsf_fp), (caddr_t)sp, sizeof (struct nfst_rec) + len - 1, (off_t)0, UIO_SYSSPACE, (IO_SYNC | IO_APPEND), NFSFPCRED(sf->nsf_fp), NULL, p); free(sp, M_TEMP); if (error) { sf->nsf_flags &= ~NFSNSF_OK; printf("EEK! Can't write NfsV4 stable storage file\n"); } } /* * This function is called during the grace period to mark a client * that successfully reclaimed state. */ static void nfsrv_markstable(struct nfsclient *clp) { struct nfsrv_stable *sp; /* * First find the client structure. */ LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) { if (sp->nst_len == clp->lc_idlen && !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len)) break; } if (sp == LIST_END(&nfsrv_stablefirst.nsf_head)) return; /* * Now, just mark it and set the nfsclient back pointer. */ sp->nst_flag |= NFSNST_GOTSTATE; sp->nst_clp = clp; } /* * This function is called when a NFSv4.1 client does a ReclaimComplete. * Very similar to nfsrv_markstable(), except for the flag being set. */ static void nfsrv_markreclaim(struct nfsclient *clp) { struct nfsrv_stable *sp; /* * First find the client structure. */ LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) { if (sp->nst_len == clp->lc_idlen && !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len)) break; } if (sp == LIST_END(&nfsrv_stablefirst.nsf_head)) return; /* * Now, just set the flag. */ sp->nst_flag |= NFSNST_RECLAIMED; } /* * This function is called for a reclaim, to see if it gets grace. * It returns 0 if a reclaim is allowed, 1 otherwise. */ static int nfsrv_checkstable(struct nfsclient *clp) { struct nfsrv_stable *sp; /* * First, find the entry for the client. */ LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) { if (sp->nst_len == clp->lc_idlen && !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len)) break; } /* * If not in the list, state was revoked or no state was issued * since the previous reboot, a reclaim is denied. */ if (sp == LIST_END(&nfsrv_stablefirst.nsf_head) || (sp->nst_flag & NFSNST_REVOKE) || !(nfsrv_stablefirst.nsf_flags & NFSNSF_OK)) return (1); return (0); } /* * Test for and try to clear out a conflicting client. This is called by * nfsrv_lockctrl() and nfsrv_openctrl() when conflicts with other clients * a found. * The trick here is that it can't revoke a conflicting client with an * expired lease unless it holds the v4root lock, so... * If no v4root lock, get the lock and return 1 to indicate "try again". * Return 0 to indicate the conflict can't be revoked and 1 to indicate * the revocation worked and the conflicting client is "bye, bye", so it * can be tried again. * Return 2 to indicate that the vnode is VIRF_DOOMED after NFSVOPLOCK(). * Unlocks State before a non-zero value is returned. */ static int nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, vnode_t vp, NFSPROC_T *p) { int gotlock, lktype = 0; /* * If lease hasn't expired, we can't fix it. */ if (clp->lc_expiry >= NFSD_MONOSEC || !(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE)) return (0); if (*haslockp == 0) { NFSUNLOCKSTATE(); if (vp != NULL) { lktype = NFSVOPISLOCKED(vp); NFSVOPUNLOCK(vp); } NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); do { gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!gotlock); NFSUNLOCKV4ROOTMUTEX(); *haslockp = 1; if (vp != NULL) { NFSVOPLOCK(vp, lktype | LK_RETRY); if (VN_IS_DOOMED(vp)) return (2); } return (1); } NFSUNLOCKSTATE(); /* * Ok, we can expire the conflicting client. */ nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p); nfsrv_backupstable(); nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); LIST_REMOVE(clp, lc_hash); nfsrv_zapclient(clp, p); return (1); } /* * Resolve a delegation conflict. * Returns 0 to indicate the conflict was resolved without sleeping. * Return -1 to indicate that the caller should check for conflicts again. * Return > 0 for an error that should be returned, normally NFSERR_DELAY. * * Also, manipulate the nfsv4root_lock, as required. It isn't changed * for a return of 0, since there was no sleep and it could be required * later. It is released for a return of NFSERR_DELAY, since the caller * will return that error. It is released when a sleep was done waiting * for the delegation to be returned or expire (so that other nfsds can * handle ops). Then, it must be acquired for the write to stable storage. * (This function is somewhat similar to nfsrv_clientconflict(), but * the semantics differ in a couple of subtle ways. The return of 0 * indicates the conflict was resolved without sleeping here, not * that the conflict can't be resolved and the handling of nfsv4root_lock * differs, as noted above.) * Unlocks State before returning a non-zero value. */ static int nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p, vnode_t vp) { struct nfsclient *clp = stp->ls_clp; int gotlock, error, lktype = 0, retrycnt, zapped_clp; nfsv4stateid_t tstateid; fhandle_t tfh; /* * If the conflict is with an old delegation... */ if (stp->ls_flags & NFSLCK_OLDDELEG) { /* * You can delete it, if it has expired. */ if (clp->lc_delegtime < NFSD_MONOSEC) { nfsrv_freedeleg(stp); NFSUNLOCKSTATE(); error = -1; goto out; } NFSUNLOCKSTATE(); /* * During this delay, the old delegation could expire or it * could be recovered by the client via an Open with * CLAIM_DELEGATE_PREV. * Release the nfsv4root_lock, if held. */ if (*haslockp) { *haslockp = 0; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_DELAY; goto out; } /* * It's a current delegation, so: * - check to see if the delegation has expired * - if so, get the v4root lock and then expire it */ if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0 || (stp->ls_lastrecall < NFSD_MONOSEC && clp->lc_expiry >= NFSD_MONOSEC && stp->ls_delegtime >= NFSD_MONOSEC)) { /* * - do a recall callback, since not yet done * For now, never allow truncate to be set. To use * truncate safely, it must be guaranteed that the * Remove, Rename or Setattr with size of 0 will * succeed and that would require major changes to * the VFS/Vnode OPs. * Set the expiry time large enough so that it won't expire * until after the callback, then set it correctly, once * the callback is done. (The delegation will now time * out whether or not the Recall worked ok. The timeout * will be extended when ops are done on the delegation * stateid, up to the timelimit.) */ if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0) { stp->ls_delegtime = NFSD_MONOSEC + (2 * nfsrv_lease) + NFSRV_LEASEDELTA; stp->ls_delegtimelimit = NFSD_MONOSEC + (6 * nfsrv_lease) + NFSRV_LEASEDELTA; stp->ls_flags |= NFSLCK_DELEGRECALL; } stp->ls_lastrecall = time_uptime + 1; /* * Loop NFSRV_CBRETRYCNT times while the CBRecall replies * NFSERR_BADSTATEID or NFSERR_BADHANDLE. This is done * in order to try and avoid a race that could happen * when a CBRecall request passed the Open reply with * the delegation in it when transitting the network. * Since nfsrv_docallback will sleep, don't use stp after * the call. */ NFSBCOPY((caddr_t)&stp->ls_stateid, (caddr_t)&tstateid, sizeof (tstateid)); NFSBCOPY((caddr_t)&stp->ls_lfp->lf_fh, (caddr_t)&tfh, sizeof (tfh)); NFSUNLOCKSTATE(); if (*haslockp) { *haslockp = 0; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } retrycnt = 0; do { error = nfsrv_docallback(clp, NFSV4OP_CBRECALL, &tstateid, 0, &tfh, NULL, NULL, 0, p); retrycnt++; } while ((error == NFSERR_BADSTATEID || error == NFSERR_BADHANDLE) && retrycnt < NFSV4_CBRETRYCNT); error = NFSERR_DELAY; goto out; } if (clp->lc_expiry >= NFSD_MONOSEC && stp->ls_delegtime >= NFSD_MONOSEC) { NFSUNLOCKSTATE(); /* * A recall has been done, but it has not yet expired. * So, RETURN_DELAY. */ if (*haslockp) { *haslockp = 0; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_DELAY; goto out; } /* * If we don't yet have the lock, just get it and then return, * since we need that before deleting expired state, such as * this delegation. * When getting the lock, unlock the vnode, so other nfsds that * are in progress, won't get stuck waiting for the vnode lock. */ if (*haslockp == 0) { NFSUNLOCKSTATE(); if (vp != NULL) { lktype = NFSVOPISLOCKED(vp); NFSVOPUNLOCK(vp); } NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); do { gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!gotlock); NFSUNLOCKV4ROOTMUTEX(); *haslockp = 1; if (vp != NULL) { NFSVOPLOCK(vp, lktype | LK_RETRY); if (VN_IS_DOOMED(vp)) { *haslockp = 0; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); error = NFSERR_PERM; goto out; } } error = -1; goto out; } NFSUNLOCKSTATE(); /* * Ok, we can delete the expired delegation. * First, write the Revoke record to stable storage and then * clear out the conflict. * Since all other nfsd threads are now blocked, we can safely * sleep without the state changing. */ nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p); nfsrv_backupstable(); if (clp->lc_expiry < NFSD_MONOSEC) { nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); LIST_REMOVE(clp, lc_hash); zapped_clp = 1; } else { nfsrv_freedeleg(stp); zapped_clp = 0; } if (zapped_clp) nfsrv_zapclient(clp, p); error = -1; out: NFSEXITCODE(error); return (error); } /* * Check for a remove allowed, if remove is set to 1 and get rid of * delegations. */ int nfsrv_checkremove(vnode_t vp, int remove, struct nfsrv_descript *nd, nfsquad_t clientid, NFSPROC_T *p) { struct nfsclient *clp; struct nfsstate *stp; struct nfslockfile *lfp; int error, haslock = 0; fhandle_t nfh; clp = NULL; /* * First, get the lock file structure. * (A return of -1 means no associated state, so remove ok.) */ error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p); tryagain: NFSLOCKSTATE(); if (error == 0 && clientid.qval != 0) error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (!error) error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0); if (error) { NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } if (error == -1) error = 0; goto out; } /* * Now, we must Recall any delegations. */ error = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p); if (error) { /* * nfsrv_cleandeleg() unlocks state for non-zero * return. */ if (error == -1) goto tryagain; if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } goto out; } /* * Now, look for a conflicting open share. */ if (remove) { /* * If the entry in the directory was the last reference to the * corresponding filesystem object, the object can be destroyed * */ if(lfp->lf_usecount>1) LIST_FOREACH(stp, &lfp->lf_open, ls_file) { if (stp->ls_flags & NFSLCK_WRITEDENY) { error = NFSERR_FILEOPEN; break; } } } NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } out: NFSEXITCODE(error); return (error); } /* * Clear out all delegations for the file referred to by lfp. * May return NFSERR_DELAY, if there will be a delay waiting for * delegations to expire. * Returns -1 to indicate it slept while recalling a delegation. * This function has the side effect of deleting the nfslockfile structure, * if it no longer has associated state and didn't have to sleep. * Unlocks State before a non-zero value is returned. */ static int nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp, struct nfsclient *clp, int *haslockp, NFSPROC_T *p) { struct nfsstate *stp, *nstp; int ret = 0; stp = LIST_FIRST(&lfp->lf_deleg); while (stp != LIST_END(&lfp->lf_deleg)) { nstp = LIST_NEXT(stp, ls_file); if (stp->ls_clp != clp) { ret = nfsrv_delegconflict(stp, haslockp, p, vp); if (ret) { /* * nfsrv_delegconflict() unlocks state * when it returns non-zero. */ goto out; } } stp = nstp; } out: NFSEXITCODE(ret); return (ret); } /* * There are certain operations that, when being done outside of NFSv4, * require that any NFSv4 delegation for the file be recalled. * This function is to be called for those cases: * VOP_RENAME() - When a delegation is being recalled for any reason, * the client may have to do Opens against the server, using the file's * final component name. If the file has been renamed on the server, * that component name will be incorrect and the Open will fail. * VOP_REMOVE() - Theoretically, a client could Open a file after it has * been removed on the server, if there is a delegation issued to * that client for the file. I say "theoretically" since clients * normally do an Access Op before the Open and that Access Op will * fail with ESTALE. Note that NFSv2 and 3 don't even do Opens, so * they will detect the file's removal in the same manner. (There is * one case where RFC3530 allows a client to do an Open without first * doing an Access Op, which is passage of a check against the ACE * returned with a Write delegation, but current practice is to ignore * the ACE and always do an Access Op.) * Since the functions can only be called with an unlocked vnode, this * can't be done at this time. * VOP_ADVLOCK() - When a client holds a delegation, it can issue byte range * locks locally in the client, which are not visible to the server. To * deal with this, issuing of delegations for a vnode must be disabled * and all delegations for the vnode recalled. This is done via the * second function, using the VV_DISABLEDELEG vflag on the vnode. */ void nfsd_recalldelegation(vnode_t vp, NFSPROC_T *p) { time_t starttime; int error; /* * First, check to see if the server is currently running and it has * been called for a regular file when issuing delegations. */ if (newnfs_numnfsd == 0 || vp->v_type != VREG || nfsrv_issuedelegs == 0) return; KASSERT((NFSVOPISLOCKED(vp) != LK_EXCLUSIVE), ("vp %p is locked", vp)); /* * First, get a reference on the nfsv4rootfs_lock so that an * exclusive lock cannot be acquired by another thread. */ NFSLOCKV4ROOTMUTEX(); nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); NFSUNLOCKV4ROOTMUTEX(); /* * Now, call nfsrv_checkremove() in a loop while it returns * NFSERR_DELAY. Return upon any other error or when timed out. */ starttime = NFSD_MONOSEC; do { if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) { error = nfsrv_checkremove(vp, 0, NULL, (nfsquad_t)((u_quad_t)0), p); NFSVOPUNLOCK(vp); } else error = EPERM; if (error == NFSERR_DELAY) { if (NFSD_MONOSEC - starttime > NFS_REMOVETIMEO) break; /* Sleep for a short period of time */ (void) nfs_catnap(PZERO, 0, "nfsremove"); } } while (error == NFSERR_DELAY); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); } void nfsd_disabledelegation(vnode_t vp, NFSPROC_T *p) { #ifdef VV_DISABLEDELEG /* * First, flag issuance of delegations disabled. */ atomic_set_long(&vp->v_vflag, VV_DISABLEDELEG); #endif /* * Then call nfsd_recalldelegation() to get rid of all extant * delegations. */ nfsd_recalldelegation(vp, p); } /* * Check for conflicting locks, etc. and then get rid of delegations. * (At one point I thought that I should get rid of delegations for any * Setattr, since it could potentially disallow the I/O op (read or write) * allowed by the delegation. However, Setattr Ops that aren't changing * the size get a stateid of all 0s, so you can't tell if it is a delegation * for the same client or a different one, so I decided to only get rid * of delegations for other clients when the size is being changed.) * In general, a Setattr can disable NFS I/O Ops that are outstanding, such * as Write backs, even if there is no delegation, so it really isn't any * different?) */ int nfsrv_checksetattr(vnode_t vp, struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp, struct nfsexstuff *exp, NFSPROC_T *p) { struct nfsstate st, *stp = &st; struct nfslock lo, *lop = &lo; int error = 0; nfsquad_t clientid; if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE)) { stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS); lop->lo_first = nvap->na_size; } else { stp->ls_flags = 0; lop->lo_first = 0; } if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNER) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNERGROUP) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_MODE) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_ACL)) stp->ls_flags |= NFSLCK_SETATTR; if (stp->ls_flags == 0) goto out; lop->lo_end = NFS64BITSSET; lop->lo_flags = NFSLCK_WRITE; stp->ls_ownerlen = 0; stp->ls_op = NULL; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = stateidp->seqid; clientid.lval[0] = stp->ls_stateid.other[0] = stateidp->other[0]; clientid.lval[1] = stp->ls_stateid.other[1] = stateidp->other[1]; stp->ls_stateid.other[2] = stateidp->other[2]; error = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid, stateidp, exp, nd, p); out: NFSEXITCODE2(error, nd); return (error); } /* * Check for a write delegation and do a CBGETATTR if there is one, updating * the attributes, as required. * Should I return an error if I can't get the attributes? (For now, I'll * just return ok. */ int nfsrv_checkgetattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp, NFSPROC_T *p) { struct nfsstate *stp; struct nfslockfile *lfp; struct nfsclient *clp; struct nfsvattr nva; fhandle_t nfh; int error = 0; nfsattrbit_t cbbits; u_quad_t delegfilerev; NFSCBGETATTR_ATTRBIT(attrbitp, &cbbits); if (!NFSNONZERO_ATTRBIT(&cbbits)) goto out; if (nfsrv_writedelegcnt == 0) goto out; /* * Get the lock file structure. * (A return of -1 means no associated state, so return ok.) */ error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p); NFSLOCKSTATE(); if (!error) error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0); if (error) { NFSUNLOCKSTATE(); if (error == -1) error = 0; goto out; } /* * Now, look for a write delegation. */ LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) { if (stp->ls_flags & NFSLCK_DELEGWRITE) break; } if (stp == LIST_END(&lfp->lf_deleg)) { NFSUNLOCKSTATE(); goto out; } clp = stp->ls_clp; /* If the clientid is not confirmed, ignore the delegation. */ if (clp->lc_flags & LCL_NEEDSCONFIRM) { NFSUNLOCKSTATE(); goto out; } delegfilerev = stp->ls_filerev; /* * If the Write delegation was issued as a part of this Compound RPC * or if we have an Implied Clientid (used in a previous Op in this * compound) and it is the client the delegation was issued to, * just return ok. * I also assume that it is from the same client iff the network * host IP address is the same as the callback address. (Not * exactly correct by the RFC, but avoids a lot of Getattr * callbacks.) */ if (nd->nd_compref == stp->ls_compref || ((nd->nd_flag & ND_IMPLIEDCLID) && clp->lc_clientid.qval == nd->nd_clientid.qval) || nfsaddr2_match(clp->lc_req.nr_nam, nd->nd_nam)) { NFSUNLOCKSTATE(); goto out; } /* * We are now done with the delegation state structure, * so the statelock can be released and we can now tsleep(). */ /* * Now, we must do the CB Getattr callback, to see if Change or Size * has changed. */ if (clp->lc_expiry >= NFSD_MONOSEC) { NFSUNLOCKSTATE(); NFSVNO_ATTRINIT(&nva); nva.na_filerev = NFS64BITSSET; error = nfsrv_docallback(clp, NFSV4OP_CBGETATTR, NULL, 0, &nfh, &nva, &cbbits, 0, p); if (!error) { if ((nva.na_filerev != NFS64BITSSET && nva.na_filerev > delegfilerev) || (NFSVNO_ISSETSIZE(&nva) && nva.na_size != nvap->na_size)) { error = nfsvno_updfilerev(vp, nvap, nd, p); if (NFSVNO_ISSETSIZE(&nva)) nvap->na_size = nva.na_size; } } else error = 0; /* Ignore callback errors for now. */ } else { NFSUNLOCKSTATE(); } out: NFSEXITCODE2(error, nd); return (error); } /* * This function looks for openowners that haven't had any opens for * a while and throws them away. Called by an nfsd when NFSNSF_NOOPENS * is set. */ void nfsrv_throwawayopens(NFSPROC_T *p) { struct nfsclient *clp, *nclp; struct nfsstate *stp, *nstp; int i; NFSLOCKSTATE(); nfsrv_stablefirst.nsf_flags &= ~NFSNSF_NOOPENS; /* * For each client... */ for (i = 0; i < nfsrv_clienthashsize; i++) { LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) { LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) { if (LIST_EMPTY(&stp->ls_open) && (stp->ls_noopens > NFSNOOPEN || (nfsrv_openpluslock * 2) > nfsrv_v4statelimit)) nfsrv_freeopenowner(stp, 0, p); } } } NFSUNLOCKSTATE(); } /* * This function checks to see if the credentials are the same. * Returns 1 for not same, 0 otherwise. */ static int nfsrv_notsamecredname(struct nfsrv_descript *nd, struct nfsclient *clp) { if (nd->nd_flag & ND_GSS) { if (!(clp->lc_flags & LCL_GSS)) return (1); if (clp->lc_flags & LCL_NAME) { if (nd->nd_princlen != clp->lc_namelen || NFSBCMP(nd->nd_principal, clp->lc_name, clp->lc_namelen)) return (1); else return (0); } if (nd->nd_cred->cr_uid == clp->lc_uid) return (0); else return (1); } else if (clp->lc_flags & LCL_GSS) return (1); /* * For AUTH_SYS, allow the same uid or root. (This is underspecified * in RFC3530, which talks about principals, but doesn't say anything * about uids for AUTH_SYS.) */ if (nd->nd_cred->cr_uid == clp->lc_uid || nd->nd_cred->cr_uid == 0) return (0); else return (1); } /* * Calculate the lease expiry time. */ static time_t nfsrv_leaseexpiry(void) { if (nfsrv_stablefirst.nsf_eograce > NFSD_MONOSEC) return (NFSD_MONOSEC + 2 * (nfsrv_lease + NFSRV_LEASEDELTA)); return (NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA); } /* * Delay the delegation timeout as far as ls_delegtimelimit, as required. */ static void nfsrv_delaydelegtimeout(struct nfsstate *stp) { if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0) return; if ((stp->ls_delegtime + 15) > NFSD_MONOSEC && stp->ls_delegtime < stp->ls_delegtimelimit) { stp->ls_delegtime += nfsrv_lease; if (stp->ls_delegtime > stp->ls_delegtimelimit) stp->ls_delegtime = stp->ls_delegtimelimit; } } /* * This function checks to see if there is any other state associated * with the openowner for this Open. * It returns 1 if there is no other state, 0 otherwise. */ static int nfsrv_nootherstate(struct nfsstate *stp) { struct nfsstate *tstp; LIST_FOREACH(tstp, &stp->ls_openowner->ls_open, ls_list) { if (tstp != stp || !LIST_EMPTY(&tstp->ls_lock)) return (0); } return (1); } /* * Create a list of lock deltas (changes to local byte range locking * that can be rolled back using the list) and apply the changes via * nfsvno_advlock(). Optionally, lock the list. It is expected that either * the rollback or update function will be called after this. * It returns an error (and rolls back, as required), if any nfsvno_advlock() * call fails. If it returns an error, it will unlock the list. */ static int nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags, uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p) { struct nfslock *lop, *nlop; int error = 0; /* Loop through the list of locks. */ lop = LIST_FIRST(&lfp->lf_locallock); while (first < end && lop != NULL) { nlop = LIST_NEXT(lop, lo_lckowner); if (first >= lop->lo_end) { /* not there yet */ lop = nlop; } else if (first < lop->lo_first) { /* new one starts before entry in list */ if (end <= lop->lo_first) { /* no overlap between old and new */ error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first, end, cfp, p); if (error != 0) break; first = end; } else { /* handle fragment overlapped with new one */ error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first, lop->lo_first, cfp, p); if (error != 0) break; first = lop->lo_first; } } else { /* new one overlaps this entry in list */ if (end <= lop->lo_end) { /* overlaps all of new one */ error = nfsrv_dolocal(vp, lfp, flags, lop->lo_flags, first, end, cfp, p); if (error != 0) break; first = end; } else { /* handle fragment overlapped with new one */ error = nfsrv_dolocal(vp, lfp, flags, lop->lo_flags, first, lop->lo_end, cfp, p); if (error != 0) break; first = lop->lo_end; lop = nlop; } } } if (first < end && error == 0) /* handle fragment past end of list */ error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first, end, cfp, p); NFSEXITCODE(error); return (error); } /* * Local lock unlock. Unlock all byte ranges that are no longer locked * by NFSv4. To do this, unlock any subranges of first-->end that * do not overlap with the byte ranges of any lock in the lfp->lf_lock * list. This list has all locks for the file held by other * tuples. The list is ordered by increasing * lo_first value, but may have entries that overlap each other, for * the case of read locks. */ static void nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp, uint64_t init_first, uint64_t init_end, NFSPROC_T *p) { struct nfslock *lop; uint64_t first, end, prevfirst __unused; first = init_first; end = init_end; while (first < init_end) { /* Loop through all nfs locks, adjusting first and end */ prevfirst = 0; LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) { KASSERT(prevfirst <= lop->lo_first, ("nfsv4 locks out of order")); KASSERT(lop->lo_first < lop->lo_end, ("nfsv4 bogus lock")); prevfirst = lop->lo_first; if (first >= lop->lo_first && first < lop->lo_end) /* * Overlaps with initial part, so trim * off that initial part by moving first past * it. */ first = lop->lo_end; else if (end > lop->lo_first && lop->lo_first > first) { /* * This lock defines the end of the * segment to unlock, so set end to the * start of it and break out of the loop. */ end = lop->lo_first; break; } if (first >= end) /* * There is no segment left to do, so * break out of this loop and then exit * the outer while() since first will be set * to end, which must equal init_end here. */ break; } if (first < end) { /* Unlock this segment */ (void) nfsrv_dolocal(vp, lfp, NFSLCK_UNLOCK, NFSLCK_READ, first, end, NULL, p); nfsrv_locallock_commit(lfp, NFSLCK_UNLOCK, first, end); } /* * Now move past this segment and look for any further * segment in the range, if there is one. */ first = end; end = init_end; } } /* * Do the local lock operation and update the rollback list, as required. * Perform the rollback and return the error if nfsvno_advlock() fails. */ static int nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags, int oldflags, uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p) { struct nfsrollback *rlp; int error = 0, ltype, oldltype; if (flags & NFSLCK_WRITE) ltype = F_WRLCK; else if (flags & NFSLCK_READ) ltype = F_RDLCK; else ltype = F_UNLCK; if (oldflags & NFSLCK_WRITE) oldltype = F_WRLCK; else if (oldflags & NFSLCK_READ) oldltype = F_RDLCK; else oldltype = F_UNLCK; if (ltype == oldltype || (oldltype == F_WRLCK && ltype == F_RDLCK)) /* nothing to do */ goto out; error = nfsvno_advlock(vp, ltype, first, end, p); if (error != 0) { if (cfp != NULL) { cfp->cl_clientid.lval[0] = 0; cfp->cl_clientid.lval[1] = 0; cfp->cl_first = 0; cfp->cl_end = NFS64BITSSET; cfp->cl_flags = NFSLCK_WRITE; cfp->cl_ownerlen = 5; NFSBCOPY("LOCAL", cfp->cl_owner, 5); } nfsrv_locallock_rollback(vp, lfp, p); } else if (ltype != F_UNLCK) { rlp = malloc(sizeof (struct nfsrollback), M_NFSDROLLBACK, M_WAITOK); rlp->rlck_first = first; rlp->rlck_end = end; rlp->rlck_type = oldltype; LIST_INSERT_HEAD(&lfp->lf_rollback, rlp, rlck_list); } out: NFSEXITCODE(error); return (error); } /* * Roll back local lock changes and free up the rollback list. */ static void nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp, NFSPROC_T *p) { struct nfsrollback *rlp, *nrlp; LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) { (void) nfsvno_advlock(vp, rlp->rlck_type, rlp->rlck_first, rlp->rlck_end, p); free(rlp, M_NFSDROLLBACK); } LIST_INIT(&lfp->lf_rollback); } /* * Update local lock list and delete rollback list (ie now committed to the * local locks). Most of the work is done by the internal function. */ static void nfsrv_locallock_commit(struct nfslockfile *lfp, int flags, uint64_t first, uint64_t end) { struct nfsrollback *rlp, *nrlp; struct nfslock *new_lop, *other_lop; new_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK); if (flags & (NFSLCK_READ | NFSLCK_WRITE)) other_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK); else other_lop = NULL; new_lop->lo_flags = flags; new_lop->lo_first = first; new_lop->lo_end = end; nfsrv_updatelock(NULL, &new_lop, &other_lop, lfp); if (new_lop != NULL) free(new_lop, M_NFSDLOCK); if (other_lop != NULL) free(other_lop, M_NFSDLOCK); /* and get rid of the rollback list */ LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) free(rlp, M_NFSDROLLBACK); LIST_INIT(&lfp->lf_rollback); } /* * Lock the struct nfslockfile for local lock updating. */ static void nfsrv_locklf(struct nfslockfile *lfp) { int gotlock; /* lf_usecount ensures *lfp won't be free'd */ lfp->lf_usecount++; do { gotlock = nfsv4_lock(&lfp->lf_locallock_lck, 1, NULL, NFSSTATEMUTEXPTR, NULL); } while (gotlock == 0); lfp->lf_usecount--; } /* * Unlock the struct nfslockfile after local lock updating. */ static void nfsrv_unlocklf(struct nfslockfile *lfp) { nfsv4_unlock(&lfp->lf_locallock_lck, 0); } /* * Clear out all state for the NFSv4 server. * Must be called by a thread that can sleep when no nfsds are running. */ void nfsrv_throwawayallstate(NFSPROC_T *p) { struct nfsclient *clp, *nclp; struct nfslockfile *lfp, *nlfp; int i; /* * For each client, clean out the state and then free the structure. */ for (i = 0; i < nfsrv_clienthashsize; i++) { LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) { nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); } } /* * Also, free up any remaining lock file structures. */ for (i = 0; i < nfsrv_lockhashsize; i++) { LIST_FOREACH_SAFE(lfp, &nfslockhash[i], lf_hash, nlfp) { printf("nfsd unload: fnd a lock file struct\n"); nfsrv_freenfslockfile(lfp); } } /* And get rid of the deviceid structures and layouts. */ nfsrv_freealllayoutsanddevids(); } /* * Check the sequence# for the session and slot provided as an argument. * Also, renew the lease if the session will return NFS_OK. */ int nfsrv_checksequence(struct nfsrv_descript *nd, uint32_t sequenceid, uint32_t *highest_slotidp, uint32_t *target_highest_slotidp, int cache_this, uint32_t *sflagsp, NFSPROC_T *p) { struct nfsdsession *sep; struct nfssessionhash *shp; int error; shp = NFSSESSIONHASH(nd->nd_sessionid); NFSLOCKSESSION(shp); sep = nfsrv_findsession(nd->nd_sessionid); if (sep == NULL) { NFSUNLOCKSESSION(shp); return (NFSERR_BADSESSION); } error = nfsv4_seqsession(sequenceid, nd->nd_slotid, *highest_slotidp, sep->sess_slots, NULL, NFSV4_SLOTS - 1); if (error != 0) { NFSUNLOCKSESSION(shp); return (error); } if (cache_this != 0) nd->nd_flag |= ND_SAVEREPLY; /* Renew the lease. */ sep->sess_clp->lc_expiry = nfsrv_leaseexpiry(); nd->nd_clientid.qval = sep->sess_clp->lc_clientid.qval; nd->nd_flag |= ND_IMPLIEDCLID; /* Save maximum request and reply sizes. */ nd->nd_maxreq = sep->sess_maxreq; nd->nd_maxresp = sep->sess_maxresp; *sflagsp = 0; if (sep->sess_clp->lc_req.nr_client == NULL || (sep->sess_clp->lc_flags & LCL_CBDOWN) != 0) *sflagsp |= NFSV4SEQ_CBPATHDOWN; NFSUNLOCKSESSION(shp); if (error == NFSERR_EXPIRED) { *sflagsp |= NFSV4SEQ_EXPIREDALLSTATEREVOKED; error = 0; } else if (error == NFSERR_ADMINREVOKED) { *sflagsp |= NFSV4SEQ_ADMINSTATEREVOKED; error = 0; } *highest_slotidp = *target_highest_slotidp = NFSV4_SLOTS - 1; return (0); } /* * Check/set reclaim complete for this session/clientid. */ int nfsrv_checkreclaimcomplete(struct nfsrv_descript *nd, int onefs) { struct nfsdsession *sep; struct nfssessionhash *shp; int error = 0; shp = NFSSESSIONHASH(nd->nd_sessionid); NFSLOCKSTATE(); NFSLOCKSESSION(shp); sep = nfsrv_findsession(nd->nd_sessionid); if (sep == NULL) { NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); return (NFSERR_BADSESSION); } if (onefs != 0) sep->sess_clp->lc_flags |= LCL_RECLAIMONEFS; /* Check to see if reclaim complete has already happened. */ else if ((sep->sess_clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0) error = NFSERR_COMPLETEALREADY; else { sep->sess_clp->lc_flags |= LCL_RECLAIMCOMPLETE; nfsrv_markreclaim(sep->sess_clp); } NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); return (error); } /* * Cache the reply in a session slot. */ void nfsrv_cache_session(struct nfsrv_descript *nd, struct mbuf **m) { struct nfsdsession *sep; struct nfssessionhash *shp; char *buf, *cp; #ifdef INET struct sockaddr_in *sin; #endif #ifdef INET6 struct sockaddr_in6 *sin6; #endif shp = NFSSESSIONHASH(nd->nd_sessionid); NFSLOCKSESSION(shp); sep = nfsrv_findsession(nd->nd_sessionid); if (sep == NULL) { NFSUNLOCKSESSION(shp); if ((nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) != 0) { buf = malloc(INET6_ADDRSTRLEN, M_TEMP, M_WAITOK); switch (nd->nd_nam->sa_family) { #ifdef INET case AF_INET: sin = (struct sockaddr_in *)nd->nd_nam; cp = inet_ntop(sin->sin_family, &sin->sin_addr.s_addr, buf, INET6_ADDRSTRLEN); break; #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)nd->nd_nam; cp = inet_ntop(sin6->sin6_family, &sin6->sin6_addr, buf, INET6_ADDRSTRLEN); break; #endif default: cp = NULL; } if (cp != NULL) printf("nfsrv_cache_session: no session " "IPaddr=%s\n", cp); else printf("nfsrv_cache_session: no session\n"); free(buf, M_TEMP); } m_freem(*m); return; } nfsv4_seqsess_cacherep(nd->nd_slotid, sep->sess_slots, nd->nd_repstat, m); NFSUNLOCKSESSION(shp); } /* * Search for a session that matches the sessionid. */ static struct nfsdsession * nfsrv_findsession(uint8_t *sessionid) { struct nfsdsession *sep; struct nfssessionhash *shp; shp = NFSSESSIONHASH(sessionid); LIST_FOREACH(sep, &shp->list, sess_hash) { if (!NFSBCMP(sessionid, sep->sess_sessionid, NFSX_V4SESSIONID)) break; } return (sep); } /* * Destroy a session. */ int nfsrv_destroysession(struct nfsrv_descript *nd, uint8_t *sessionid) { int error, igotlock, samesess; samesess = 0; if (!NFSBCMP(sessionid, nd->nd_sessionid, NFSX_V4SESSIONID) && (nd->nd_flag & ND_HASSEQUENCE) != 0) { samesess = 1; if ((nd->nd_flag & ND_LASTOP) == 0) return (NFSERR_BADSESSION); } /* Lock out other nfsd threads */ NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); do { igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (igotlock == 0); NFSUNLOCKV4ROOTMUTEX(); error = nfsrv_freesession(NULL, sessionid); if (error == 0 && samesess != 0) nd->nd_flag &= ~ND_HASSEQUENCE; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); return (error); } /* * Bind a connection to a session. * For now, only certain variants are supported, since the current session * structure can only handle a single backchannel entry, which will be * applied to all connections if it is set. */ int nfsrv_bindconnsess(struct nfsrv_descript *nd, uint8_t *sessionid, int *foreaftp) { struct nfssessionhash *shp; struct nfsdsession *sep; struct nfsclient *clp; SVCXPRT *savxprt; int error; error = 0; savxprt = NULL; shp = NFSSESSIONHASH(sessionid); NFSLOCKSTATE(); NFSLOCKSESSION(shp); sep = nfsrv_findsession(sessionid); if (sep != NULL) { clp = sep->sess_clp; if (*foreaftp == NFSCDFC4_BACK || *foreaftp == NFSCDFC4_BACK_OR_BOTH || *foreaftp == NFSCDFC4_FORE_OR_BOTH) { /* Try to set up a backchannel. */ if (clp->lc_req.nr_client == NULL) { NFSD_DEBUG(2, "nfsrv_bindconnsess: acquire " "backchannel\n"); clp->lc_req.nr_client = (struct __rpc_client *) clnt_bck_create(nd->nd_xprt->xp_socket, sep->sess_cbprogram, NFSV4_CBVERS); } if (clp->lc_req.nr_client != NULL) { NFSD_DEBUG(2, "nfsrv_bindconnsess: set up " "backchannel\n"); savxprt = sep->sess_cbsess.nfsess_xprt; SVC_ACQUIRE(nd->nd_xprt); CLNT_ACQUIRE(clp->lc_req.nr_client); nd->nd_xprt->xp_p2 = clp->lc_req.nr_client; /* Disable idle timeout. */ nd->nd_xprt->xp_idletimeout = 0; sep->sess_cbsess.nfsess_xprt = nd->nd_xprt; sep->sess_crflags |= NFSV4CRSESS_CONNBACKCHAN; clp->lc_flags |= LCL_DONEBINDCONN | LCL_NEEDSCBNULL; clp->lc_flags &= ~LCL_CBDOWN; if (*foreaftp == NFSCDFS4_BACK) *foreaftp = NFSCDFS4_BACK; else *foreaftp = NFSCDFS4_BOTH; } else if (*foreaftp != NFSCDFC4_BACK) { NFSD_DEBUG(2, "nfsrv_bindconnsess: can't set " "up backchannel\n"); sep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN; clp->lc_flags |= LCL_DONEBINDCONN; *foreaftp = NFSCDFS4_FORE; } else { error = NFSERR_NOTSUPP; printf("nfsrv_bindconnsess: Can't add " "backchannel\n"); } } else { NFSD_DEBUG(2, "nfsrv_bindconnsess: Set forechannel\n"); clp->lc_flags |= LCL_DONEBINDCONN; *foreaftp = NFSCDFS4_FORE; } } else error = NFSERR_BADSESSION; NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); if (savxprt != NULL) SVC_RELEASE(savxprt); return (error); } /* * Free up a session structure. */ static int nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid) { struct nfssessionhash *shp; int i; NFSLOCKSTATE(); if (sep == NULL) { shp = NFSSESSIONHASH(sessionid); NFSLOCKSESSION(shp); sep = nfsrv_findsession(sessionid); } else { shp = NFSSESSIONHASH(sep->sess_sessionid); NFSLOCKSESSION(shp); } if (sep != NULL) { sep->sess_refcnt--; if (sep->sess_refcnt > 0) { NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); return (NFSERR_BACKCHANBUSY); } LIST_REMOVE(sep, sess_hash); LIST_REMOVE(sep, sess_list); } NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); if (sep == NULL) return (NFSERR_BADSESSION); for (i = 0; i < NFSV4_SLOTS; i++) if (sep->sess_slots[i].nfssl_reply != NULL) m_freem(sep->sess_slots[i].nfssl_reply); if (sep->sess_cbsess.nfsess_xprt != NULL) SVC_RELEASE(sep->sess_cbsess.nfsess_xprt); free(sep, M_NFSDSESSION); return (0); } /* * Free a stateid. * RFC5661 says that it should fail when there are associated opens, locks * or delegations. Since stateids represent opens, I don't see how you can * free an open stateid (it will be free'd when closed), so this function * only works for lock stateids (freeing the lock_owner) or delegations. */ int nfsrv_freestateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, NFSPROC_T *p) { struct nfsclient *clp; struct nfsstate *stp; int error; NFSLOCKSTATE(); /* * Look up the stateid */ error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (error == 0) { /* First, check for a delegation. */ LIST_FOREACH(stp, &clp->lc_deleg, ls_list) { if (!NFSBCMP(stp->ls_stateid.other, stateidp->other, NFSX_STATEIDOTHER)) break; } if (stp != NULL) { nfsrv_freedeleg(stp); NFSUNLOCKSTATE(); return (error); } } /* Not a delegation, try for a lock_owner. */ if (error == 0) error = nfsrv_getstate(clp, stateidp, 0, &stp); if (error == 0 && ((stp->ls_flags & (NFSLCK_OPEN | NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) != 0 || (stp->ls_flags & NFSLCK_LOCK) == 0)) /* Not a lock_owner stateid. */ error = NFSERR_LOCKSHELD; if (error == 0 && !LIST_EMPTY(&stp->ls_lock)) error = NFSERR_LOCKSHELD; if (error == 0) nfsrv_freelockowner(stp, NULL, 0, p); NFSUNLOCKSTATE(); return (error); } /* * Test a stateid. */ int nfsrv_teststateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, NFSPROC_T *p) { struct nfsclient *clp; struct nfsstate *stp; int error; NFSLOCKSTATE(); /* * Look up the stateid */ error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (error == 0) error = nfsrv_getstate(clp, stateidp, 0, &stp); if (error == 0 && stateidp->seqid != 0 && SEQ_LT(stateidp->seqid, stp->ls_stateid.seqid)) error = NFSERR_OLDSTATEID; NFSUNLOCKSTATE(); return (error); } /* * Generate the xdr for an NFSv4.1 CBSequence Operation. */ static int nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp, int dont_replycache, struct nfsdsession **sepp, int *slotposp) { struct nfsdsession *sep; uint32_t *tl, slotseq = 0; int maxslot; uint8_t sessionid[NFSX_V4SESSIONID]; int error; error = nfsv4_getcbsession(clp, sepp); if (error != 0) return (error); sep = *sepp; (void)nfsv4_sequencelookup(NULL, &sep->sess_cbsess, slotposp, &maxslot, &slotseq, sessionid); KASSERT(maxslot >= 0, ("nfsv4_setcbsequence neg maxslot")); /* Build the Sequence arguments. */ NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 5 * NFSX_UNSIGNED); bcopy(sessionid, tl, NFSX_V4SESSIONID); tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; nd->nd_slotseq = tl; *tl++ = txdr_unsigned(slotseq); *tl++ = txdr_unsigned(*slotposp); *tl++ = txdr_unsigned(maxslot); if (dont_replycache == 0) *tl++ = newnfs_true; else *tl++ = newnfs_false; *tl = 0; /* No referring call list, for now. */ nd->nd_flag |= ND_HASSEQUENCE; return (0); } /* * Get a session for the callback. */ static int nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp) { struct nfsdsession *sep; NFSLOCKSTATE(); LIST_FOREACH(sep, &clp->lc_session, sess_list) { if ((sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0) break; } if (sep == NULL) { NFSUNLOCKSTATE(); return (NFSERR_BADSESSION); } sep->sess_refcnt++; *sepp = sep; NFSUNLOCKSTATE(); return (0); } /* * Free up all backchannel xprts. This needs to be done when the nfsd threads * exit, since those transports will all be going away. * This is only called after all the nfsd threads are done performing RPCs, * so locking shouldn't be an issue. */ void nfsrv_freeallbackchannel_xprts(void) { struct nfsdsession *sep; struct nfsclient *clp; SVCXPRT *xprt; int i; for (i = 0; i < nfsrv_clienthashsize; i++) { LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) { LIST_FOREACH(sep, &clp->lc_session, sess_list) { xprt = sep->sess_cbsess.nfsess_xprt; sep->sess_cbsess.nfsess_xprt = NULL; if (xprt != NULL) SVC_RELEASE(xprt); } } } } /* * Do a layout commit. Actually just call nfsrv_updatemdsattr(). * I have no idea if the rest of these arguments will ever be useful? */ int nfsrv_layoutcommit(struct nfsrv_descript *nd, vnode_t vp, int layouttype, int hasnewoff, uint64_t newoff, uint64_t offset, uint64_t len, int hasnewmtime, struct timespec *newmtimep, int reclaim, nfsv4stateid_t *stateidp, int maxcnt, char *layp, int *hasnewsizep, uint64_t *newsizep, struct ucred *cred, NFSPROC_T *p) { struct nfsvattr na; int error; error = nfsrv_updatemdsattr(vp, &na, p); if (error == 0) { *hasnewsizep = 1; *newsizep = na.na_size; } return (error); } /* * Try and get a layout. */ int nfsrv_layoutget(struct nfsrv_descript *nd, vnode_t vp, struct nfsexstuff *exp, int layouttype, int *iomode, uint64_t *offset, uint64_t *len, uint64_t minlen, nfsv4stateid_t *stateidp, int maxcnt, int *retonclose, int *layoutlenp, char *layp, struct ucred *cred, NFSPROC_T *p) { struct nfslayouthash *lhyp; struct nfslayout *lyp; char *devid; fhandle_t fh, *dsfhp; int error, mirrorcnt; if (nfsrv_devidcnt == 0) return (NFSERR_UNKNLAYOUTTYPE); if (*offset != 0) printf("nfsrv_layoutget: off=%ju len=%ju\n", (uintmax_t)*offset, (uintmax_t)*len); error = nfsvno_getfh(vp, &fh, p); NFSD_DEBUG(4, "layoutget getfh=%d\n", error); if (error != 0) return (error); /* * For now, all layouts are for entire files. * Only issue Read/Write layouts if requested for a non-readonly fs. */ if (NFSVNO_EXRDONLY(exp)) { if (*iomode == NFSLAYOUTIOMODE_RW) return (NFSERR_LAYOUTTRYLATER); *iomode = NFSLAYOUTIOMODE_READ; } if (*iomode != NFSLAYOUTIOMODE_RW) *iomode = NFSLAYOUTIOMODE_READ; /* * Check to see if a write layout can be issued for this file. * This is used during mirror recovery to avoid RW layouts being * issued for a file while it is being copied to the recovered * mirror. */ if (*iomode == NFSLAYOUTIOMODE_RW && nfsrv_dontlayout(&fh) != 0) return (NFSERR_LAYOUTTRYLATER); *retonclose = 0; *offset = 0; *len = UINT64_MAX; /* First, see if a layout already exists and return if found. */ lhyp = NFSLAYOUTHASH(&fh); NFSLOCKLAYOUT(lhyp); error = nfsrv_findlayout(&nd->nd_clientid, &fh, layouttype, p, &lyp); NFSD_DEBUG(4, "layoutget findlay=%d\n", error); /* * Not sure if the seqid must be the same, so I won't check it. */ if (error == 0 && (stateidp->other[0] != lyp->lay_stateid.other[0] || stateidp->other[1] != lyp->lay_stateid.other[1] || stateidp->other[2] != lyp->lay_stateid.other[2])) { if ((lyp->lay_flags & NFSLAY_CALLB) == 0) { NFSUNLOCKLAYOUT(lhyp); NFSD_DEBUG(1, "ret bad stateid\n"); return (NFSERR_BADSTATEID); } /* * I believe we get here because there is a race between * the client processing the CBLAYOUTRECALL and the layout * being deleted here on the server. * The client has now done a LayoutGet with a non-layout * stateid, as it would when there is no layout. * As such, free this layout and set error == NFSERR_BADSTATEID * so the code below will create a new layout structure as * would happen if no layout was found. * "lyp" will be set before being used below, but set it NULL * as a safety belt. */ nfsrv_freelayout(&lhyp->list, lyp); lyp = NULL; error = NFSERR_BADSTATEID; } if (error == 0) { if (lyp->lay_layoutlen > maxcnt) { NFSUNLOCKLAYOUT(lhyp); NFSD_DEBUG(1, "ret layout too small\n"); return (NFSERR_TOOSMALL); } if (*iomode == NFSLAYOUTIOMODE_RW) lyp->lay_flags |= NFSLAY_RW; else lyp->lay_flags |= NFSLAY_READ; NFSBCOPY(lyp->lay_xdr, layp, lyp->lay_layoutlen); *layoutlenp = lyp->lay_layoutlen; if (++lyp->lay_stateid.seqid == 0) lyp->lay_stateid.seqid = 1; stateidp->seqid = lyp->lay_stateid.seqid; NFSUNLOCKLAYOUT(lhyp); NFSD_DEBUG(4, "ret fnd layout\n"); return (0); } NFSUNLOCKLAYOUT(lhyp); /* Find the device id and file handle. */ dsfhp = malloc(sizeof(fhandle_t) * NFSDEV_MAXMIRRORS, M_TEMP, M_WAITOK); devid = malloc(NFSX_V4DEVICEID * NFSDEV_MAXMIRRORS, M_TEMP, M_WAITOK); error = nfsrv_dsgetdevandfh(vp, p, &mirrorcnt, dsfhp, devid); NFSD_DEBUG(4, "layoutget devandfh=%d\n", error); if (error == 0) { if (layouttype == NFSLAYOUT_NFSV4_1_FILES) { if (NFSX_V4FILELAYOUT > maxcnt) error = NFSERR_TOOSMALL; else lyp = nfsrv_filelayout(nd, *iomode, &fh, dsfhp, devid, vp->v_mount->mnt_stat.f_fsid); } else { if (NFSX_V4FLEXLAYOUT(mirrorcnt) > maxcnt) error = NFSERR_TOOSMALL; else lyp = nfsrv_flexlayout(nd, *iomode, mirrorcnt, &fh, dsfhp, devid, vp->v_mount->mnt_stat.f_fsid); } } free(dsfhp, M_TEMP); free(devid, M_TEMP); if (error != 0) return (error); /* * Now, add this layout to the list. */ error = nfsrv_addlayout(nd, &lyp, stateidp, layp, layoutlenp, p); NFSD_DEBUG(4, "layoutget addl=%d\n", error); /* * The lyp will be set to NULL by nfsrv_addlayout() if it * linked the new structure into the lists. */ free(lyp, M_NFSDSTATE); return (error); } /* * Generate a File Layout. */ static struct nfslayout * nfsrv_filelayout(struct nfsrv_descript *nd, int iomode, fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs) { uint32_t *tl; struct nfslayout *lyp; uint64_t pattern_offset; lyp = malloc(sizeof(struct nfslayout) + NFSX_V4FILELAYOUT, M_NFSDSTATE, M_WAITOK | M_ZERO); lyp->lay_type = NFSLAYOUT_NFSV4_1_FILES; if (iomode == NFSLAYOUTIOMODE_RW) lyp->lay_flags = NFSLAY_RW; else lyp->lay_flags = NFSLAY_READ; NFSBCOPY(fhp, &lyp->lay_fh, sizeof(*fhp)); lyp->lay_clientid.qval = nd->nd_clientid.qval; lyp->lay_fsid = fs; /* Fill in the xdr for the files layout. */ tl = (uint32_t *)lyp->lay_xdr; NFSBCOPY(devid, tl, NFSX_V4DEVICEID); /* Device ID. */ tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); /* Set the stripe size to the maximum I/O size. */ - *tl++ = txdr_unsigned(NFS_SRVMAXIO & NFSFLAYUTIL_STRIPE_MASK); + *tl++ = txdr_unsigned(nfs_srvmaxio & NFSFLAYUTIL_STRIPE_MASK); *tl++ = 0; /* 1st stripe index. */ pattern_offset = 0; txdr_hyper(pattern_offset, tl); tl += 2; /* Pattern offset. */ *tl++ = txdr_unsigned(1); /* 1 file handle. */ *tl++ = txdr_unsigned(NFSX_V4PNFSFH); NFSBCOPY(dsfhp, tl, sizeof(*dsfhp)); lyp->lay_layoutlen = NFSX_V4FILELAYOUT; return (lyp); } #define FLEX_OWNERID "999" #define FLEX_UID0 "0" /* * Generate a Flex File Layout. * The FLEX_OWNERID can be any string of 3 decimal digits. Although this * string goes on the wire, it isn't supposed to be used by the client, * since this server uses tight coupling. * Although not recommended by the spec., if vfs.nfsd.flexlinuxhack=1 use * a string of "0". This works around the Linux Flex File Layout driver bug * which uses the synthetic uid/gid strings for the "tightly coupled" case. */ static struct nfslayout * nfsrv_flexlayout(struct nfsrv_descript *nd, int iomode, int mirrorcnt, fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs) { uint32_t *tl; struct nfslayout *lyp; uint64_t lenval; int i; lyp = malloc(sizeof(struct nfslayout) + NFSX_V4FLEXLAYOUT(mirrorcnt), M_NFSDSTATE, M_WAITOK | M_ZERO); lyp->lay_type = NFSLAYOUT_FLEXFILE; if (iomode == NFSLAYOUTIOMODE_RW) lyp->lay_flags = NFSLAY_RW; else lyp->lay_flags = NFSLAY_READ; NFSBCOPY(fhp, &lyp->lay_fh, sizeof(*fhp)); lyp->lay_clientid.qval = nd->nd_clientid.qval; lyp->lay_fsid = fs; lyp->lay_mirrorcnt = mirrorcnt; /* Fill in the xdr for the files layout. */ tl = (uint32_t *)lyp->lay_xdr; lenval = 0; txdr_hyper(lenval, tl); tl += 2; /* Stripe unit. */ *tl++ = txdr_unsigned(mirrorcnt); /* # of mirrors. */ for (i = 0; i < mirrorcnt; i++) { *tl++ = txdr_unsigned(1); /* One stripe. */ NFSBCOPY(devid, tl, NFSX_V4DEVICEID); /* Device ID. */ tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); devid += NFSX_V4DEVICEID; *tl++ = txdr_unsigned(1); /* Efficiency. */ *tl++ = 0; /* Proxy Stateid. */ *tl++ = 0x55555555; *tl++ = 0x55555555; *tl++ = 0x55555555; *tl++ = txdr_unsigned(1); /* 1 file handle. */ *tl++ = txdr_unsigned(NFSX_V4PNFSFH); NFSBCOPY(dsfhp, tl, sizeof(*dsfhp)); tl += (NFSM_RNDUP(NFSX_V4PNFSFH) / NFSX_UNSIGNED); dsfhp++; if (nfsrv_flexlinuxhack != 0) { *tl++ = txdr_unsigned(strlen(FLEX_UID0)); *tl = 0; /* 0 pad string. */ NFSBCOPY(FLEX_UID0, tl++, strlen(FLEX_UID0)); *tl++ = txdr_unsigned(strlen(FLEX_UID0)); *tl = 0; /* 0 pad string. */ NFSBCOPY(FLEX_UID0, tl++, strlen(FLEX_UID0)); } else { *tl++ = txdr_unsigned(strlen(FLEX_OWNERID)); NFSBCOPY(FLEX_OWNERID, tl++, NFSX_UNSIGNED); *tl++ = txdr_unsigned(strlen(FLEX_OWNERID)); NFSBCOPY(FLEX_OWNERID, tl++, NFSX_UNSIGNED); } } *tl++ = txdr_unsigned(0); /* ff_flags. */ *tl = txdr_unsigned(60); /* Status interval hint. */ lyp->lay_layoutlen = NFSX_V4FLEXLAYOUT(mirrorcnt); return (lyp); } /* * Parse and process Flex File errors returned via LayoutReturn. */ static void nfsrv_flexlayouterr(struct nfsrv_descript *nd, uint32_t *layp, int maxcnt, NFSPROC_T *p) { uint32_t *tl; int cnt, errcnt, i, j, opnum, stat; char devid[NFSX_V4DEVICEID]; tl = layp; cnt = fxdr_unsigned(int, *tl++); NFSD_DEBUG(4, "flexlayouterr cnt=%d\n", cnt); for (i = 0; i < cnt; i++) { /* Skip offset, length and stateid for now. */ tl += (4 + NFSX_STATEID / NFSX_UNSIGNED); errcnt = fxdr_unsigned(int, *tl++); NFSD_DEBUG(4, "flexlayouterr errcnt=%d\n", errcnt); for (j = 0; j < errcnt; j++) { NFSBCOPY(tl, devid, NFSX_V4DEVICEID); tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); stat = fxdr_unsigned(int, *tl++); opnum = fxdr_unsigned(int, *tl++); NFSD_DEBUG(4, "flexlayouterr op=%d stat=%d\n", opnum, stat); /* * Except for NFSERR_ACCES and NFSERR_STALE errors, * disable the mirror. */ if (stat != NFSERR_ACCES && stat != NFSERR_STALE) nfsrv_delds(devid, p); } } } /* * This function removes all flex file layouts which has a mirror with * a device id that matches the argument. * Called when the DS represented by the device id has failed. */ void nfsrv_flexmirrordel(char *devid, NFSPROC_T *p) { uint32_t *tl; struct nfslayout *lyp, *nlyp; struct nfslayouthash *lhyp; struct nfslayouthead loclyp; int i, j; NFSD_DEBUG(4, "flexmirrordel\n"); /* Move all layouts found onto a local list. */ TAILQ_INIT(&loclyp); for (i = 0; i < nfsrv_layouthashsize; i++) { lhyp = &nfslayouthash[i]; NFSLOCKLAYOUT(lhyp); TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) { if (lyp->lay_type == NFSLAYOUT_FLEXFILE && lyp->lay_mirrorcnt > 1) { NFSD_DEBUG(4, "possible match\n"); tl = lyp->lay_xdr; tl += 3; for (j = 0; j < lyp->lay_mirrorcnt; j++) { tl++; if (NFSBCMP(devid, tl, NFSX_V4DEVICEID) == 0) { /* Found one. */ NFSD_DEBUG(4, "fnd one\n"); TAILQ_REMOVE(&lhyp->list, lyp, lay_list); TAILQ_INSERT_HEAD(&loclyp, lyp, lay_list); break; } tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED + NFSM_RNDUP(NFSX_V4PNFSFH) / NFSX_UNSIGNED + 11 * NFSX_UNSIGNED); } } } NFSUNLOCKLAYOUT(lhyp); } /* Now, try to do a Layout recall for each one found. */ TAILQ_FOREACH_SAFE(lyp, &loclyp, lay_list, nlyp) { NFSD_DEBUG(4, "do layout recall\n"); /* * The layout stateid.seqid needs to be incremented * before doing a LAYOUT_RECALL callback. */ if (++lyp->lay_stateid.seqid == 0) lyp->lay_stateid.seqid = 1; nfsrv_recalllayout(lyp->lay_clientid, &lyp->lay_stateid, &lyp->lay_fh, lyp, 1, lyp->lay_type, p); nfsrv_freelayout(&loclyp, lyp); } } /* * Do a recall callback to the client for this layout. */ static int nfsrv_recalllayout(nfsquad_t clid, nfsv4stateid_t *stateidp, fhandle_t *fhp, struct nfslayout *lyp, int changed, int laytype, NFSPROC_T *p) { struct nfsclient *clp; int error; NFSD_DEBUG(4, "nfsrv_recalllayout\n"); error = nfsrv_getclient(clid, 0, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, NULL, p); NFSD_DEBUG(4, "aft nfsrv_getclient=%d\n", error); if (error != 0) { printf("nfsrv_recalllayout: getclient err=%d\n", error); return (error); } if ((clp->lc_flags & LCL_NFSV41) != 0) { error = nfsrv_docallback(clp, NFSV4OP_CBLAYOUTRECALL, stateidp, changed, fhp, NULL, NULL, laytype, p); /* If lyp != NULL, handle an error return here. */ if (error != 0 && lyp != NULL) { NFSDRECALLLOCK(); /* * Mark it returned, since no layout recall * has been done. * All errors seem to be non-recoverable, although * NFSERR_NOMATCHLAYOUT is a normal event. */ if ((lyp->lay_flags & NFSLAY_RECALL) != 0) { lyp->lay_flags |= NFSLAY_RETURNED; wakeup(lyp); } NFSDRECALLUNLOCK(); if (error != NFSERR_NOMATCHLAYOUT) printf("nfsrv_recalllayout: err=%d\n", error); } } else printf("nfsrv_recalllayout: clp not NFSv4.1\n"); return (error); } /* * Find a layout to recall when we exceed our high water mark. */ void nfsrv_recalloldlayout(NFSPROC_T *p) { struct nfslayouthash *lhyp; struct nfslayout *lyp; nfsquad_t clientid; nfsv4stateid_t stateid; fhandle_t fh; int error, laytype = 0, ret; lhyp = &nfslayouthash[arc4random() % nfsrv_layouthashsize]; NFSLOCKLAYOUT(lhyp); TAILQ_FOREACH_REVERSE(lyp, &lhyp->list, nfslayouthead, lay_list) { if ((lyp->lay_flags & NFSLAY_CALLB) == 0) { lyp->lay_flags |= NFSLAY_CALLB; /* * The layout stateid.seqid needs to be incremented * before doing a LAYOUT_RECALL callback. */ if (++lyp->lay_stateid.seqid == 0) lyp->lay_stateid.seqid = 1; clientid = lyp->lay_clientid; stateid = lyp->lay_stateid; NFSBCOPY(&lyp->lay_fh, &fh, sizeof(fh)); laytype = lyp->lay_type; break; } } NFSUNLOCKLAYOUT(lhyp); if (lyp != NULL) { error = nfsrv_recalllayout(clientid, &stateid, &fh, NULL, 0, laytype, p); if (error != 0 && error != NFSERR_NOMATCHLAYOUT) NFSD_DEBUG(4, "recallold=%d\n", error); if (error != 0) { NFSLOCKLAYOUT(lhyp); /* * Since the hash list was unlocked, we need to * find it again. */ ret = nfsrv_findlayout(&clientid, &fh, laytype, p, &lyp); if (ret == 0 && (lyp->lay_flags & NFSLAY_CALLB) != 0 && lyp->lay_stateid.other[0] == stateid.other[0] && lyp->lay_stateid.other[1] == stateid.other[1] && lyp->lay_stateid.other[2] == stateid.other[2]) { /* * The client no longer knows this layout, so * it can be free'd now. */ if (error == NFSERR_NOMATCHLAYOUT) nfsrv_freelayout(&lhyp->list, lyp); else { /* * Leave it to be tried later by * clearing NFSLAY_CALLB and moving * it to the head of the list, so it * won't be tried again for a while. */ lyp->lay_flags &= ~NFSLAY_CALLB; TAILQ_REMOVE(&lhyp->list, lyp, lay_list); TAILQ_INSERT_HEAD(&lhyp->list, lyp, lay_list); } } NFSUNLOCKLAYOUT(lhyp); } } } /* * Try and return layout(s). */ int nfsrv_layoutreturn(struct nfsrv_descript *nd, vnode_t vp, int layouttype, int iomode, uint64_t offset, uint64_t len, int reclaim, int kind, nfsv4stateid_t *stateidp, int maxcnt, uint32_t *layp, int *fndp, struct ucred *cred, NFSPROC_T *p) { struct nfsvattr na; struct nfslayouthash *lhyp; struct nfslayout *lyp; fhandle_t fh; int error = 0; *fndp = 0; if (kind == NFSV4LAYOUTRET_FILE) { error = nfsvno_getfh(vp, &fh, p); if (error == 0) { error = nfsrv_updatemdsattr(vp, &na, p); if (error != 0) printf("nfsrv_layoutreturn: updatemdsattr" " failed=%d\n", error); } if (error == 0) { if (reclaim == newnfs_true) { error = nfsrv_checkgrace(NULL, NULL, NFSLCK_RECLAIM); if (error != NFSERR_NOGRACE) error = 0; return (error); } lhyp = NFSLAYOUTHASH(&fh); NFSDRECALLLOCK(); NFSLOCKLAYOUT(lhyp); error = nfsrv_findlayout(&nd->nd_clientid, &fh, layouttype, p, &lyp); NFSD_DEBUG(4, "layoutret findlay=%d\n", error); if (error == 0 && stateidp->other[0] == lyp->lay_stateid.other[0] && stateidp->other[1] == lyp->lay_stateid.other[1] && stateidp->other[2] == lyp->lay_stateid.other[2]) { NFSD_DEBUG(4, "nfsrv_layoutreturn: stateid %d" " %x %x %x laystateid %d %x %x %x" " off=%ju len=%ju flgs=0x%x\n", stateidp->seqid, stateidp->other[0], stateidp->other[1], stateidp->other[2], lyp->lay_stateid.seqid, lyp->lay_stateid.other[0], lyp->lay_stateid.other[1], lyp->lay_stateid.other[2], (uintmax_t)offset, (uintmax_t)len, lyp->lay_flags); if (++lyp->lay_stateid.seqid == 0) lyp->lay_stateid.seqid = 1; stateidp->seqid = lyp->lay_stateid.seqid; if (offset == 0 && len == UINT64_MAX) { if ((iomode & NFSLAYOUTIOMODE_READ) != 0) lyp->lay_flags &= ~NFSLAY_READ; if ((iomode & NFSLAYOUTIOMODE_RW) != 0) lyp->lay_flags &= ~NFSLAY_RW; if ((lyp->lay_flags & (NFSLAY_READ | NFSLAY_RW)) == 0) nfsrv_freelayout(&lhyp->list, lyp); else *fndp = 1; } else *fndp = 1; } NFSUNLOCKLAYOUT(lhyp); /* Search the nfsrv_recalllist for a match. */ TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) { if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 && lyp->lay_clientid.qval == nd->nd_clientid.qval && stateidp->other[0] == lyp->lay_stateid.other[0] && stateidp->other[1] == lyp->lay_stateid.other[1] && stateidp->other[2] == lyp->lay_stateid.other[2]) { lyp->lay_flags |= NFSLAY_RETURNED; wakeup(lyp); error = 0; } } NFSDRECALLUNLOCK(); } if (layouttype == NFSLAYOUT_FLEXFILE) nfsrv_flexlayouterr(nd, layp, maxcnt, p); } else if (kind == NFSV4LAYOUTRET_FSID) nfsrv_freelayouts(&nd->nd_clientid, &vp->v_mount->mnt_stat.f_fsid, layouttype, iomode); else if (kind == NFSV4LAYOUTRET_ALL) nfsrv_freelayouts(&nd->nd_clientid, NULL, layouttype, iomode); else error = NFSERR_INVAL; if (error == -1) error = 0; return (error); } /* * Look for an existing layout. */ static int nfsrv_findlayout(nfsquad_t *clientidp, fhandle_t *fhp, int laytype, NFSPROC_T *p, struct nfslayout **lypp) { struct nfslayouthash *lhyp; struct nfslayout *lyp; int ret; *lypp = NULL; ret = 0; lhyp = NFSLAYOUTHASH(fhp); TAILQ_FOREACH(lyp, &lhyp->list, lay_list) { if (NFSBCMP(&lyp->lay_fh, fhp, sizeof(*fhp)) == 0 && lyp->lay_clientid.qval == clientidp->qval && lyp->lay_type == laytype) break; } if (lyp != NULL) *lypp = lyp; else ret = -1; return (ret); } /* * Add the new layout, as required. */ static int nfsrv_addlayout(struct nfsrv_descript *nd, struct nfslayout **lypp, nfsv4stateid_t *stateidp, char *layp, int *layoutlenp, NFSPROC_T *p) { struct nfsclient *clp; struct nfslayouthash *lhyp; struct nfslayout *lyp, *nlyp; fhandle_t *fhp; int error; KASSERT((nd->nd_flag & ND_IMPLIEDCLID) != 0, ("nfsrv_layoutget: no nd_clientid\n")); lyp = *lypp; fhp = &lyp->lay_fh; NFSLOCKSTATE(); error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (error != 0) { NFSUNLOCKSTATE(); return (error); } lyp->lay_stateid.seqid = stateidp->seqid = 1; lyp->lay_stateid.other[0] = stateidp->other[0] = clp->lc_clientid.lval[0]; lyp->lay_stateid.other[1] = stateidp->other[1] = clp->lc_clientid.lval[1]; lyp->lay_stateid.other[2] = stateidp->other[2] = nfsrv_nextstateindex(clp); NFSUNLOCKSTATE(); lhyp = NFSLAYOUTHASH(fhp); NFSLOCKLAYOUT(lhyp); TAILQ_FOREACH(nlyp, &lhyp->list, lay_list) { if (NFSBCMP(&nlyp->lay_fh, fhp, sizeof(*fhp)) == 0 && nlyp->lay_clientid.qval == nd->nd_clientid.qval) break; } if (nlyp != NULL) { /* A layout already exists, so use it. */ nlyp->lay_flags |= (lyp->lay_flags & (NFSLAY_READ | NFSLAY_RW)); NFSBCOPY(nlyp->lay_xdr, layp, nlyp->lay_layoutlen); *layoutlenp = nlyp->lay_layoutlen; if (++nlyp->lay_stateid.seqid == 0) nlyp->lay_stateid.seqid = 1; stateidp->seqid = nlyp->lay_stateid.seqid; stateidp->other[0] = nlyp->lay_stateid.other[0]; stateidp->other[1] = nlyp->lay_stateid.other[1]; stateidp->other[2] = nlyp->lay_stateid.other[2]; NFSUNLOCKLAYOUT(lhyp); return (0); } /* Insert the new layout in the lists. */ *lypp = NULL; atomic_add_int(&nfsrv_layoutcnt, 1); NFSBCOPY(lyp->lay_xdr, layp, lyp->lay_layoutlen); *layoutlenp = lyp->lay_layoutlen; TAILQ_INSERT_HEAD(&lhyp->list, lyp, lay_list); NFSUNLOCKLAYOUT(lhyp); return (0); } /* * Get the devinfo for a deviceid. */ int nfsrv_getdevinfo(char *devid, int layouttype, uint32_t *maxcnt, uint32_t *notify, int *devaddrlen, char **devaddr) { struct nfsdevice *ds; if ((layouttype != NFSLAYOUT_NFSV4_1_FILES && layouttype != NFSLAYOUT_FLEXFILE) || (nfsrv_maxpnfsmirror > 1 && layouttype == NFSLAYOUT_NFSV4_1_FILES)) return (NFSERR_UNKNLAYOUTTYPE); /* * Now, search for the device id. Note that the structures won't go * away, but the order changes in the list. As such, the lock only * needs to be held during the search through the list. */ NFSDDSLOCK(); TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { if (NFSBCMP(devid, ds->nfsdev_deviceid, NFSX_V4DEVICEID) == 0 && ds->nfsdev_nmp != NULL) break; } NFSDDSUNLOCK(); if (ds == NULL) return (NFSERR_NOENT); /* If the correct nfsdev_XXXXaddrlen is > 0, we have the device info. */ *devaddrlen = 0; if (layouttype == NFSLAYOUT_NFSV4_1_FILES) { *devaddrlen = ds->nfsdev_fileaddrlen; *devaddr = ds->nfsdev_fileaddr; } else if (layouttype == NFSLAYOUT_FLEXFILE) { *devaddrlen = ds->nfsdev_flexaddrlen; *devaddr = ds->nfsdev_flexaddr; } if (*devaddrlen == 0) return (NFSERR_UNKNLAYOUTTYPE); /* * The XDR overhead is 3 unsigned values: layout_type, * length_of_address and notify bitmap. * If the notify array is changed to not all zeros, the * count of unsigned values must be increased. */ if (*maxcnt > 0 && *maxcnt < NFSM_RNDUP(*devaddrlen) + 3 * NFSX_UNSIGNED) { *maxcnt = NFSM_RNDUP(*devaddrlen) + 3 * NFSX_UNSIGNED; return (NFSERR_TOOSMALL); } return (0); } /* * Free a list of layout state structures. */ static void nfsrv_freelayoutlist(nfsquad_t clientid) { struct nfslayouthash *lhyp; struct nfslayout *lyp, *nlyp; int i; for (i = 0; i < nfsrv_layouthashsize; i++) { lhyp = &nfslayouthash[i]; NFSLOCKLAYOUT(lhyp); TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) { if (lyp->lay_clientid.qval == clientid.qval) nfsrv_freelayout(&lhyp->list, lyp); } NFSUNLOCKLAYOUT(lhyp); } } /* * Free up a layout. */ static void nfsrv_freelayout(struct nfslayouthead *lhp, struct nfslayout *lyp) { NFSD_DEBUG(4, "Freelayout=%p\n", lyp); atomic_add_int(&nfsrv_layoutcnt, -1); TAILQ_REMOVE(lhp, lyp, lay_list); free(lyp, M_NFSDSTATE); } /* * Free up a device id. */ void nfsrv_freeonedevid(struct nfsdevice *ds) { int i; atomic_add_int(&nfsrv_devidcnt, -1); vrele(ds->nfsdev_dvp); for (i = 0; i < nfsrv_dsdirsize; i++) if (ds->nfsdev_dsdir[i] != NULL) vrele(ds->nfsdev_dsdir[i]); free(ds->nfsdev_fileaddr, M_NFSDSTATE); free(ds->nfsdev_flexaddr, M_NFSDSTATE); free(ds->nfsdev_host, M_NFSDSTATE); free(ds, M_NFSDSTATE); } /* * Free up a device id and its mirrors. */ static void nfsrv_freedevid(struct nfsdevice *ds) { TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list); nfsrv_freeonedevid(ds); } /* * Free all layouts and device ids. * Done when the nfsd threads are shut down since there may be a new * modified device id list created when the nfsd is restarted. */ void nfsrv_freealllayoutsanddevids(void) { struct nfsdontlist *mrp, *nmrp; struct nfslayout *lyp, *nlyp; /* Get rid of the deviceid structures. */ nfsrv_freealldevids(); TAILQ_INIT(&nfsrv_devidhead); nfsrv_devidcnt = 0; /* Get rid of all layouts. */ nfsrv_freealllayouts(); /* Get rid of any nfsdontlist entries. */ LIST_FOREACH_SAFE(mrp, &nfsrv_dontlisthead, nfsmr_list, nmrp) free(mrp, M_NFSDSTATE); LIST_INIT(&nfsrv_dontlisthead); nfsrv_dontlistlen = 0; /* Free layouts in the recall list. */ TAILQ_FOREACH_SAFE(lyp, &nfsrv_recalllisthead, lay_list, nlyp) nfsrv_freelayout(&nfsrv_recalllisthead, lyp); TAILQ_INIT(&nfsrv_recalllisthead); } /* * Free layouts that match the arguments. */ static void nfsrv_freelayouts(nfsquad_t *clid, fsid_t *fs, int laytype, int iomode) { struct nfslayouthash *lhyp; struct nfslayout *lyp, *nlyp; int i; for (i = 0; i < nfsrv_layouthashsize; i++) { lhyp = &nfslayouthash[i]; NFSLOCKLAYOUT(lhyp); TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) { if (clid->qval != lyp->lay_clientid.qval) continue; if (fs != NULL && fsidcmp(fs, &lyp->lay_fsid) != 0) continue; if (laytype != lyp->lay_type) continue; if ((iomode & NFSLAYOUTIOMODE_READ) != 0) lyp->lay_flags &= ~NFSLAY_READ; if ((iomode & NFSLAYOUTIOMODE_RW) != 0) lyp->lay_flags &= ~NFSLAY_RW; if ((lyp->lay_flags & (NFSLAY_READ | NFSLAY_RW)) == 0) nfsrv_freelayout(&lhyp->list, lyp); } NFSUNLOCKLAYOUT(lhyp); } } /* * Free all layouts for the argument file. */ void nfsrv_freefilelayouts(fhandle_t *fhp) { struct nfslayouthash *lhyp; struct nfslayout *lyp, *nlyp; lhyp = NFSLAYOUTHASH(fhp); NFSLOCKLAYOUT(lhyp); TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) { if (NFSBCMP(&lyp->lay_fh, fhp, sizeof(*fhp)) == 0) nfsrv_freelayout(&lhyp->list, lyp); } NFSUNLOCKLAYOUT(lhyp); } /* * Free all layouts. */ static void nfsrv_freealllayouts(void) { struct nfslayouthash *lhyp; struct nfslayout *lyp, *nlyp; int i; for (i = 0; i < nfsrv_layouthashsize; i++) { lhyp = &nfslayouthash[i]; NFSLOCKLAYOUT(lhyp); TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) nfsrv_freelayout(&lhyp->list, lyp); NFSUNLOCKLAYOUT(lhyp); } } /* * Look up the mount path for the DS server. */ static int nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p, struct nfsdevice **dsp) { struct nameidata nd; struct nfsdevice *ds; struct mount *mp; int error, i; char *dsdirpath; size_t dsdirsize; NFSD_DEBUG(4, "setdssrv path=%s\n", dspathp); *dsp = NULL; NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, dspathp, p); error = namei(&nd); NFSD_DEBUG(4, "lookup=%d\n", error); if (error != 0) return (error); if (nd.ni_vp->v_type != VDIR) { vput(nd.ni_vp); NFSD_DEBUG(4, "dspath not dir\n"); return (ENOTDIR); } if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) { vput(nd.ni_vp); NFSD_DEBUG(4, "dspath not an NFS mount\n"); return (ENXIO); } /* * Allocate a DS server structure with the NFS mounted directory * vnode reference counted, so that a non-forced dismount will * fail with EBUSY. * This structure is always linked into the list, even if an error * is being returned. The caller will free the entire list upon * an error return. */ *dsp = ds = malloc(sizeof(*ds) + nfsrv_dsdirsize * sizeof(vnode_t), M_NFSDSTATE, M_WAITOK | M_ZERO); ds->nfsdev_dvp = nd.ni_vp; ds->nfsdev_nmp = VFSTONFS(nd.ni_vp->v_mount); NFSVOPUNLOCK(nd.ni_vp); dsdirsize = strlen(dspathp) + 16; dsdirpath = malloc(dsdirsize, M_TEMP, M_WAITOK); /* Now, create the DS directory structures. */ for (i = 0; i < nfsrv_dsdirsize; i++) { snprintf(dsdirpath, dsdirsize, "%s/ds%d", dspathp, i); NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, dsdirpath, p); error = namei(&nd); NFSD_DEBUG(4, "dsdirpath=%s lookup=%d\n", dsdirpath, error); if (error != 0) break; if (nd.ni_vp->v_type != VDIR) { vput(nd.ni_vp); error = ENOTDIR; NFSD_DEBUG(4, "dsdirpath not a VDIR\n"); break; } if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) { vput(nd.ni_vp); error = ENXIO; NFSD_DEBUG(4, "dsdirpath not an NFS mount\n"); break; } ds->nfsdev_dsdir[i] = nd.ni_vp; NFSVOPUNLOCK(nd.ni_vp); } free(dsdirpath, M_TEMP); if (strlen(mdspathp) > 0) { /* * This DS stores file for a specific MDS exported file * system. */ NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, mdspathp, p); error = namei(&nd); NFSD_DEBUG(4, "mds lookup=%d\n", error); if (error != 0) goto out; if (nd.ni_vp->v_type != VDIR) { vput(nd.ni_vp); error = ENOTDIR; NFSD_DEBUG(4, "mdspath not dir\n"); goto out; } mp = nd.ni_vp->v_mount; if ((mp->mnt_flag & MNT_EXPORTED) == 0) { vput(nd.ni_vp); error = ENXIO; NFSD_DEBUG(4, "mdspath not an exported fs\n"); goto out; } ds->nfsdev_mdsfsid = mp->mnt_stat.f_fsid; ds->nfsdev_mdsisset = 1; vput(nd.ni_vp); } out: TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list); atomic_add_int(&nfsrv_devidcnt, 1); return (error); } /* * Look up the mount path for the DS server and delete it. */ int nfsrv_deldsserver(int op, char *dspathp, NFSPROC_T *p) { struct mount *mp; struct nfsmount *nmp; struct nfsdevice *ds; int error; NFSD_DEBUG(4, "deldssrv path=%s\n", dspathp); /* * Search for the path in the mount list. Avoid looking the path * up, since this mount point may be hung, with associated locked * vnodes, etc. * Set NFSMNTP_CANCELRPCS so that any forced dismount will be blocked * until this completes. * As noted in the man page, this should be done before any forced * dismount on the mount point, but at least the handshake on * NFSMNTP_CANCELRPCS should make it safe. */ error = 0; ds = NULL; nmp = NULL; mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (strcmp(mp->mnt_stat.f_mntonname, dspathp) == 0 && strcmp(mp->mnt_stat.f_fstypename, "nfs") == 0 && mp->mnt_data != NULL) { nmp = VFSTONFS(mp); NFSLOCKMNT(nmp); if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | NFSMNTP_CANCELRPCS)) == 0) { nmp->nm_privflag |= NFSMNTP_CANCELRPCS; NFSUNLOCKMNT(nmp); } else { NFSUNLOCKMNT(nmp); nmp = NULL; } break; } } mtx_unlock(&mountlist_mtx); if (nmp != NULL) { ds = nfsrv_deldsnmp(op, nmp, p); NFSD_DEBUG(4, "deldsnmp=%p\n", ds); if (ds != NULL) { nfsrv_killrpcs(nmp); NFSD_DEBUG(4, "aft killrpcs\n"); } else error = ENXIO; NFSLOCKMNT(nmp); nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; wakeup(nmp); NFSUNLOCKMNT(nmp); } else error = EINVAL; return (error); } /* * Search for and remove a DS entry which matches the "nmp" argument. * The nfsdevice structure pointer is returned so that the caller can * free it via nfsrv_freeonedevid(). * For the forced case, do not try to do LayoutRecalls, since the server * must be shut down now anyhow. */ struct nfsdevice * nfsrv_deldsnmp(int op, struct nfsmount *nmp, NFSPROC_T *p) { struct nfsdevice *fndds; NFSD_DEBUG(4, "deldsdvp\n"); NFSDDSLOCK(); if (op == PNFSDOP_FORCEDELDS) fndds = nfsv4_findmirror(nmp); else fndds = nfsrv_findmirroredds(nmp); if (fndds != NULL) nfsrv_deleteds(fndds); NFSDDSUNLOCK(); if (fndds != NULL) { if (op != PNFSDOP_FORCEDELDS) nfsrv_flexmirrordel(fndds->nfsdev_deviceid, p); printf("pNFS server: mirror %s failed\n", fndds->nfsdev_host); } return (fndds); } /* * Similar to nfsrv_deldsnmp(), except that the DS is indicated by deviceid. * This function also calls nfsrv_killrpcs() to unblock RPCs on the mount * point. * Also, returns an error instead of the nfsdevice found. */ int nfsrv_delds(char *devid, NFSPROC_T *p) { struct nfsdevice *ds, *fndds; struct nfsmount *nmp; int fndmirror; NFSD_DEBUG(4, "delds\n"); /* * Search the DS server list for a match with devid. * Remove the DS entry if found and there is a mirror. */ fndds = NULL; nmp = NULL; fndmirror = 0; NFSDDSLOCK(); TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { if (NFSBCMP(ds->nfsdev_deviceid, devid, NFSX_V4DEVICEID) == 0 && ds->nfsdev_nmp != NULL) { NFSD_DEBUG(4, "fnd main ds\n"); fndds = ds; break; } } if (fndds == NULL) { NFSDDSUNLOCK(); return (ENXIO); } if (fndds->nfsdev_mdsisset == 0 && nfsrv_faildscnt > 0) fndmirror = 1; else if (fndds->nfsdev_mdsisset != 0) { /* For the fsid is set case, search for a mirror. */ TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { if (ds != fndds && ds->nfsdev_nmp != NULL && ds->nfsdev_mdsisset != 0 && fsidcmp(&ds->nfsdev_mdsfsid, &fndds->nfsdev_mdsfsid) == 0) { fndmirror = 1; break; } } } if (fndmirror != 0) { nmp = fndds->nfsdev_nmp; NFSLOCKMNT(nmp); if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | NFSMNTP_CANCELRPCS)) == 0) { nmp->nm_privflag |= NFSMNTP_CANCELRPCS; NFSUNLOCKMNT(nmp); nfsrv_deleteds(fndds); } else { NFSUNLOCKMNT(nmp); nmp = NULL; } } NFSDDSUNLOCK(); if (nmp != NULL) { nfsrv_flexmirrordel(fndds->nfsdev_deviceid, p); printf("pNFS server: mirror %s failed\n", fndds->nfsdev_host); nfsrv_killrpcs(nmp); NFSLOCKMNT(nmp); nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; wakeup(nmp); NFSUNLOCKMNT(nmp); return (0); } return (ENXIO); } /* * Mark a DS as disabled by setting nfsdev_nmp = NULL. */ static void nfsrv_deleteds(struct nfsdevice *fndds) { NFSD_DEBUG(4, "deleteds: deleting a mirror\n"); fndds->nfsdev_nmp = NULL; if (fndds->nfsdev_mdsisset == 0) nfsrv_faildscnt--; } /* * Fill in the addr structures for the File and Flex File layouts. */ static void nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost) { uint32_t *tl; char *netprot; int addrlen; static uint64_t new_devid = 0; if (strchr(addr, ':') != NULL) netprot = "tcp6"; else netprot = "tcp"; /* Fill in the device id. */ NFSBCOPY(&nfsdev_time, ds->nfsdev_deviceid, sizeof(nfsdev_time)); new_devid++; NFSBCOPY(&new_devid, &ds->nfsdev_deviceid[sizeof(nfsdev_time)], sizeof(new_devid)); /* * Fill in the file addr (actually the nfsv4_file_layout_ds_addr4 * as defined in RFC5661) in XDR. */ addrlen = NFSM_RNDUP(strlen(addr)) + NFSM_RNDUP(strlen(netprot)) + 6 * NFSX_UNSIGNED; NFSD_DEBUG(4, "hn=%s addr=%s netprot=%s\n", dnshost, addr, netprot); ds->nfsdev_fileaddrlen = addrlen; tl = malloc(addrlen, M_NFSDSTATE, M_WAITOK | M_ZERO); ds->nfsdev_fileaddr = (char *)tl; *tl++ = txdr_unsigned(1); /* One stripe with index 0. */ *tl++ = 0; *tl++ = txdr_unsigned(1); /* One multipath list */ *tl++ = txdr_unsigned(1); /* with one entry in it. */ /* The netaddr for this one entry. */ *tl++ = txdr_unsigned(strlen(netprot)); NFSBCOPY(netprot, tl, strlen(netprot)); tl += (NFSM_RNDUP(strlen(netprot)) / NFSX_UNSIGNED); *tl++ = txdr_unsigned(strlen(addr)); NFSBCOPY(addr, tl, strlen(addr)); /* * Fill in the flex file addr (actually the ff_device_addr4 * as defined for Flexible File Layout) in XDR. */ addrlen = NFSM_RNDUP(strlen(addr)) + NFSM_RNDUP(strlen(netprot)) + 14 * NFSX_UNSIGNED; ds->nfsdev_flexaddrlen = addrlen; tl = malloc(addrlen, M_NFSDSTATE, M_WAITOK | M_ZERO); ds->nfsdev_flexaddr = (char *)tl; *tl++ = txdr_unsigned(1); /* One multipath entry. */ /* The netaddr for this one entry. */ *tl++ = txdr_unsigned(strlen(netprot)); NFSBCOPY(netprot, tl, strlen(netprot)); tl += (NFSM_RNDUP(strlen(netprot)) / NFSX_UNSIGNED); *tl++ = txdr_unsigned(strlen(addr)); NFSBCOPY(addr, tl, strlen(addr)); tl += (NFSM_RNDUP(strlen(addr)) / NFSX_UNSIGNED); *tl++ = txdr_unsigned(2); /* Two NFS Versions. */ *tl++ = txdr_unsigned(NFS_VER4); /* NFSv4. */ *tl++ = txdr_unsigned(NFSV42_MINORVERSION); /* Minor version 2. */ - *tl++ = txdr_unsigned(NFS_SRVMAXIO); /* DS max rsize. */ - *tl++ = txdr_unsigned(NFS_SRVMAXIO); /* DS max wsize. */ + *tl++ = txdr_unsigned(nfs_srvmaxio); /* DS max rsize. */ + *tl++ = txdr_unsigned(nfs_srvmaxio); /* DS max wsize. */ *tl++ = newnfs_true; /* Tightly coupled. */ *tl++ = txdr_unsigned(NFS_VER4); /* NFSv4. */ *tl++ = txdr_unsigned(NFSV41_MINORVERSION); /* Minor version 1. */ - *tl++ = txdr_unsigned(NFS_SRVMAXIO); /* DS max rsize. */ - *tl++ = txdr_unsigned(NFS_SRVMAXIO); /* DS max wsize. */ + *tl++ = txdr_unsigned(nfs_srvmaxio); /* DS max rsize. */ + *tl++ = txdr_unsigned(nfs_srvmaxio); /* DS max wsize. */ *tl = newnfs_true; /* Tightly coupled. */ ds->nfsdev_hostnamelen = strlen(dnshost); ds->nfsdev_host = malloc(ds->nfsdev_hostnamelen + 1, M_NFSDSTATE, M_WAITOK); NFSBCOPY(dnshost, ds->nfsdev_host, ds->nfsdev_hostnamelen + 1); } /* * Create the device id list. * Return 0 if the nfsd threads are to run and ENXIO if the "-p" argument * is misconfigured. */ int nfsrv_createdevids(struct nfsd_nfsd_args *args, NFSPROC_T *p) { struct nfsdevice *ds; char *addrp, *dnshostp, *dspathp, *mdspathp; int error, i; addrp = args->addr; dnshostp = args->dnshost; dspathp = args->dspath; mdspathp = args->mdspath; nfsrv_maxpnfsmirror = args->mirrorcnt; if (addrp == NULL || dnshostp == NULL || dspathp == NULL || mdspathp == NULL) return (0); /* * Loop around for each nul-terminated string in args->addr, * args->dnshost, args->dnspath and args->mdspath. */ while (addrp < (args->addr + args->addrlen) && dnshostp < (args->dnshost + args->dnshostlen) && dspathp < (args->dspath + args->dspathlen) && mdspathp < (args->mdspath + args->mdspathlen)) { error = nfsrv_setdsserver(dspathp, mdspathp, p, &ds); if (error != 0) { /* Free all DS servers. */ nfsrv_freealldevids(); nfsrv_devidcnt = 0; return (ENXIO); } nfsrv_allocdevid(ds, addrp, dnshostp); addrp += (strlen(addrp) + 1); dnshostp += (strlen(dnshostp) + 1); dspathp += (strlen(dspathp) + 1); mdspathp += (strlen(mdspathp) + 1); } if (nfsrv_devidcnt < nfsrv_maxpnfsmirror) { /* Free all DS servers. */ nfsrv_freealldevids(); nfsrv_devidcnt = 0; nfsrv_maxpnfsmirror = 1; return (ENXIO); } /* We can fail at most one less DS than the mirror level. */ nfsrv_faildscnt = nfsrv_maxpnfsmirror - 1; /* * Allocate the nfslayout hash table now, since this is a pNFS server. * Make it 1% of the high water mark and at least 100. */ if (nfslayouthash == NULL) { nfsrv_layouthashsize = nfsrv_layouthighwater / 100; if (nfsrv_layouthashsize < 100) nfsrv_layouthashsize = 100; nfslayouthash = mallocarray(nfsrv_layouthashsize, sizeof(struct nfslayouthash), M_NFSDSESSION, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_layouthashsize; i++) { mtx_init(&nfslayouthash[i].mtx, "nfslm", NULL, MTX_DEF); TAILQ_INIT(&nfslayouthash[i].list); } } return (0); } /* * Free all device ids. */ static void nfsrv_freealldevids(void) { struct nfsdevice *ds, *nds; TAILQ_FOREACH_SAFE(ds, &nfsrv_devidhead, nfsdev_list, nds) nfsrv_freedevid(ds); } /* * Check to see if there is a Read/Write Layout plus either: * - A Write Delegation * or * - An Open with Write_access. * Return 1 if this is the case and 0 otherwise. * This function is used by nfsrv_proxyds() to decide if doing a Proxy * Getattr RPC to the Data Server (DS) is necessary. */ #define NFSCLIDVECSIZE 6 int nfsrv_checkdsattr(vnode_t vp, NFSPROC_T *p) { fhandle_t fh, *tfhp; struct nfsstate *stp; struct nfslayout *lyp; struct nfslayouthash *lhyp; struct nfslockhashhead *hp; struct nfslockfile *lfp; nfsquad_t clid[NFSCLIDVECSIZE]; int clidcnt, ret; ret = nfsvno_getfh(vp, &fh, p); if (ret != 0) return (0); /* First check for a Read/Write Layout. */ clidcnt = 0; lhyp = NFSLAYOUTHASH(&fh); NFSLOCKLAYOUT(lhyp); TAILQ_FOREACH(lyp, &lhyp->list, lay_list) { if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 && ((lyp->lay_flags & NFSLAY_RW) != 0 || ((lyp->lay_flags & NFSLAY_READ) != 0 && nfsrv_pnfsatime != 0))) { if (clidcnt < NFSCLIDVECSIZE) clid[clidcnt].qval = lyp->lay_clientid.qval; clidcnt++; } } NFSUNLOCKLAYOUT(lhyp); if (clidcnt == 0) { /* None found, so return 0. */ return (0); } /* Get the nfslockfile for this fh. */ NFSLOCKSTATE(); hp = NFSLOCKHASH(&fh); LIST_FOREACH(lfp, hp, lf_hash) { tfhp = &lfp->lf_fh; if (NFSVNO_CMPFH(&fh, tfhp)) break; } if (lfp == NULL) { /* None found, so return 0. */ NFSUNLOCKSTATE(); return (0); } /* Now, look for a Write delegation for this clientid. */ LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) { if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0 && nfsrv_fndclid(clid, stp->ls_clp->lc_clientid, clidcnt) != 0) break; } if (stp != NULL) { /* Found one, so return 1. */ NFSUNLOCKSTATE(); return (1); } /* No Write delegation, so look for an Open with Write_access. */ LIST_FOREACH(stp, &lfp->lf_open, ls_file) { KASSERT((stp->ls_flags & NFSLCK_OPEN) != 0, ("nfsrv_checkdsattr: Non-open in Open list\n")); if ((stp->ls_flags & NFSLCK_WRITEACCESS) != 0 && nfsrv_fndclid(clid, stp->ls_clp->lc_clientid, clidcnt) != 0) break; } NFSUNLOCKSTATE(); if (stp != NULL) return (1); return (0); } /* * Look for a matching clientid in the vector. Return 1 if one might match. */ static int nfsrv_fndclid(nfsquad_t *clidvec, nfsquad_t clid, int clidcnt) { int i; /* If too many for the vector, return 1 since there might be a match. */ if (clidcnt > NFSCLIDVECSIZE) return (1); for (i = 0; i < clidcnt; i++) if (clidvec[i].qval == clid.qval) return (1); return (0); } /* * Check the don't list for "vp" and see if issuing an rw layout is allowed. * Return 1 if issuing an rw layout isn't allowed, 0 otherwise. */ static int nfsrv_dontlayout(fhandle_t *fhp) { struct nfsdontlist *mrp; int ret; if (nfsrv_dontlistlen == 0) return (0); ret = 0; NFSDDONTLISTLOCK(); LIST_FOREACH(mrp, &nfsrv_dontlisthead, nfsmr_list) { if (NFSBCMP(fhp, &mrp->nfsmr_fh, sizeof(*fhp)) == 0 && (mrp->nfsmr_flags & NFSMR_DONTLAYOUT) != 0) { ret = 1; break; } } NFSDDONTLISTUNLOCK(); return (ret); } #define PNFSDS_COPYSIZ 65536 /* * Create a new file on a DS and copy the contents of an extant DS file to it. * This can be used for recovery of a DS file onto a recovered DS. * The steps are: * - When called, the MDS file's vnode is locked, blocking LayoutGet operations. * - Disable issuing of read/write layouts for the file via the nfsdontlist, * so that they will be disabled after the MDS file's vnode is unlocked. * - Set up the nfsrv_recalllist so that recall of read/write layouts can * be done. * - Unlock the MDS file's vnode, so that the client(s) can perform proxied * writes, LayoutCommits and LayoutReturns for the file when completing the * LayoutReturn requested by the LayoutRecall callback. * - Issue a LayoutRecall callback for all read/write layouts and wait for * them to be returned. (If the LayoutRecall callback replies * NFSERR_NOMATCHLAYOUT, they are gone and no LayoutReturn is needed.) * - Exclusively lock the MDS file's vnode. This ensures that no proxied * writes are in progress or can occur during the DS file copy. * It also blocks Setattr operations. * - Create the file on the recovered mirror. * - Copy the file from the operational DS. * - Copy any ACL from the MDS file to the new DS file. * - Set the modify time of the new DS file to that of the MDS file. * - Update the extended attribute for the MDS file. * - Enable issuing of rw layouts by deleting the nfsdontlist entry. * - The caller will unlock the MDS file's vnode allowing operations * to continue normally, since it is now on the mirror again. */ int nfsrv_copymr(vnode_t vp, vnode_t fvp, vnode_t dvp, struct nfsdevice *ds, struct pnfsdsfile *pf, struct pnfsdsfile *wpf, int mirrorcnt, struct ucred *cred, NFSPROC_T *p) { struct nfsdontlist *mrp, *nmrp; struct nfslayouthash *lhyp; struct nfslayout *lyp, *nlyp; struct nfslayouthead thl; struct mount *mp, *tvmp; struct acl *aclp; struct vattr va; struct timespec mtime; fhandle_t fh; vnode_t tvp; off_t rdpos, wrpos; ssize_t aresid; char *dat; int didprintf, ret, retacl, xfer; ASSERT_VOP_LOCKED(fvp, "nfsrv_copymr fvp"); ASSERT_VOP_LOCKED(vp, "nfsrv_copymr vp"); /* * Allocate a nfsdontlist entry and set the NFSMR_DONTLAYOUT flag * so that no more RW layouts will get issued. */ ret = nfsvno_getfh(vp, &fh, p); if (ret != 0) { NFSD_DEBUG(4, "nfsrv_copymr: getfh=%d\n", ret); return (ret); } nmrp = malloc(sizeof(*nmrp), M_NFSDSTATE, M_WAITOK); nmrp->nfsmr_flags = NFSMR_DONTLAYOUT; NFSBCOPY(&fh, &nmrp->nfsmr_fh, sizeof(fh)); NFSDDONTLISTLOCK(); LIST_FOREACH(mrp, &nfsrv_dontlisthead, nfsmr_list) { if (NFSBCMP(&fh, &mrp->nfsmr_fh, sizeof(fh)) == 0) break; } if (mrp == NULL) { LIST_INSERT_HEAD(&nfsrv_dontlisthead, nmrp, nfsmr_list); mrp = nmrp; nmrp = NULL; nfsrv_dontlistlen++; NFSD_DEBUG(4, "nfsrv_copymr: in dontlist\n"); } else { NFSDDONTLISTUNLOCK(); free(nmrp, M_NFSDSTATE); NFSD_DEBUG(4, "nfsrv_copymr: dup dontlist\n"); return (ENXIO); } NFSDDONTLISTUNLOCK(); /* * Search for all RW layouts for this file. Move them to the * recall list, so they can be recalled and their return noted. */ lhyp = NFSLAYOUTHASH(&fh); NFSDRECALLLOCK(); NFSLOCKLAYOUT(lhyp); TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) { if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 && (lyp->lay_flags & NFSLAY_RW) != 0) { TAILQ_REMOVE(&lhyp->list, lyp, lay_list); TAILQ_INSERT_HEAD(&nfsrv_recalllisthead, lyp, lay_list); lyp->lay_trycnt = 0; } } NFSUNLOCKLAYOUT(lhyp); NFSDRECALLUNLOCK(); ret = 0; mp = tvmp = NULL; didprintf = 0; TAILQ_INIT(&thl); /* Unlock the MDS vp, so that a LayoutReturn can be done on it. */ NFSVOPUNLOCK(vp); /* Now, do a recall for all layouts not yet recalled. */ tryagain: NFSDRECALLLOCK(); TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) { if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 && (lyp->lay_flags & NFSLAY_RECALL) == 0) { lyp->lay_flags |= NFSLAY_RECALL; /* * The layout stateid.seqid needs to be incremented * before doing a LAYOUT_RECALL callback. */ if (++lyp->lay_stateid.seqid == 0) lyp->lay_stateid.seqid = 1; NFSDRECALLUNLOCK(); nfsrv_recalllayout(lyp->lay_clientid, &lyp->lay_stateid, &lyp->lay_fh, lyp, 0, lyp->lay_type, p); NFSD_DEBUG(4, "nfsrv_copymr: recalled layout\n"); goto tryagain; } } /* Now wait for them to be returned. */ tryagain2: TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) { if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0) { if ((lyp->lay_flags & NFSLAY_RETURNED) != 0) { TAILQ_REMOVE(&nfsrv_recalllisthead, lyp, lay_list); TAILQ_INSERT_HEAD(&thl, lyp, lay_list); NFSD_DEBUG(4, "nfsrv_copymr: layout returned\n"); } else { lyp->lay_trycnt++; ret = mtx_sleep(lyp, NFSDRECALLMUTEXPTR, PVFS | PCATCH, "nfsmrl", hz); NFSD_DEBUG(4, "nfsrv_copymr: aft sleep=%d\n", ret); if (ret == EINTR || ret == ERESTART) break; if ((lyp->lay_flags & NFSLAY_RETURNED) == 0) { /* * Give up after 60sec and return * ENXIO, failing the copymr. * This layout will remain on the * recalllist. It can only be cleared * by restarting the nfsd. * This seems the safe way to handle * it, since it cannot be safely copied * with an outstanding RW layout. */ if (lyp->lay_trycnt >= 60) { ret = ENXIO; break; } if (didprintf == 0) { printf("nfsrv_copymr: layout " "not returned\n"); didprintf = 1; } } } goto tryagain2; } } NFSDRECALLUNLOCK(); /* We can now get rid of the layouts that have been returned. */ TAILQ_FOREACH_SAFE(lyp, &thl, lay_list, nlyp) nfsrv_freelayout(&thl, lyp); /* * Do the vn_start_write() calls here, before the MDS vnode is * locked and the tvp is created (locked) in the NFS file system * that dvp is in. * For tvmp, this probably isn't necessary, since it will be an * NFS mount and they are not suspendable at this time. */ if (ret == 0) ret = vn_start_write(vp, &mp, V_WAIT | PCATCH); if (ret == 0) { tvmp = dvp->v_mount; ret = vn_start_write(NULL, &tvmp, V_WAIT | PCATCH); } /* * LK_EXCLUSIVE lock the MDS vnode, so that any * proxied writes through the MDS will be blocked until we have * completed the copy and update of the extended attributes. * This will also ensure that any attributes and ACL will not be * changed until the copy is complete. */ NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); if (ret == 0 && VN_IS_DOOMED(vp)) { NFSD_DEBUG(4, "nfsrv_copymr: lk_exclusive doomed\n"); ret = ESTALE; } /* Create the data file on the recovered DS. */ if (ret == 0) ret = nfsrv_createdsfile(vp, &fh, pf, dvp, ds, cred, p, &tvp); /* Copy the DS file, if created successfully. */ if (ret == 0) { /* * Get any NFSv4 ACL on the MDS file, so that it can be set * on the new DS file. */ aclp = acl_alloc(M_WAITOK | M_ZERO); retacl = VOP_GETACL(vp, ACL_TYPE_NFS4, aclp, cred, p); if (retacl != 0 && retacl != ENOATTR) NFSD_DEBUG(1, "nfsrv_copymr: vop_getacl=%d\n", retacl); dat = malloc(PNFSDS_COPYSIZ, M_TEMP, M_WAITOK); /* Malloc a block of 0s used to check for holes. */ if (nfsrv_zeropnfsdat == NULL) nfsrv_zeropnfsdat = malloc(PNFSDS_COPYSIZ, M_TEMP, M_WAITOK | M_ZERO); rdpos = wrpos = 0; ret = VOP_GETATTR(fvp, &va, cred); aresid = 0; while (ret == 0 && aresid == 0) { ret = vn_rdwr(UIO_READ, fvp, dat, PNFSDS_COPYSIZ, rdpos, UIO_SYSSPACE, IO_NODELOCKED, cred, NULL, &aresid, p); xfer = PNFSDS_COPYSIZ - aresid; if (ret == 0 && xfer > 0) { rdpos += xfer; /* * Skip the write for holes, except for the * last block. */ if (xfer < PNFSDS_COPYSIZ || rdpos == va.va_size || NFSBCMP(dat, nfsrv_zeropnfsdat, PNFSDS_COPYSIZ) != 0) ret = vn_rdwr(UIO_WRITE, tvp, dat, xfer, wrpos, UIO_SYSSPACE, IO_NODELOCKED, cred, NULL, NULL, p); if (ret == 0) wrpos += xfer; } } /* If there is an ACL and the copy succeeded, set the ACL. */ if (ret == 0 && retacl == 0) { ret = VOP_SETACL(tvp, ACL_TYPE_NFS4, aclp, cred, p); /* * Don't consider these as errors, since VOP_GETACL() * can return an ACL when they are not actually * supported. For example, for UFS, VOP_GETACL() * will return a trivial ACL based on the uid/gid/mode * when there is no ACL on the file. * This case should be recognized as a trivial ACL * by UFS's VOP_SETACL() and succeed, but... */ if (ret == ENOATTR || ret == EOPNOTSUPP || ret == EPERM) ret = 0; } if (ret == 0) ret = VOP_FSYNC(tvp, MNT_WAIT, p); /* Set the DS data file's modify time that of the MDS file. */ if (ret == 0) ret = VOP_GETATTR(vp, &va, cred); if (ret == 0) { mtime = va.va_mtime; VATTR_NULL(&va); va.va_mtime = mtime; ret = VOP_SETATTR(tvp, &va, cred); } vput(tvp); acl_free(aclp); free(dat, M_TEMP); } if (tvmp != NULL) vn_finished_write(tvmp); /* Update the extended attributes for the newly created DS file. */ if (ret == 0) ret = vn_extattr_set(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", sizeof(*wpf) * mirrorcnt, (char *)wpf, p); if (mp != NULL) vn_finished_write(mp); /* Get rid of the dontlist entry, so that Layouts can be issued. */ NFSDDONTLISTLOCK(); LIST_REMOVE(mrp, nfsmr_list); NFSDDONTLISTUNLOCK(); free(mrp, M_NFSDSTATE); return (ret); } /* * Create a data storage file on the recovered DS. */ static int nfsrv_createdsfile(vnode_t vp, fhandle_t *fhp, struct pnfsdsfile *pf, vnode_t dvp, struct nfsdevice *ds, struct ucred *cred, NFSPROC_T *p, vnode_t *tvpp) { struct vattr va, nva; int error; /* Make data file name based on FH. */ error = VOP_GETATTR(vp, &va, cred); if (error == 0) { /* Set the attributes for "vp" to Setattr the DS vp. */ VATTR_NULL(&nva); nva.va_uid = va.va_uid; nva.va_gid = va.va_gid; nva.va_mode = va.va_mode; nva.va_size = 0; VATTR_NULL(&va); va.va_type = VREG; va.va_mode = nva.va_mode; NFSD_DEBUG(4, "nfsrv_dscreatefile: dvp=%p pf=%p\n", dvp, pf); error = nfsrv_dscreate(dvp, &va, &nva, fhp, pf, NULL, pf->dsf_filename, cred, p, tvpp); } return (error); } /* * Look up the MDS file shared locked, and then get the extended attribute * to find the extant DS file to be copied to the new mirror. * If successful, *vpp is set to the MDS file's vp and *nvpp is * set to a DS data file for the MDS file, both exclusively locked. * The "buf" argument has the pnfsdsfile structure from the MDS file * in it and buflen is set to its length. */ int nfsrv_mdscopymr(char *mdspathp, char *dspathp, char *curdspathp, char *buf, int *buflenp, char *fname, NFSPROC_T *p, struct vnode **vpp, struct vnode **nvpp, struct pnfsdsfile **pfp, struct nfsdevice **dsp, struct nfsdevice **fdsp) { struct nameidata nd; struct vnode *vp, *curvp; struct pnfsdsfile *pf; struct nfsmount *nmp, *curnmp; int dsdir, error, mirrorcnt, ippos; vp = NULL; curvp = NULL; curnmp = NULL; *dsp = NULL; *fdsp = NULL; if (dspathp == NULL && curdspathp != NULL) return (EPERM); /* * Look up the MDS file shared locked. The lock will be upgraded * to an exclusive lock after any rw layouts have been returned. */ NFSD_DEBUG(4, "mdsopen path=%s\n", mdspathp); NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, mdspathp, p); error = namei(&nd); NFSD_DEBUG(4, "lookup=%d\n", error); if (error != 0) return (error); if (nd.ni_vp->v_type != VREG) { vput(nd.ni_vp); NFSD_DEBUG(4, "mdspath not reg\n"); return (EISDIR); } vp = nd.ni_vp; if (curdspathp != NULL) { /* * Look up the current DS path and find the nfsdev structure for * it. */ NFSD_DEBUG(4, "curmdsdev path=%s\n", curdspathp); NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, curdspathp, p); error = namei(&nd); NFSD_DEBUG(4, "ds lookup=%d\n", error); if (error != 0) { vput(vp); return (error); } if (nd.ni_vp->v_type != VDIR) { vput(nd.ni_vp); vput(vp); NFSD_DEBUG(4, "curdspath not dir\n"); return (ENOTDIR); } if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) { vput(nd.ni_vp); vput(vp); NFSD_DEBUG(4, "curdspath not an NFS mount\n"); return (ENXIO); } curnmp = VFSTONFS(nd.ni_vp->v_mount); /* Search the nfsdev list for a match. */ NFSDDSLOCK(); *fdsp = nfsv4_findmirror(curnmp); NFSDDSUNLOCK(); if (*fdsp == NULL) curnmp = NULL; if (curnmp == NULL) { vput(nd.ni_vp); vput(vp); NFSD_DEBUG(4, "mdscopymr: no current ds\n"); return (ENXIO); } curvp = nd.ni_vp; } if (dspathp != NULL) { /* Look up the nfsdev path and find the nfsdev structure. */ NFSD_DEBUG(4, "mdsdev path=%s\n", dspathp); NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, dspathp, p); error = namei(&nd); NFSD_DEBUG(4, "ds lookup=%d\n", error); if (error != 0) { vput(vp); if (curvp != NULL) vput(curvp); return (error); } if (nd.ni_vp->v_type != VDIR || nd.ni_vp == curvp) { vput(nd.ni_vp); vput(vp); if (curvp != NULL) vput(curvp); NFSD_DEBUG(4, "dspath not dir\n"); if (nd.ni_vp == curvp) return (EPERM); return (ENOTDIR); } if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) { vput(nd.ni_vp); vput(vp); if (curvp != NULL) vput(curvp); NFSD_DEBUG(4, "dspath not an NFS mount\n"); return (ENXIO); } nmp = VFSTONFS(nd.ni_vp->v_mount); /* * Search the nfsdevice list for a match. If curnmp == NULL, * this is a recovery and there must be a mirror. */ NFSDDSLOCK(); if (curnmp == NULL) *dsp = nfsrv_findmirroredds(nmp); else *dsp = nfsv4_findmirror(nmp); NFSDDSUNLOCK(); if (*dsp == NULL) { vput(nd.ni_vp); vput(vp); if (curvp != NULL) vput(curvp); NFSD_DEBUG(4, "mdscopymr: no ds\n"); return (ENXIO); } } else { nd.ni_vp = NULL; nmp = NULL; } /* * Get a vp for an available DS data file using the extended * attribute on the MDS file. * If there is a valid entry for the new DS in the extended attribute * on the MDS file (as checked via the nmp argument), * nfsrv_dsgetsockmnt() returns EEXIST, so no copying will occur. */ error = nfsrv_dsgetsockmnt(vp, 0, buf, buflenp, &mirrorcnt, p, NULL, NULL, NULL, fname, nvpp, &nmp, curnmp, &ippos, &dsdir); if (curvp != NULL) vput(curvp); if (nd.ni_vp == NULL) { if (error == 0 && nmp != NULL) { /* Search the nfsdev list for a match. */ NFSDDSLOCK(); *dsp = nfsrv_findmirroredds(nmp); NFSDDSUNLOCK(); } if (error == 0 && (nmp == NULL || *dsp == NULL)) { if (nvpp != NULL && *nvpp != NULL) { vput(*nvpp); *nvpp = NULL; } error = ENXIO; } } else vput(nd.ni_vp); /* * When dspathp != NULL and curdspathp == NULL, this is a recovery * and is only allowed if there is a 0.0.0.0 IP address entry. * When curdspathp != NULL, the ippos will be set to that entry. */ if (error == 0 && dspathp != NULL && ippos == -1) { if (nvpp != NULL && *nvpp != NULL) { vput(*nvpp); *nvpp = NULL; } error = ENXIO; } if (error == 0) { *vpp = vp; pf = (struct pnfsdsfile *)buf; if (ippos == -1) { /* If no zeroip pnfsdsfile, add one. */ ippos = *buflenp / sizeof(*pf); *buflenp += sizeof(*pf); pf += ippos; pf->dsf_dir = dsdir; strlcpy(pf->dsf_filename, fname, sizeof(pf->dsf_filename)); } else pf += ippos; *pfp = pf; } else vput(vp); return (error); } /* * Search for a matching pnfsd mirror device structure, base on the nmp arg. * Return one if found, NULL otherwise. */ static struct nfsdevice * nfsrv_findmirroredds(struct nfsmount *nmp) { struct nfsdevice *ds, *fndds; int fndmirror; mtx_assert(NFSDDSMUTEXPTR, MA_OWNED); /* * Search the DS server list for a match with nmp. * Remove the DS entry if found and there is a mirror. */ fndds = NULL; fndmirror = 0; if (nfsrv_devidcnt == 0) return (fndds); TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { if (ds->nfsdev_nmp == nmp) { NFSD_DEBUG(4, "nfsrv_findmirroredds: fnd main ds\n"); fndds = ds; break; } } if (fndds == NULL) return (fndds); if (fndds->nfsdev_mdsisset == 0 && nfsrv_faildscnt > 0) fndmirror = 1; else if (fndds->nfsdev_mdsisset != 0) { /* For the fsid is set case, search for a mirror. */ TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { if (ds != fndds && ds->nfsdev_nmp != NULL && ds->nfsdev_mdsisset != 0 && fsidcmp(&ds->nfsdev_mdsfsid, &fndds->nfsdev_mdsfsid) == 0) { fndmirror = 1; break; } } } if (fndmirror == 0) { NFSD_DEBUG(4, "nfsrv_findmirroredds: no mirror for DS\n"); return (NULL); } return (fndds); }